1055 files changed, 88985 insertions, 14838 deletions
diff --git a/tools/Makefile b/tools/Makefile
index be02c8b904db..abb358a70ad0 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -21,6 +21,7 @@ help:
 	@echo '  leds                   - LEDs  tools'
 	@echo '  liblockdep             - user-space wrapper for kernel locking-validator'
 	@echo '  bpf                    - misc BPF tools'
+	@echo '  pci                    - PCI tools'
 	@echo '  perf                   - Linux performance measurement and analysis tool'
 	@echo '  selftests              - various kernel selftests'
 	@echo '  spi                    - spi tools'
@@ -59,7 +60,7 @@ acpi: FORCE
 cpupower: FORCE
 	$(call descend,power/$@)
 
-cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi: FORCE
+cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci: FORCE
 	$(call descend,$@)
 
 liblockdep: FORCE
@@ -94,7 +95,7 @@ kvm_stat: FORCE
 all: acpi cgroup cpupower gpio hv firewire liblockdep \
 		perf selftests spi turbostat usb \
 		virtio vm bpf x86_energy_perf_policy \
-		tmon freefall iio objtool kvm_stat wmi
+		tmon freefall iio objtool kvm_stat wmi pci
 
 acpi_install:
 	$(call descend,power/$(@:_install=),install)
@@ -102,7 +103,7 @@ acpi_install:
 cpupower_install:
 	$(call descend,power/$(@:_install=),install)
 
-cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install:
+cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install:
 	$(call descend,$(@:_install=),install)
 
 liblockdep_install:
@@ -128,7 +129,7 @@ install: acpi_install cgroup_install cpupower_install gpio_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install vm_install bpf_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install \
-		wmi_install
+		wmi_install pci_install
 
 acpi_clean:
 	$(call descend,power/acpi,clean)
@@ -136,7 +137,7 @@ acpi_clean:
 cpupower_clean:
 	$(call descend,power/cpupower,clean)
 
-cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean:
+cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean:
 	$(call descend,$(@:_clean=),clean)
 
 liblockdep_clean:
@@ -174,6 +175,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
 		perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \
 		vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
 		freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
-		gpio_clean objtool_clean leds_clean wmi_clean
+		gpio_clean objtool_clean leds_clean wmi_clean pci_clean
 
 .PHONY: FORCE
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 9f420d98b5fb..8cb504d30384 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -203,6 +203,8 @@ static void print_delayacct(struct taskstats *t)
 	       "SWAP  %15s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n"
 	       "RECLAIM  %12s%15s%15s\n"
+	       "      %15llu%15llu%15llums\n"
+	       "THRASHING%12s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n",
 	       "count", "real total", "virtual total",
 	       "delay total", "delay average",
@@ -222,7 +224,11 @@ static void print_delayacct(struct taskstats *t)
 	       "count", "delay total", "delay average",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
-	       average_ms(t->freepages_delay_total, t->freepages_count));
+	       average_ms(t->freepages_delay_total, t->freepages_count),
+	       "count", "delay total", "delay average",
+	       (unsigned long long)t->thrashing_count,
+	       (unsigned long long)t->thrashing_delay_total,
+	       average_ms(t->thrashing_delay_total, t->thrashing_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index caae4843cb70..4602464ebdfb 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -91,6 +92,7 @@ struct kvm_regs {
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
 #define KVM_VGIC_ITS_ADDR_TYPE		4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION	5
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
@@ -124,6 +126,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
index 40bde6b23501..378c051fa177 100644
--- a/tools/arch/arm64/include/asm/barrier.h
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -14,4 +14,75 @@
 #define wmb()		asm volatile("dmb ishst" ::: "memory")
 #define rmb()		asm volatile("dmb ishld" ::: "memory")
 
+#define smp_store_release(p, v)						\
+do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (v) }; 					\
+									\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("stlrb %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u8_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("stlrh %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u16_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u32_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u64_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	default:							\
+		/* Only to shut up gcc ... */				\
+		mb();							\
+		break;							\
+	}								\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__c = { 0 } };					\
+									\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (*(__u8_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (*(__u16_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (*(__u32_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (*(__u64_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	default:							\
+		/* Only to shut up gcc ... */				\
+		mb();							\
+		break;							\
+	}								\
+	__u.__val;							\
+})
+
 #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 04b3256f8e6d..97c3478ee6e7 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -91,6 +92,7 @@ struct kvm_regs {
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
 #define KVM_VGIC_ITS_ADDR_TYPE		4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION	5
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
@@ -153,6 +155,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..dae1584cf017
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
+
+#include <asm-generic/unistd.h>
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
index d808ee0e77b5..4d471d9511a5 100644
--- a/tools/arch/ia64/include/asm/barrier.h
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -46,4 +46,17 @@
 #define rmb()		mb()
 #define wmb()		mb()
 
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+
 #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h
index fc0df353ff0d..87245c584784 100644
--- a/tools/arch/parisc/include/uapi/asm/errno.h
+++ b/tools/arch/parisc/include/uapi/asm/errno.h
@@ -113,7 +113,6 @@
 #define	ELOOP		249	/* Too many symbolic links encountered */
 #define	ENOSYS		251	/* Function not implemented */
 
-#define ENOTSUP		252	/* Function not implemented (POSIX.4 / HPUX) */
 #define ECANCELLED	253	/* aio request was canceled before complete (POSIX.4 / HPUX) */
 #define ECANCELED	ECANCELLED	/* SuSv3 and Solaris wants one 'L' */
 
diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
index a634da05bc97..905a2c66d96d 100644
--- a/tools/arch/powerpc/include/asm/barrier.h
+++ b/tools/arch/powerpc/include/asm/barrier.h
@@ -27,4 +27,20 @@
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
+#if defined(__powerpc64__)
+#define smp_lwsync()	__asm__ __volatile__ ("lwsync" : : : "memory")
+
+#define smp_store_release(p, v)			\
+do {						\
+	smp_lwsync();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	smp_lwsync();				\
+	___p1;					\
+})
+#endif /* defined(__powerpc64__) */
 #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 833ed9a16adf..8c876c166ef2 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -633,6 +633,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+#define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
index 389c36fd8299..985534d0b448 100644
--- a/tools/arch/powerpc/include/uapi/asm/unistd.h
+++ b/tools/arch/powerpc/include/uapi/asm/unistd.h
@@ -398,5 +398,7 @@
 #define __NR_pkey_alloc		384
 #define __NR_pkey_free		385
 #define __NR_pkey_mprotect	386
+#define __NR_rseq		387
+#define __NR_io_pgetevents	388
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
index 5030c99f47d2..de362fa664d4 100644
--- a/tools/arch/s390/include/asm/barrier.h
+++ b/tools/arch/s390/include/asm/barrier.h
@@ -28,4 +28,17 @@
 #define rmb()				mb()
 #define wmb()				mb()
 
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+
 #endif /* __TOOLS_LIB_ASM_BARRIER_H */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..16511d97e8dc 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
 /*
  * KVM s390 specific structures and definitions
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE		4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE		5
 
 /* kvm attributes for migration mode */
 #define KVM_S390_VM_MIGRATION_STOP	0
@@ -225,6 +227,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_FPRS   (1UL << 8)
 #define KVM_SYNC_GSCB   (1UL << 9)
 #define KVM_SYNC_BPBC   (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
 #define SDNXL (1UL << SDNXC)
@@ -258,6 +261,8 @@ struct kvm_sync_regs {
 		struct {
 			__u64 reserved1[2];
 			__u64 gscb[4];
+			__u64 etoken;
+			__u64 etoken_extension;
 		};
 	};
 };
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
index ba61344287d5..cfb0fdc8ccf0 100644
--- a/tools/arch/sparc/include/asm/barrier_64.h
+++ b/tools/arch/sparc/include/asm/barrier_64.h
@@ -40,4 +40,17 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 #define rmb()	__asm__ __volatile__("":::"memory")
 #define wmb()	__asm__ __volatile__("":::"memory")
 
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+
 #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
index 8774dee27471..58919868473c 100644
--- a/tools/arch/x86/include/asm/barrier.h
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -26,4 +26,18 @@
 #define wmb()	asm volatile("sfence" ::: "memory")
 #endif
 
+#if defined(__x86_64__)
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+#endif /* defined(__x86_64__) */
 #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 578793e97431..28c4a502b419 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,21 @@
 #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL	( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD		( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD		( 7*32+24)  /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS		( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+#define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* "" L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED	( 7*32+30) /* Enhanced IBRS */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -224,7 +231,7 @@
 
 #define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */
-
+#define X86_FEATURE_EPT_AD		( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -274,9 +281,12 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP		(13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_SSBD		(13*32+24) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO		(13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
@@ -321,6 +331,8 @@
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
@@ -333,7 +345,9 @@
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
 /*
  * BUG word(s)
@@ -363,5 +377,7 @@
 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 #define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF			X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
new file mode 100644
index 000000000000..2ccd588fbad4
--- /dev/null
+++ b/tools/arch/x86/include/asm/mcsafe_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MCSAFE_TEST_H_
+#define _MCSAFE_TEST_H_
+
+.macro MCSAFE_TEST_CTL
+.endm
+
+.macro MCSAFE_TEST_SRC reg count target
+.endm
+
+.macro MCSAFE_TEST_DST reg count target
+.endm
+#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index c535c2fdea13..dabfcf7c3941 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -288,6 +288,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
 #define KVM_VCPUEVENT_VALID_SMM		0x00000008
+#define KVM_VCPUEVENT_VALID_PAYLOAD	0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS	0x01
@@ -299,7 +300,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		__u8 pad;
+		__u8 pending;
 		__u32 error_code;
 	} exception;
 	struct {
@@ -322,7 +323,9 @@ struct kvm_vcpu_events {
 		__u8 smm_inside_nmi;
 		__u8 latched_init;
 	} smi;
-	__u32 reserved[9];
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
 };
 
 /* for KVM_GET/SET_DEBUGREGS */
@@ -377,5 +380,44 @@ struct kvm_sync_regs {
 
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE	(1 << 2)
+
+#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+
+struct kvm_vmx_nested_state {
+	__u64 vmxon_pa;
+	__u64 vmcs_pa;
+
+	struct {
+		__u16 flags;
+	} smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+	/* KVM_STATE_* flags */
+	__u16 flags;
+
+	/* 0 for VMX, 1 for SVM.  */
+	__u16 format;
+
+	/* 128 for SVM, 128 + VMCS size for VMX.  */
+	__u32 size;
+
+	union {
+		/* VMXON, VMCS */
+		struct kvm_vmx_nested_state vmx;
+
+		/* Pad the header to 128 bytes.  */
+		__u8 pad[120];
+	};
+
+	__u8 data[0];
+};
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 9a53a06e5a3e..3b24dc05251c 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -3,6 +3,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
+#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -183,12 +184,15 @@ ENTRY(memcpy_orig)
 ENDPROC(memcpy_orig)
 
 #ifndef CONFIG_UML
+
+MCSAFE_TEST_CTL
+
 /*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
@@ -204,58 +208,33 @@ ENTRY(memcpy_mcsafe_unrolled)
 	subl $8, %ecx
 	negl %ecx
 	subl %ecx, %edx
-.L_copy_leading_bytes:
+.L_read_leading_bytes:
 	movb (%rsi), %al
+	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
 	movb %al, (%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz .L_copy_leading_bytes
+	jnz .L_read_leading_bytes
 
 .L_8byte_aligned:
-	/* Figure out how many whole cache lines (64-bytes) to copy */
-	movl %edx, %ecx
-	andl $63, %edx
-	shrl $6, %ecx
-	jz .L_no_whole_cache_lines
-
-	/* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
-	movq %r8, (%rdi)
-	movq %r9, 1*8(%rdi)
-	movq %r10, 2*8(%rdi)
-	movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
-	movq %r8, 4*8(%rdi)
-	movq %r9, 5*8(%rdi)
-	movq %r10, 6*8(%rdi)
-	movq %r11, 7*8(%rdi)
-	leaq 64(%rsi), %rsi
-	leaq 64(%rdi), %rdi
-	decl %ecx
-	jnz .L_cache_w0
-
-	/* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
 	movl %edx, %ecx
 	andl $7, %edx
 	shrl $3, %ecx
 	jz .L_no_whole_words
 
-	/* Copy trailing words */
-.L_copy_trailing_words:
+.L_read_words:
 	movq (%rsi), %r8
-	mov %r8, (%rdi)
-	leaq 8(%rsi), %rsi
-	leaq 8(%rdi), %rdi
+	MCSAFE_TEST_SRC %rsi 8 .E_read_words
+	MCSAFE_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
 	decl %ecx
-	jnz .L_copy_trailing_words
+	jnz .L_read_words
 
 	/* Any trailing bytes? */
 .L_no_whole_words:
@@ -264,38 +243,55 @@ ENTRY(memcpy_mcsafe_unrolled)
 
 	/* Copy trailing bytes */
 	movl %edx, %ecx
-.L_copy_trailing_bytes:
+.L_read_trailing_bytes:
 	movb (%rsi), %al
+	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
 	movb %al, (%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz .L_copy_trailing_bytes
+	jnz .L_read_trailing_bytes
 
 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
-	xorq %rax, %rax
+	xorl %eax, %eax
 	ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 
 	.section .fixup, "ax"
-	/* Return -EFAULT for any failure */
-.L_memcpy_mcsafe_fail:
-	mov	$-EFAULT, %rax
+	/*
+	 * Return number of bytes not copied for any failure. Note that
+	 * there is no "tail" handling since the source buffer is 8-byte
+	 * aligned and poison is cacheline aligned.
+	 */
+.E_read_words:
+	shll	$3, %ecx
+.E_leading_bytes:
+	addl	%edx, %ecx
+.E_trailing_bytes:
+	mov	%ecx, %eax
 	ret
 
+	/*
+	 * For write fault handling, given the destination is unaligned,
+	 * we handle faults on multi-byte writes with a byte-by-byte
+	 * copy up to the write-protected page.
+	 */
+.E_write_words:
+	shll	$3, %ecx
+	addl	%edx, %ecx
+	movl	%ecx, %edx
+	jmp mcsafe_handle_tail
+
 	.previous
 
-	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE(.L_write_words, .E_write_words)
+	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
 #endif
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
new file mode 100644
index 000000000000..dfe2bd5a4b95
--- /dev/null
+++ b/tools/bpf/.gitignore
@@ -0,0 +1,5 @@
+FEATURE-DUMP.bpf
+bpf_asm
+bpf_dbg
+bpf_exp.yacc.*
+bpf_jit_disasm
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
new file mode 100644
index 000000000000..c34fea77f39f
--- /dev/null
+++ b/tools/bpf/Makefile.helpers
@@ -0,0 +1,59 @@
+ifndef allow-override
+  include ../scripts/Makefile.include
+  include ../scripts/utilities.mak
+else
+  # Assume Makefile.helpers is being run from bpftool/Documentation
+  # subdirectory. Go up two more directories to fetch bpf.h header and
+  # associated script.
+  UP2DIR := ../../
+endif
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man7dir = $(mandir)/man7
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+helpers: man7
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
+	$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
+
+$(OUTPUT)%.7: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+	$(error "rst2man not found, but required to generate man pages")
+endif
+	$(QUIET_GEN)rst2man $< > $@
+
+helpers-clean:
+	$(call QUIET_CLEAN, eBPF_helpers-manpage)
+	$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
+
+helpers-install: helpers
+	$(call QUIET_INSTALL, eBPF_helpers-manpage)
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+	$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+helpers-uninstall:
+	$(call QUIET_UNINST, eBPF_helpers-manpage)
+	$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
+	$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
+
+.PHONY: helpers helpers-clean helpers-install helpers-uninstall
diff --git a/tools/bpf/bpf_exp.l b/tools/bpf/bpf_exp.l
index bd83149e7be0..4da8d053d68f 100644
--- a/tools/bpf/bpf_exp.l
+++ b/tools/bpf/bpf_exp.l
@@ -175,7 +175,7 @@ extern void yyerror(const char *str);
 			yylval.number = strtol(yytext, NULL, 10);
 			return number;
 		}
-([0][0-9]+)	{
+([0][0-7]+)	{
 			yylval.number = strtol(yytext + 1, NULL, 8);
 			return number;
 		}
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
new file mode 100644
index 000000000000..67167e44b726
--- /dev/null
+++ b/tools/bpf/bpftool/.gitignore
@@ -0,0 +1,5 @@
+*.d
+bpftool
+bpftool*.8
+bpf-helpers.*
+FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index a9d47c1558bb..f7663a3e60c9 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -15,12 +15,15 @@ prefix ?= /usr/local
 mandir ?= $(prefix)/man
 man8dir = $(mandir)/man8
 
-MAN8_RST = $(wildcard *.rst)
+# Load targets for building eBPF helpers man page.
+include ../../Makefile.helpers
+
+MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst))
 
 _DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
 DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
 
-man: man8
+man: man8 helpers
 man8: $(DOC_MAN8)
 
 RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -31,16 +34,16 @@ ifndef RST2MAN_DEP
 endif
 	$(QUIET_GEN)rst2man $< > $@
 
-clean:
+clean: helpers-clean
 	$(call QUIET_CLEAN, Documentation)
 	$(Q)$(RM) $(DOC_MAN8)
 
-install: man
+install: man helpers-install
 	$(call QUIET_INSTALL, Documentation-man)
 	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
 	$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
 
-uninstall:
+uninstall: helpers-uninstall
 	$(call QUIET_UNINST, Documentation-man)
 	$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
 	$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 0e4e923235b6..d07ccf8a23f7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -15,18 +15,21 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **list** | **attach** | **detach** | **help** }
+	{ **show** | **list** | **tree** | **attach** | **detach** | **help** }
 
 MAP COMMANDS
 =============
 
 |	**bpftool** **cgroup { show | list }** *CGROUP*
+|	**bpftool** **cgroup tree** [*CGROUP_ROOT*]
 |	**bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
 |	**bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 |	**bpftool** **cgroup help**
 |
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-|	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** }
+|	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
+|		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
+|               **sendmsg4** | **sendmsg6** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -37,6 +40,15 @@ DESCRIPTION
 		  Output will start with program ID followed by attach type,
 		  attach flags and program name.
 
+	**bpftool cgroup tree** [*CGROUP_ROOT*]
+		  Iterate over all cgroups in *CGROUP_ROOT* and list all
+		  attached programs. If *CGROUP_ROOT* is not specified,
+		  bpftool uses cgroup v2 mountpoint.
+
+		  The output is similar to the output of cgroup show/list
+		  commands: it starts with absolute cgroup path, followed by
+		  program ID, attach type, attach flags and program name.
+
 	**bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
 		  Attach program *PROG* to the cgroup *CGROUP* with attach type
 		  *ATTACH_TYPE* and optional *ATTACH_FLAGS*.
@@ -63,7 +75,17 @@ DESCRIPTION
 		  **egress** egress path of the inet socket (since 4.10);
 		  **sock_create** opening of an inet socket (since 4.10);
 		  **sock_ops** various socket operations (since 4.12);
-		  **device** device access (since 4.15).
+		  **device** device access (since 4.15);
+		  **bind4** call to bind(2) for an inet4 socket (since 4.17);
+		  **bind6** call to bind(2) for an inet6 socket (since 4.17);
+		  **post_bind4** return from bind(2) for an inet4 socket (since 4.17);
+		  **post_bind6** return from bind(2) for an inet6 socket (since 4.17);
+		  **connect4** call to connect(2) for an inet4 socket (since 4.17);
+		  **connect6** call to connect(2) for an inet6 socket (since 4.17);
+		  **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
+		  unconnected udp4 socket (since 4.18);
+		  **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
+		  unconnected udp6 socket (since 4.18).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
@@ -115,4 +137,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 457e868bd32f..7bb787cfa971 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -15,25 +15,34 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-	| **pin** | **help** }
+	{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+	| **delete** | **pin** | **help** }
 
 MAP COMMANDS
 =============
 
 |	**bpftool** **map { show | list }**   [*MAP*]
-|	**bpftool** **map dump**    *MAP*
-|	**bpftool** **map update**  *MAP*  **key** *BYTES*   **value** *VALUE* [*UPDATE_FLAGS*]
-|	**bpftool** **map lookup**  *MAP*  **key** *BYTES*
-|	**bpftool** **map getnext** *MAP* [**key** *BYTES*]
-|	**bpftool** **map delete**  *MAP*  **key** *BYTES*
-|	**bpftool** **map pin**     *MAP*  *FILE*
+|	**bpftool** **map create**     *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
+|		**entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+|	**bpftool** **map dump**       *MAP*
+|	**bpftool** **map update**     *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
+|	**bpftool** **map lookup**     *MAP*  **key** *DATA*
+|	**bpftool** **map getnext**    *MAP* [**key** *DATA*]
+|	**bpftool** **map delete**     *MAP*  **key** *DATA*
+|	**bpftool** **map pin**        *MAP*  *FILE*
+|	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
 |	**bpftool** **map help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+|	*DATA* := { [**hex**] *BYTES* }
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-|	*VALUE* := { *BYTES* | *MAP* | *PROG* }
+|	*VALUE* := { *DATA* | *MAP* | *PROG* }
 |	*UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
+|	*TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash**
+|		| **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash**
+|		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
+|		| **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
+|		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** }
 
 DESCRIPTION
 ===========
@@ -45,29 +54,57 @@ DESCRIPTION
 		  Output will start with map ID followed by map type and
 		  zero or more named attributes (depending on kernel version).
 
+	**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE*  **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+		  Create a new map with given parameters and pin it to *bpffs*
+		  as *FILE*.
+
 	**bpftool map dump**    *MAP*
 		  Dump all entries in a given *MAP*.
 
-	**bpftool map update**  *MAP*  **key** *BYTES*   **value** *VALUE* [*UPDATE_FLAGS*]
+	**bpftool map update**  *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
 		  Update map entry for a given *KEY*.
 
 		  *UPDATE_FLAGS* can be one of: **any** update existing entry
 		  or add if doesn't exit; **exist** update only if entry already
 		  exists; **noexist** update only if entry doesn't exist.
 
-	**bpftool map lookup**  *MAP*  **key** *BYTES*
+		  If the **hex** keyword is provided in front of the bytes
+		  sequence, the bytes are parsed as hexadeximal values, even if
+		  no "0x" prefix is added. If the keyword is not provided, then
+		  the bytes are parsed as decimal values, unless a "0x" prefix
+		  (for hexadecimal) or a "0" prefix (for octal) is provided.
+
+	**bpftool map lookup**  *MAP*  **key** *DATA*
 		  Lookup **key** in the map.
 
-	**bpftool map getnext** *MAP* [**key** *BYTES*]
+	**bpftool map getnext** *MAP* [**key** *DATA*]
 		  Get next key.  If *key* is not specified, get first key.
 
-	**bpftool map delete**  *MAP*  **key** *BYTES*
+	**bpftool map delete**  *MAP*  **key** *DATA*
 		  Remove entry from the map.
 
 	**bpftool map pin**     *MAP*  *FILE*
 		  Pin map *MAP* as *FILE*.
 
-		  Note: *FILE* must be located in *bpffs* mount.
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
+
+	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
+		  Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+
+		  Install perf rings into a perf event array map and dump
+		  output of any bpf_perf_event_output() call in the kernel.
+		  By default read the number of CPUs on the system and
+		  install perf ring for each CPU in the corresponding index
+		  in the array.
+
+		  If **cpu** and **index** are specified, install perf ring
+		  for given **cpu** at **index** in the array (single ring).
+
+		  Note that installing a perf ring into an array will silently
+		  replace any existing ring.  Any other application will stop
+		  receiving events if it installed its rings earlier.
 
 	**bpftool map help**
 		  Print short help message.
@@ -98,7 +135,12 @@ EXAMPLES
   10: hash  name some_map  flags 0x0
 	key 4B  value 8B  max_entries 2048  memlock 167936B
 
-**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04**
+The following three commands are equivalent:
+
+|
+| **# bpftool map update id 10 key hex   20   c4   b7   00 value hex   0f   ff   ff   ab   01   02   03   4c**
+| **# bpftool map update id 10 key     0x20 0xc4 0xb7 0x00 value     0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c**
+| **# bpftool map update id 10 key       32  196  183    0 value       15  255  255  171    1    2    3   76**
 
 **# bpftool map lookup id 10 key 0 1 2 3**
 
@@ -129,4 +171,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
new file mode 100644
index 000000000000..ed87c9b619ad
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -0,0 +1,145 @@
+================
+bpftool-net
+================
+-------------------------------------------------------------------------------
+tool for inspection of netdev/tc related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **net** *COMMAND*
+
+	*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* :=
+	{ **show** | **list** } [ **dev** name ] | **help**
+
+NET COMMANDS
+============
+
+|	**bpftool** **net { show | list } [ dev name ]**
+|	**bpftool** **net help**
+
+DESCRIPTION
+===========
+	**bpftool net { show | list } [ dev name ]**
+                  List bpf program attachments in the kernel networking subsystem.
+
+                  Currently, only device driver xdp attachments and tc filter
+                  classification/action attachments are implemented, i.e., for
+                  program types **BPF_PROG_TYPE_SCHED_CLS**,
+                  **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
+                  For programs attached to a particular cgroup, e.g.,
+                  **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
+                  **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+                  users can use **bpftool cgroup** to dump cgroup attachments.
+                  For sk_{filter, skb, msg, reuseport} and lwt/seg6
+                  bpf programs, users should consult other tools, e.g., iproute2.
+
+                  The current output will start with all xdp program attachments, followed by
+                  all tc class/qdisc bpf program attachments. Both xdp programs and
+                  tc programs are ordered based on ifindex number. If multiple bpf
+                  programs attached to the same networking device through **tc filter**,
+                  the order will be first all bpf programs attached to tc classes, then
+                  all bpf programs attached to non clsact qdiscs, and finally all
+                  bpf programs attached to root and clsact qdisc.
+
+	**bpftool net help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-v, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool net**
+
+::
+
+      xdp:
+      eth0(2) driver id 198
+
+      tc:
+      eth0(2) htb name prefix_matcher.o:[cls_prefix_matcher_htb] id 111727 act []
+      eth0(2) clsact/ingress fbflow_icmp id 130246 act []
+      eth0(2) clsact/egress prefix_matcher.o:[cls_prefix_matcher_clsact] id 111726
+      eth0(2) clsact/egress cls_fg_dscp id 108619 act []
+      eth0(2) clsact/egress fbflow_egress id 130245
+
+|
+| **# bpftool -jp net**
+
+::
+
+    [{
+            "xdp": [{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "mode": "driver",
+                    "id": 198
+                }
+            ],
+            "tc": [{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "htb",
+                    "name": "prefix_matcher.o:[cls_prefix_matcher_htb]",
+                    "id": 111727,
+                    "act": []
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/ingress",
+                    "name": "fbflow_icmp",
+                    "id": 130246,
+                    "act": []
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/egress",
+                    "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]",
+                    "id": 111726,
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/egress",
+                    "name": "cls_fg_dscp",
+                    "id": 108619,
+                    "act": []
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/egress",
+                    "name": "fbflow_egress",
+                    "id": 130245,
+                }
+            ]
+        }
+    ]
+
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 000000000000..f4c5e5538bb8
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,87 @@
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+	*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* :=
+	{ **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+|	**bpftool** **perf { show | list }**
+|	**bpftool** **perf help**
+
+DESCRIPTION
+===========
+	**bpftool perf { show | list }**
+		  List all raw_tracepoint, tracepoint, kprobe attachment in the system.
+
+		  Output will start with process id and file descriptor in that process,
+		  followed by bpf program id, attachment information, and attachment point.
+		  The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+		  The attachment point for k[ret]probe is either symbol name and offset,
+		  or a kernel virtual address.
+		  The attachment point for u[ret]probe is the file name and the file offset.
+
+	**bpftool perf help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-v, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+      pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
+      pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
+      pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
+      pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+    [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
+     {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+     {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+     {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 67ca6c69376c..ecf618807125 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -24,10 +24,23 @@ MAP COMMANDS
 |	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
-|	**bpftool** **prog load** *OBJ* *FILE*
+|	**bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+|       **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
+|       **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
 |	**bpftool** **prog help**
 |
+|	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+|	*TYPE* := {
+|		**socket** | **kprobe** | **kretprobe** | **classifier** | **action** |
+|		**tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** |
+|		**cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
+|		**lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
+|		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
+|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
+|	}
+|       *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** }
+
 
 DESCRIPTION
 ===========
@@ -62,12 +75,35 @@ DESCRIPTION
 	**bpftool prog pin** *PROG* *FILE*
 		  Pin program *PROG* as *FILE*.
 
-		  Note: *FILE* must be located in *bpffs* mount.
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
 
-	**bpftool prog load** *OBJ* *FILE*
+	**bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
 		  Load bpf program from binary *OBJ* and pin as *FILE*.
-
-		  Note: *FILE* must be located in *bpffs* mount.
+		  **type** is optional, if not specified program type will be
+		  inferred from section names.
+		  By default bpftool will create new maps as declared in the ELF
+		  object being loaded.  **map** parameter allows for the reuse
+		  of existing maps.  It can be specified multiple times, each
+		  time for a different map.  *IDX* refers to index of the map
+		  to be replaced in the ELF file counting from 0, while *NAME*
+		  allows to replace a map by name.  *MAP* specifies the map to
+		  use, referring to it by **id** or through a **pinned** file.
+		  If **dev** *NAME* is specified program will be loaded onto
+		  given networking device (offload).
+
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
+
+        **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP*
+                  Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+                  to the map *MAP*.
+
+        **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP*
+                  Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+                  from the map *MAP*.
 
 	**bpftool prog help**
 		  Print short help message.
@@ -88,14 +124,15 @@ OPTIONS
 		  Generate human-readable JSON output. Implies **-j**.
 
 	-f, --bpffs
-		  Show file names of pinned programs.
+		  When showing BPF programs, show file names of pinned
+		  programs.
 
 EXAMPLES
 ========
 **# bpftool prog show**
 ::
 
-  10: xdp  name some_prog  tag 005a3d2123620c8b
+  10: xdp  name some_prog  tag 005a3d2123620c8b  gpl
 	loaded_at Sep 29/20:11  uid 0
 	xlated 528B  jited 370B  memlock 4096B  map_ids 10
 
@@ -108,6 +145,7 @@ EXAMPLES
                 "id": 10,
                 "type": "xdp",
                 "tag": "005a3d2123620c8b",
+                "gpl_compatible": true,
                 "loaded_at": "Sep 29/20:11",
                 "uid": 0,
                 "bytes_xlated": 528,
@@ -158,7 +196,21 @@ EXAMPLES
     mov    %rbx,0x0(%rbp)
     48 89 5d 00
 
+|
+| **# bpftool prog load xdp1_kern.o /sys/fs/bpf/xdp1 type xdp map name rxcnt id 7**
+| **# bpftool prog show pinned /sys/fs/bpf/xdp1**
+|   9: xdp  name xdp_prog1  tag 539ec6ce11b52f98  gpl
+|	loaded_at 2018-06-25T16:17:31-0700  uid 0
+|	xlated 488B  jited 336B  memlock 4096B  map_ids 7
+| **# rm /sys/fs/bpf/xdp1**
+|
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 20689a321ffe..129b7a9c0f9b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,20 +16,24 @@ SYNOPSIS
 
 	**bpftool** **version**
 
-	*OBJECT* := { **map** | **program** | **cgroup** }
+	*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** }
 
 	*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
 
 	*MAP-COMMANDS* :=
-	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-	| **pin** | **help** }
+	{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+	| **delete** | **pin** | **event_pipe** | **help** }
 
 	*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
-	| **load** | **help** }
+	| **load** | **attach** | **detach** | **help** }
 
 	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
+	*PERF-COMMANDS* := { **show** | **list** | **help** }
+
+	*NET-COMMANDS* := { **show** | **list** | **help** }
+
 DESCRIPTION
 ===========
 	*bpftool* allows for inspection and simple modification of BPF objects
@@ -53,6 +57,16 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-m, --mapcompat
+		  Allow loading maps with unknown map definitions.
+
+
 SEE ALSO
 ========
-	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4e69782c4a79..dac7eff4c7e5 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -23,10 +23,10 @@ endif
 
 LIBBPF = $(BPF_PATH)libbpf.a
 
-BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion)
 
 $(LIBBPF): FORCE
-	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
 
 $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
@@ -39,15 +39,27 @@ CC = gcc
 
 CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
-CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+	-I$(srctree)/kernel/bpf/ \
+	-I$(srctree)/tools/include \
+	-I$(srctree)/tools/include/uapi \
+	-I$(srctree)/tools/lib/bpf \
+	-I$(srctree)/tools/perf
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
+ifneq ($(EXTRA_CFLAGS),)
+CFLAGS += $(EXTRA_CFLAGS)
+endif
+ifneq ($(EXTRA_LDFLAGS),)
+LDFLAGS += $(EXTRA_LDFLAGS)
+endif
+
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
 INSTALL ?= install
 RM ?= rm -f
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray
 FEATURE_DISPLAY = libbfd disassembler-four-args
 
 check_feat := 1
@@ -70,6 +82,10 @@ ifeq ($(feature-disassembler-four-args), 1)
 CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
+ifeq ($(feature-reallocarray), 0)
+CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+endif
+
 include $(wildcard $(OUTPUT)*.d)
 
 all: $(OUTPUT)bpftool
@@ -81,7 +97,7 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 490811b45fa7..3f78e6404589 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1,6 +1,6 @@
 # bpftool(8) bash completion                               -*- shell-script -*-
 #
-# Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (C) 2017-2018 Netronome Systems, Inc.
 #
 # This software is dual licensed under the GNU General License
 # Version 2, June 1991 as shown in the file COPYING in the top-level
@@ -79,6 +79,14 @@ _bpftool_get_map_ids()
         command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_perf_map_ids()
+{
+    COMPREPLY+=( $( compgen -W "$( bpftool -jp map  2>&1 | \
+        command grep -C2 perf_event_array | \
+        command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
+
 _bpftool_get_prog_ids()
 {
     COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@@ -91,6 +99,35 @@ _bpftool_get_prog_tags()
         command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_obj_map_names()
+{
+    local obj
+
+    obj=$1
+
+    maps=$(objdump -j maps -t $obj 2>/dev/null | \
+        command awk '/g     . maps/ {print $NF}')
+
+    COMPREPLY+=( $( compgen -W "$maps" -- "$cur" ) )
+}
+
+_bpftool_get_obj_map_idxs()
+{
+    local obj
+
+    obj=$1
+
+    nmaps=$(objdump -j maps -t $obj 2>/dev/null | grep -c 'g     . maps')
+
+    COMPREPLY+=( $( compgen -W "$(seq 0 $((nmaps - 1)))" -- "$cur" ) )
+}
+
+_sysfs_get_netdevs()
+{
+    COMPREPLY+=( $( compgen -W "$( ls /sys/class/net 2>/dev/null )" -- \
+        "$cur" ) )
+}
+
 # For bpftool map update: retrieve type of the map to update.
 _bpftool_map_update_map_type()
 {
@@ -106,7 +143,7 @@ _bpftool_map_update_map_type()
     local type
     type=$(bpftool -jp map show $keyword $ref | \
         command sed -n 's/.*"type": "\(.*\)",$/\1/p')
-    printf $type
+    [[ -n $type ]] && printf $type
 }
 
 _bpftool_map_update_get_id()
@@ -145,9 +182,16 @@ _bpftool()
     local cur prev words objword
     _init_completion || return
 
+    # Deal with options
+    if [[ ${words[cword]} == -* ]]; then
+        local c='--version --json --pretty --bpffs --mapcompat'
+        COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
+        return 0
+    fi
+
     # Deal with simplest keywords
     case $prev in
-        help|key|opcodes|visual)
+        help|hex|opcodes|visual)
             return 0
             ;;
         tag)
@@ -164,20 +208,23 @@ _bpftool()
             ;;
     esac
 
-    # Search for object and command
-    local object command cmdword
-    for (( cmdword=1; cmdword < ${#words[@]}-1; cmdword++ )); do
-        [[ -n $object ]] && command=${words[cmdword]} && break
-        [[ ${words[cmdword]} != -* ]] && object=${words[cmdword]}
+    # Remove all options so completions don't have to deal with them.
+    local i
+    for (( i=1; i < ${#words[@]}; )); do
+        if [[ ${words[i]::1} == - ]]; then
+            words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+            [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+        else
+            i=$(( ++i ))
+        fi
     done
+    cur=${words[cword]}
+    prev=${words[cword - 1]}
+
+    local object=${words[1]} command=${words[2]}
 
-    if [[ -z $object ]]; then
+    if [[ -z $object || $cword -eq 1 ]]; then
         case $cur in
-            -*)
-                local c='--version --json --pretty'
-                COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
-                return 0
-                ;;
             *)
                 COMPREPLY=( $( compgen -W "$( bpftool help 2>&1 | \
                     command sed \
@@ -196,12 +243,14 @@ _bpftool()
     # Completion depends on object and command in use
     case $object in
         prog)
-            case $prev in
-                id)
-                    _bpftool_get_prog_ids
-                    return 0
-                    ;;
-            esac
+            if [[ $command != "load" ]]; then
+                case $prev in
+                    id)
+                        _bpftool_get_prog_ids
+                        return 0
+                        ;;
+                esac
+            fi
 
             local PROG_TYPE='id pinned tag'
             case $command in
@@ -243,13 +292,79 @@ _bpftool()
                     fi
                     return 0
                     ;;
-                load)
-                    _filedir
+                attach|detach)
+                    if [[ ${#words[@]} == 7 ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    if [[ ${#words[@]} == 6 ]]; then
+                        COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    if [[ $prev == "$command" ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
                     return 0
                     ;;
+                load)
+                    local obj
+
+                    if [[ ${#words[@]} -lt 6 ]]; then
+                        _filedir
+                        return 0
+                    fi
+
+                    obj=${words[3]}
+
+                    if [[ ${words[-4]} == "map" ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+                    if [[ ${words[-3]} == "map" ]]; then
+                        if [[ ${words[-2]} == "idx" ]]; then
+                            _bpftool_get_obj_map_idxs $obj
+                        elif [[ ${words[-2]} == "name" ]]; then
+                            _bpftool_get_obj_map_names $obj
+                        fi
+                        return 0
+                    fi
+                    if [[ ${words[-2]} == "map" ]]; then
+                        COMPREPLY=( $( compgen -W "idx name" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    case $prev in
+                        type)
+                            COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \
+                                                   "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            _bpftool_get_map_ids
+                            return 0
+                            ;;
+                        pinned)
+                            _filedir
+                            return 0
+                            ;;
+                        dev)
+                            _sysfs_get_netdevs
+                            return 0
+                            ;;
+                        *)
+                            COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
+                            _bpftool_once_attr 'type'
+                            _bpftool_once_attr 'dev'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin load \
+                        COMPREPLY=( $( compgen -W 'dump help pin attach detach load \
                             show list' -- "$cur" ) )
                     ;;
             esac
@@ -272,6 +387,42 @@ _bpftool()
                             ;;
                     esac
                     ;;
+                create)
+                    case $prev in
+                        $command)
+                            _filedir
+                            return 0
+                            ;;
+                        type)
+                            COMPREPLY=( $( compgen -W 'hash array prog_array \
+                                perf_event_array percpu_hash percpu_array \
+                                stack_trace cgroup_array lru_hash \
+                                lru_percpu_hash lpm_trie array_of_maps \
+                                hash_of_maps devmap sockmap cpumap xskmap \
+                                sockhash cgroup_storage reuseport_sockarray \
+                                percpu_cgroup_storage' -- \
+                                                   "$cur" ) )
+                            return 0
+                            ;;
+                        key|value|flags|name|entries)
+                            return 0
+                            ;;
+                        dev)
+                            _sysfs_get_netdevs
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'type'
+                            _bpftool_once_attr 'key'
+                            _bpftool_once_attr 'value'
+                            _bpftool_once_attr 'entries'
+                            _bpftool_once_attr 'name'
+                            _bpftool_once_attr 'flags'
+                            _bpftool_once_attr 'dev'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 lookup|getnext|delete)
                     case $prev in
                         $command)
@@ -283,7 +434,7 @@ _bpftool()
                             return 0
                             ;;
                         key)
-                            return 0
+                            COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
                             ;;
                         *)
                             _bpftool_once_attr 'key'
@@ -302,7 +453,7 @@ _bpftool()
                             return 0
                             ;;
                         key)
-                            return 0
+                            COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
                             ;;
                         value)
                             # We can have bytes, or references to a prog or a
@@ -321,6 +472,8 @@ _bpftool()
                                     return 0
                                     ;;
                                 *)
+                                    COMPREPLY+=( $( compgen -W 'hex' \
+                                        -- "$cur" ) )
                                     return 0
                                     ;;
                             esac
@@ -357,10 +510,34 @@ _bpftool()
                     fi
                     return 0
                     ;;
+                event_pipe)
+                    case $prev in
+                        $command)
+                            COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            _bpftool_get_perf_map_ids
+                            return 0
+                            ;;
+                        cpu)
+                            return 0
+                            ;;
+                        index)
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'cpu'
+                            _bpftool_once_attr 'index'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'delete dump getnext help \
-                            lookup pin show list update' -- "$cur" ) )
+                            lookup pin event_pipe show list update create' -- \
+                            "$cur" ) )
                     ;;
             esac
             ;;
@@ -370,9 +547,14 @@ _bpftool()
                     _filedir
                     return 0
                     ;;
+                tree)
+                    _filedir
+                    return 0
+                    ;;
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
-                        device'
+                        device bind4 bind6 post_bind4 post_bind6 connect4 \
+                        connect6 sendmsg4 sendmsg6'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag'
                     case $prev in
@@ -380,7 +562,9 @@ _bpftool()
                             _filedir
                             return 0
                             ;;
-                        ingress|egress|sock_create|sock_ops|device)
+                        ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
+                        post_bind4|post_bind6|connect4|connect6|sendmsg4|\
+                        sendmsg6)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
@@ -408,6 +592,24 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'help attach detach \
+                            show list tree' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
+        perf)
+            case $command in
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help \
+                            show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
+        net)
+            case $command in
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help \
                             show list' -- "$cur" ) )
                     ;;
             esac
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
new file mode 100644
index 000000000000..e4e6e2b3fd84
--- /dev/null
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <ctype.h>
+#include <stdio.h> /* for (FILE *) used by json_writer */
+#include <string.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "main.h"
+
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
+
+static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
+			      __u8 bit_offset, const void *data);
+
+static void btf_dumper_ptr(const void *data, json_writer_t *jw,
+			   bool is_plain_text)
+{
+	if (is_plain_text)
+		jsonw_printf(jw, "%p", *(unsigned long *)data);
+	else
+		jsonw_printf(jw, "%u", *(unsigned long *)data);
+}
+
+static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
+			       __u8 bit_offset, const void *data)
+{
+	int actual_type_id;
+
+	actual_type_id = btf__resolve_type(d->btf, type_id);
+	if (actual_type_id < 0)
+		return actual_type_id;
+
+	return btf_dumper_do_type(d, actual_type_id, bit_offset, data);
+}
+
+static void btf_dumper_enum(const void *data, json_writer_t *jw)
+{
+	jsonw_printf(jw, "%d", *(int *)data);
+}
+
+static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
+			    const void *data)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+	struct btf_array *arr = (struct btf_array *)(t + 1);
+	long long elem_size;
+	int ret = 0;
+	__u32 i;
+
+	elem_size = btf__resolve_size(d->btf, arr->type);
+	if (elem_size < 0)
+		return elem_size;
+
+	jsonw_start_array(d->jw);
+	for (i = 0; i < arr->nelems; i++) {
+		ret = btf_dumper_do_type(d, arr->type, 0,
+					 data + i * elem_size);
+		if (ret)
+			break;
+	}
+
+	jsonw_end_array(d->jw);
+	return ret;
+}
+
+static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset,
+				const void *data, json_writer_t *jw,
+				bool is_plain_text)
+{
+	int left_shift_bits, right_shift_bits;
+	int nr_bits = BTF_INT_BITS(int_type);
+	int total_bits_offset;
+	int bytes_to_copy;
+	int bits_to_copy;
+	__u64 print_num;
+
+	total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
+	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+	bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+	bits_to_copy = bit_offset + nr_bits;
+	bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
+
+	print_num = 0;
+	memcpy(&print_num, data, bytes_to_copy);
+#if defined(__BIG_ENDIAN_BITFIELD)
+	left_shift_bits = bit_offset;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	left_shift_bits = 64 - bits_to_copy;
+#else
+#error neither big nor little endian
+#endif
+	right_shift_bits = 64 - nr_bits;
+
+	print_num <<= left_shift_bits;
+	print_num >>= right_shift_bits;
+	if (is_plain_text)
+		jsonw_printf(jw, "0x%llx", print_num);
+	else
+		jsonw_printf(jw, "%llu", print_num);
+}
+
+static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
+			  const void *data, json_writer_t *jw,
+			  bool is_plain_text)
+{
+	__u32 *int_type;
+	__u32 nr_bits;
+
+	int_type = (__u32 *)(t + 1);
+	nr_bits = BTF_INT_BITS(*int_type);
+	/* if this is bit field */
+	if (bit_offset || BTF_INT_OFFSET(*int_type) ||
+	    BITS_PER_BYTE_MASKED(nr_bits)) {
+		btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+				    is_plain_text);
+		return 0;
+	}
+
+	switch (BTF_INT_ENCODING(*int_type)) {
+	case 0:
+		if (BTF_INT_BITS(*int_type) == 64)
+			jsonw_printf(jw, "%lu", *(__u64 *)data);
+		else if (BTF_INT_BITS(*int_type) == 32)
+			jsonw_printf(jw, "%u", *(__u32 *)data);
+		else if (BTF_INT_BITS(*int_type) == 16)
+			jsonw_printf(jw, "%hu", *(__u16 *)data);
+		else if (BTF_INT_BITS(*int_type) == 8)
+			jsonw_printf(jw, "%hhu", *(__u8 *)data);
+		else
+			btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+					    is_plain_text);
+		break;
+	case BTF_INT_SIGNED:
+		if (BTF_INT_BITS(*int_type) == 64)
+			jsonw_printf(jw, "%ld", *(long long *)data);
+		else if (BTF_INT_BITS(*int_type) == 32)
+			jsonw_printf(jw, "%d", *(int *)data);
+		else if (BTF_INT_BITS(*int_type) == 16)
+			jsonw_printf(jw, "%hd", *(short *)data);
+		else if (BTF_INT_BITS(*int_type) == 8)
+			jsonw_printf(jw, "%hhd", *(char *)data);
+		else
+			btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+					    is_plain_text);
+		break;
+	case BTF_INT_CHAR:
+		if (isprint(*(char *)data))
+			jsonw_printf(jw, "\"%c\"", *(char *)data);
+		else
+			if (is_plain_text)
+				jsonw_printf(jw, "0x%hhx", *(char *)data);
+			else
+				jsonw_printf(jw, "\"\\u00%02hhx\"",
+					     *(char *)data);
+		break;
+	case BTF_INT_BOOL:
+		jsonw_bool(jw, *(int *)data);
+		break;
+	default:
+		/* shouldn't happen */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id,
+			     const void *data)
+{
+	const struct btf_type *t;
+	struct btf_member *m;
+	const void *data_off;
+	int ret = 0;
+	int i, vlen;
+
+	t = btf__type_by_id(d->btf, type_id);
+	if (!t)
+		return -EINVAL;
+
+	vlen = BTF_INFO_VLEN(t->info);
+	jsonw_start_object(d->jw);
+	m = (struct btf_member *)(t + 1);
+
+	for (i = 0; i < vlen; i++) {
+		data_off = data + BITS_ROUNDDOWN_BYTES(m[i].offset);
+		jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
+		ret = btf_dumper_do_type(d, m[i].type,
+					 BITS_PER_BYTE_MASKED(m[i].offset),
+					 data_off);
+		if (ret)
+			break;
+	}
+
+	jsonw_end_object(d->jw);
+
+	return ret;
+}
+
+static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
+			      __u8 bit_offset, const void *data)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+		return btf_dumper_int(t, bit_offset, data, d->jw,
+				     d->is_plain_text);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		return btf_dumper_struct(d, type_id, data);
+	case BTF_KIND_ARRAY:
+		return btf_dumper_array(d, type_id, data);
+	case BTF_KIND_ENUM:
+		btf_dumper_enum(data, d->jw);
+		return 0;
+	case BTF_KIND_PTR:
+		btf_dumper_ptr(data, d->jw, d->is_plain_text);
+		return 0;
+	case BTF_KIND_UNKN:
+		jsonw_printf(d->jw, "(unknown)");
+		return 0;
+	case BTF_KIND_FWD:
+		/* map key or value can't be forward */
+		jsonw_printf(d->jw, "(fwd-kind-invalid)");
+		return -EINVAL;
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_dumper_modifier(d, type_id, bit_offset, data);
+	default:
+		jsonw_printf(d->jw, "(unsupported-kind");
+		return -EINVAL;
+	}
+}
+
+int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
+		    const void *data)
+{
+	return btf_dumper_do_type(d, type_id, 0, data);
+}
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index cae32a61cb18..ee7a9765c6b3 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -2,7 +2,12 @@
 // Copyright (C) 2017 Facebook
 // Author: Roman Gushchin <guro@fb.com>
 
+#define _XOPEN_SOURCE 500
+#include <errno.h>
 #include <fcntl.h>
+#include <ftw.h>
+#include <mntent.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
@@ -16,8 +21,11 @@
 #define HELP_SPEC_ATTACH_FLAGS						\
 	"ATTACH_FLAGS := { multi | override }"
 
-#define HELP_SPEC_ATTACH_TYPES						\
-	"ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
+#define HELP_SPEC_ATTACH_TYPES						       \
+	"       ATTACH_TYPE := { ingress | egress | sock_create |\n"	       \
+	"                        sock_ops | device | bind4 | bind6 |\n"	       \
+	"                        post_bind4 | post_bind6 | connect4 |\n"       \
+	"                        connect6 | sendmsg4 | sendmsg6 }"
 
 static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -25,6 +33,14 @@ static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
 	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
 	[BPF_CGROUP_DEVICE] = "device",
+	[BPF_CGROUP_INET4_BIND] = "bind4",
+	[BPF_CGROUP_INET6_BIND] = "bind6",
+	[BPF_CGROUP_INET4_CONNECT] = "connect4",
+	[BPF_CGROUP_INET6_CONNECT] = "connect6",
+	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
@@ -42,7 +58,8 @@ static enum bpf_attach_type parse_attach_type(const char *str)
 }
 
 static int show_bpf_prog(int id, const char *attach_type_str,
-			 const char *attach_flags_str)
+			 const char *attach_flags_str,
+			 int level)
 {
 	struct bpf_prog_info info = {};
 	__u32 info_len = sizeof(info);
@@ -67,7 +84,8 @@ static int show_bpf_prog(int id, const char *attach_type_str,
 		jsonw_string_field(json_wtr, "name", info.name);
 		jsonw_end_object(json_wtr);
 	} else {
-		printf("%-8u %-15s %-15s %-15s\n", info.id,
+		printf("%s%-8u %-15s %-15s %-15s\n", level ? "    " : "",
+		       info.id,
 		       attach_type_str,
 		       attach_flags_str,
 		       info.name);
@@ -77,7 +95,20 @@ static int show_bpf_prog(int id, const char *attach_type_str,
 	return 0;
 }
 
-static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+{
+	__u32 prog_cnt = 0;
+	int ret;
+
+	ret = bpf_prog_query(cgroup_fd, type, 0, NULL, NULL, &prog_cnt);
+	if (ret)
+		return -1;
+
+	return prog_cnt;
+}
+
+static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
+				   int level)
 {
 	__u32 prog_ids[1024] = {0};
 	char *attach_flags_str;
@@ -112,7 +143,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
 
 	for (iter = 0; iter < prog_cnt; iter++)
 		show_bpf_prog(prog_ids[iter], attach_type_strings[type],
-			      attach_flags_str);
+			      attach_flags_str, level);
 
 	return 0;
 }
@@ -150,7 +181,7 @@ static int do_show(int argc, char **argv)
 		 * If we were able to get the show for at least one
 		 * attach type, let's return 0.
 		 */
-		if (show_attached_bpf_progs(cgroup_fd, type) == 0)
+		if (show_attached_bpf_progs(cgroup_fd, type, 0) == 0)
 			ret = 0;
 	}
 
@@ -162,6 +193,143 @@ exit:
 	return ret;
 }
 
+/*
+ * To distinguish nftw() errors and do_show_tree_fn() errors
+ * and avoid duplicating error messages, let's return -2
+ * from do_show_tree_fn() in case of error.
+ */
+#define NFTW_ERR		-1
+#define SHOW_TREE_FN_ERR	-2
+static int do_show_tree_fn(const char *fpath, const struct stat *sb,
+			   int typeflag, struct FTW *ftw)
+{
+	enum bpf_attach_type type;
+	bool skip = true;
+	int cgroup_fd;
+
+	if (typeflag != FTW_D)
+		return 0;
+
+	cgroup_fd = open(fpath, O_RDONLY);
+	if (cgroup_fd < 0) {
+		p_err("can't open cgroup %s: %s", fpath, strerror(errno));
+		return SHOW_TREE_FN_ERR;
+	}
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		int count = count_attached_bpf_progs(cgroup_fd, type);
+
+		if (count < 0 && errno != EINVAL) {
+			p_err("can't query bpf programs attached to %s: %s",
+			      fpath, strerror(errno));
+			close(cgroup_fd);
+			return SHOW_TREE_FN_ERR;
+		}
+		if (count > 0) {
+			skip = false;
+			break;
+		}
+	}
+
+	if (skip) {
+		close(cgroup_fd);
+		return 0;
+	}
+
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_string_field(json_wtr, "cgroup", fpath);
+		jsonw_name(json_wtr, "programs");
+		jsonw_start_array(json_wtr);
+	} else {
+		printf("%s\n", fpath);
+	}
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
+		show_attached_bpf_progs(cgroup_fd, type, ftw->level);
+
+	if (json_output) {
+		jsonw_end_array(json_wtr);
+		jsonw_end_object(json_wtr);
+	}
+
+	close(cgroup_fd);
+
+	return 0;
+}
+
+static char *find_cgroup_root(void)
+{
+	struct mntent *mnt;
+	FILE *f;
+
+	f = fopen("/proc/mounts", "r");
+	if (f == NULL)
+		return NULL;
+
+	while ((mnt = getmntent(f))) {
+		if (strcmp(mnt->mnt_type, "cgroup2") == 0) {
+			fclose(f);
+			return strdup(mnt->mnt_dir);
+		}
+	}
+
+	fclose(f);
+	return NULL;
+}
+
+static int do_show_tree(int argc, char **argv)
+{
+	char *cgroup_root;
+	int ret;
+
+	switch (argc) {
+	case 0:
+		cgroup_root = find_cgroup_root();
+		if (!cgroup_root) {
+			p_err("cgroup v2 isn't mounted");
+			return -1;
+		}
+		break;
+	case 1:
+		cgroup_root = argv[0];
+		break;
+	default:
+		p_err("too many parameters for cgroup tree");
+		return -1;
+	}
+
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	else
+		printf("%s\n"
+		       "%-8s %-15s %-15s %-15s\n",
+		       "CgroupPath",
+		       "ID", "AttachType", "AttachFlags", "Name");
+
+	switch (nftw(cgroup_root, do_show_tree_fn, 1024, FTW_MOUNT)) {
+	case NFTW_ERR:
+		p_err("can't iterate over %s: %s", cgroup_root,
+		      strerror(errno));
+		ret = -1;
+		break;
+	case SHOW_TREE_FN_ERR:
+		ret = -1;
+		break;
+	default:
+		ret = 0;
+	}
+
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	if (argc == 0)
+		free(cgroup_root);
+
+	return ret;
+}
+
 static int do_attach(int argc, char **argv)
 {
 	enum bpf_attach_type attach_type;
@@ -278,15 +446,17 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %s %s { show | list } CGROUP\n"
+		"       %s %s tree [CGROUP_ROOT]\n"
 		"       %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
 		"       %s %s detach CGROUP ATTACH_TYPE PROG\n"
 		"       %s %s help\n"
 		"\n"
-		"       " HELP_SPEC_ATTACH_TYPES "\n"
+		HELP_SPEC_ATTACH_TYPES "\n"
 		"       " HELP_SPEC_ATTACH_FLAGS "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
+		bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2]);
 
@@ -296,6 +466,7 @@ static int do_help(int argc, char **argv)
 static const struct cmd cmds[] = {
 	{ "show",	do_show },
 	{ "list",	do_show },
+	{ "tree",       do_show_tree },
 	{ "attach",	do_attach },
 	{ "detach",	do_detach },
 	{ "help",	do_help },
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 465995281dcd..70fd48d79f61 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,8 +31,7 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
+#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <fts.h>
@@ -131,16 +130,17 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen)
 	return 0;
 }
 
-int open_obj_pinned(char *path)
+int open_obj_pinned(char *path, bool quiet)
 {
 	int fd;
 
 	fd = bpf_obj_get(path);
 	if (fd < 0) {
-		p_err("bpf obj get (%s): %s", path,
-		      errno == EACCES && !is_bpffs(dirname(path)) ?
-		    "directory not in bpf file system (bpffs)" :
-		    strerror(errno));
+		if (!quiet)
+			p_err("bpf obj get (%s): %s", path,
+			      errno == EACCES && !is_bpffs(dirname(path)) ?
+			    "directory not in bpf file system (bpffs)" :
+			    strerror(errno));
 		return -1;
 	}
 
@@ -152,7 +152,7 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
 	enum bpf_obj_type type;
 	int fd;
 
-	fd = open_obj_pinned(path);
+	fd = open_obj_pinned(path, false);
 	if (fd < 0)
 		return -1;
 
@@ -216,6 +216,14 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
 	int err;
 	int fd;
 
+	if (argc < 3) {
+		p_err("too few arguments, id ID and FILE path is required");
+		return -1;
+	} else if (argc > 3) {
+		p_err("too many arguments");
+		return -1;
+	}
+
 	if (!is_prefix(*argv, "id")) {
 		p_err("expected 'id' got %s", *argv);
 		return -1;
@@ -229,9 +237,6 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
 	}
 	NEXT_ARG();
 
-	if (argc != 1)
-		usage();
-
 	fd = get_fd_by_id(id);
 	if (fd < 0) {
 		p_err("can't get prog by id (%u): %s", id, strerror(errno));
@@ -300,7 +305,7 @@ char *get_fdinfo(int fd, const char *key)
 		return NULL;
 	}
 
-	while ((n = getline(&line, &line_n, fdi))) {
+	while ((n = getline(&line, &line_n, fdi)) > 0) {
 		char *value;
 		int len;
 
@@ -330,6 +335,16 @@ char *get_fdinfo(int fd, const char *key)
 	return NULL;
 }
 
+void print_data_json(uint8_t *data, size_t len)
+{
+	unsigned int i;
+
+	jsonw_start_array(json_wtr);
+	for (i = 0; i < len; i++)
+		jsonw_printf(json_wtr, "%d", data[i]);
+	jsonw_end_array(json_wtr);
+}
+
 void print_hex_data_json(uint8_t *data, size_t len)
 {
 	unsigned int i;
@@ -370,7 +385,7 @@ int build_pinned_obj_table(struct pinned_obj_table *tab,
 		while ((ftse = fts_read(fts))) {
 			if (!(ftse->fts_info & FTS_F))
 				continue;
-			fd = open_obj_pinned(ftse->fts_path);
+			fd = open_obj_pinned(ftse->fts_path, true);
 			if (fd < 0)
 				continue;
 
@@ -420,6 +435,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
 	}
 }
 
+unsigned int get_page_size(void)
+{
+	static int result;
+
+	if (!result)
+		result = getpagesize();
+	return result;
+}
+
+unsigned int get_possible_cpus(void)
+{
+	static unsigned int result;
+	char buf[128];
+	long int n;
+	char *ptr;
+	int fd;
+
+	if (result)
+		return result;
+
+	fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
+	if (fd < 0) {
+		p_err("can't open sysfs possible cpus");
+		exit(-1);
+	}
+
+	n = read(fd, buf, sizeof(buf));
+	if (n < 2) {
+		p_err("can't read sysfs possible cpus");
+		exit(-1);
+	}
+	close(fd);
+
+	if (n == sizeof(buf)) {
+		p_err("read sysfs possible cpus overflow");
+		exit(-1);
+	}
+
+	ptr = buf;
+	n = 0;
+	while (*ptr && *ptr != '\n') {
+		unsigned int a, b;
+
+		if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
+			n += b - a + 1;
+
+			ptr = strchr(ptr, '-') + 1;
+		} else if (sscanf(ptr, "%u", &a) == 1) {
+			n++;
+		} else {
+			assert(0);
+		}
+
+		while (isdigit(*ptr))
+			ptr++;
+		if (*ptr == ',')
+			ptr++;
+	}
+
+	result = n;
+
+	return result;
+}
+
 static char *
 ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
 {
@@ -476,7 +555,9 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
 	return read_sysfs_hex_int(full_path);
 }
 
-const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
+const char *
+ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
+		      const char **opt)
 {
 	char devname[IF_NAMESIZE];
 	int vendor_id;
@@ -501,6 +582,7 @@ const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
 		    device_id != 0x6000 &&
 		    device_id != 0x6003)
 			p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
+		*opt = "ctx4";
 		return "NFP-6xxx";
 	default:
 		p_err("Can't get bfd arch name for device vendor id 0x%04x",
@@ -540,3 +622,24 @@ void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
 		jsonw_string_field(json_wtr, "ifname", name);
 	jsonw_end_object(json_wtr);
 }
+
+int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
+{
+	char *endptr;
+
+	NEXT_ARGP();
+
+	if (*val) {
+		p_err("%s already specified", what);
+		return -1;
+	}
+
+	*val = strtoul(**argv, &endptr, 0);
+	if (*endptr) {
+		p_err("can't parse %s as %s", **argv, what);
+		return -1;
+	}
+	NEXT_ARGP();
+
+	return 0;
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 87439320ef70..c75ffd9ce2bb 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -77,7 +77,7 @@ static int fprintf_json(void *out, const char *fmt, ...)
 }
 
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
-		       const char *arch)
+		       const char *arch, const char *disassembler_options)
 {
 	disassembler_ftype disassemble;
 	struct disassemble_info info;
@@ -116,6 +116,8 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
 
 	info.arch = bfd_get_arch(bfdf);
 	info.mach = bfd_get_mach(bfdf);
+	if (disassembler_options)
+		info.disassembler_options = disassembler_options;
 	info.buffer = image;
 	info.buffer_length = len;
 
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d21d44..75a3296dc0bc 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #include <bfd.h>
 #include <ctype.h>
 #include <errno.h>
@@ -57,6 +55,7 @@ json_writer_t *json_wtr;
 bool pretty_output;
 bool json_output;
 bool show_pinned;
+int bpf_flags;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 
@@ -87,7 +86,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup }\n"
+		"       OBJECT := { prog | map | cgroup | perf | net }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -216,6 +215,8 @@ static const struct cmd cmds[] = {
 	{ "prog",	do_prog },
 	{ "map",	do_map },
 	{ "cgroup",	do_cgroup },
+	{ "perf",	do_perf },
+	{ "net",	do_net },
 	{ "version",	do_version },
 	{ 0 }
 };
@@ -320,7 +321,8 @@ static int do_batch(int argc, char **argv)
 		p_err("reading batch file failed: %s", strerror(errno));
 		err = -1;
 	} else {
-		p_info("processed %d commands", lines);
+		if (!json_output)
+			printf("processed %d commands\n", lines);
 		err = 0;
 	}
 err_close:
@@ -341,6 +343,7 @@ int main(int argc, char **argv)
 		{ "pretty",	no_argument,	NULL,	'p' },
 		{ "version",	no_argument,	NULL,	'V' },
 		{ "bpffs",	no_argument,	NULL,	'f' },
+		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -355,7 +358,7 @@ int main(int argc, char **argv)
 	hash_init(map_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjf",
+	while ((opt = getopt_long(argc, argv, "Vhpjfm",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -379,6 +382,9 @@ int main(int argc, char **argv)
 		case 'f':
 			show_pinned = true;
 			break;
+		case 'm':
+			bpf_flags = MAPS_RELAX_COMPAT;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index b8e9584d6246..a8bf1e2d9818 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #ifndef __BPF_TOOL_H
 #define __BPF_TOOL_H
 
@@ -44,6 +42,7 @@
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/hashtable.h>
+#include <tools/libc_compat.h>
 
 #include "json_writer.h"
 
@@ -52,6 +51,21 @@
 #define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
 #define NEXT_ARGP()	({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
 #define BAD_ARG()	({ p_err("what is '%s'?", *argv); -1; })
+#define GET_ARG()	({ argc--; *argv++; })
+#define REQ_ARGS(cnt)							\
+	({								\
+		int _cnt = (cnt);					\
+		bool _res;						\
+									\
+		if (argc < _cnt) {					\
+			p_err("'%s' needs at least %d arguments, %d found", \
+			      argv[-1], _cnt, argc);			\
+			_res = false;					\
+		} else {						\
+			_res = true;					\
+		}							\
+		_res;							\
+	})
 
 #define ERR_MAX_LEN	1024
 
@@ -60,7 +74,9 @@
 #define HELP_SPEC_PROGRAM						\
 	"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
 #define HELP_SPEC_OPTIONS						\
-	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }"
+	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} | {-m|--mapcompat}"
+#define HELP_SPEC_MAP							\
+	"MAP := { id MAP_ID | pinned FILE }"
 
 enum bpf_obj_type {
 	BPF_OBJ_UNKNOWN,
@@ -73,6 +89,7 @@ extern const char *bin_name;
 extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
+extern int bpf_flags;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 
@@ -110,21 +127,54 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path);
+int open_obj_pinned(char *path, bool quiet);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
 int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
+int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
+int do_net(int argc, char **arg);
 
+int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
+int map_parse_fd(int *argc, char ***argv);
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
-		       const char *arch);
+		       const char *arch, const char *disassembler_options);
+void print_data_json(uint8_t *data, size_t len);
 void print_hex_data_json(uint8_t *data, size_t len);
 
-const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+unsigned int get_page_size(void);
+unsigned int get_possible_cpus(void);
+const char *
+ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
+		      const char **opt);
+
+struct btf_dumper {
+	const struct btf *btf;
+	json_writer_t *jw;
+	bool is_plain_text;
+};
 
+/* btf_dumper_type - print data along with type information
+ * @d: an instance containing context for dumping types
+ * @type_id: index in btf->types array. this points to the type to be dumped
+ * @data: pointer the actual data, i.e. the values to be printed
+ *
+ * Returns zero on success and negative error code otherwise
+ */
+int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
+		    const void *data);
+
+struct nlattr;
+struct ifinfomsg;
+struct tcmsg;
+int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
+int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
+		   const char *devname, int ifindex);
 #endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f509c86faede..7bf38f0e152e 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,12 +31,12 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #include <assert.h>
-#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <net/if.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -47,6 +47,8 @@
 
 #include <bpf.h>
 
+#include "btf.h"
+#include "json_writer.h"
 #include "main.h"
 
 static const char * const map_type_name[] = {
@@ -67,68 +69,19 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
 	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
 	[BPF_MAP_TYPE_CPUMAP]		= "cpumap",
+	[BPF_MAP_TYPE_XSKMAP]           = "xskmap",
+	[BPF_MAP_TYPE_SOCKHASH]		= "sockhash",
+	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",
+	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
 };
 
-static unsigned int get_possible_cpus(void)
-{
-	static unsigned int result;
-	char buf[128];
-	long int n;
-	char *ptr;
-	int fd;
-
-	if (result)
-		return result;
-
-	fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
-	if (fd < 0) {
-		p_err("can't open sysfs possible cpus");
-		exit(-1);
-	}
-
-	n = read(fd, buf, sizeof(buf));
-	if (n < 2) {
-		p_err("can't read sysfs possible cpus");
-		exit(-1);
-	}
-	close(fd);
-
-	if (n == sizeof(buf)) {
-		p_err("read sysfs possible cpus overflow");
-		exit(-1);
-	}
-
-	ptr = buf;
-	n = 0;
-	while (*ptr && *ptr != '\n') {
-		unsigned int a, b;
-
-		if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
-			n += b - a + 1;
-
-			ptr = strchr(ptr, '-') + 1;
-		} else if (sscanf(ptr, "%u", &a) == 1) {
-			n++;
-		} else {
-			assert(0);
-		}
-
-		while (isdigit(*ptr))
-			ptr++;
-		if (*ptr == ',')
-			ptr++;
-	}
-
-	result = n;
-
-	return result;
-}
-
 static bool map_is_per_cpu(__u32 type)
 {
 	return type == BPF_MAP_TYPE_PERCPU_HASH ||
 	       type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+	       type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
 }
 
 static bool map_is_map_of_maps(__u32 type)
@@ -142,15 +95,27 @@ static bool map_is_map_of_progs(__u32 type)
 	return type == BPF_MAP_TYPE_PROG_ARRAY;
 }
 
+static int map_type_from_str(const char *type)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(map_type_name); i++)
+		/* Don't allow prefixing in case of possible future shadowing */
+		if (map_type_name[i] && !strcmp(map_type_name[i], type))
+			return i;
+	return -1;
+}
+
 static void *alloc_value(struct bpf_map_info *info)
 {
 	if (map_is_per_cpu(info->type))
-		return malloc(info->value_size * get_possible_cpus());
+		return malloc(round_up(info->value_size, 8) *
+			      get_possible_cpus());
 	else
 		return malloc(info->value_size);
 }
 
-static int map_parse_fd(int *argc, char ***argv)
+int map_parse_fd(int *argc, char ***argv)
 {
 	int fd;
 
@@ -186,8 +151,7 @@ static int map_parse_fd(int *argc, char ***argv)
 	return -1;
 }
 
-static int
-map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
 {
 	int err;
 	int fd;
@@ -206,8 +170,128 @@ map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
 	return fd;
 }
 
+static int do_dump_btf(const struct btf_dumper *d,
+		       struct bpf_map_info *map_info, void *key,
+		       void *value)
+{
+	int ret;
+
+	/* start of key-value pair */
+	jsonw_start_object(d->jw);
+
+	jsonw_name(d->jw, "key");
+
+	ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+	if (ret)
+		goto err_end_obj;
+
+	if (!map_is_per_cpu(map_info->type)) {
+		jsonw_name(d->jw, "value");
+		ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
+	} else {
+		unsigned int i, n, step;
+
+		jsonw_name(d->jw, "values");
+		jsonw_start_array(d->jw);
+		n = get_possible_cpus();
+		step = round_up(map_info->value_size, 8);
+		for (i = 0; i < n; i++) {
+			jsonw_start_object(d->jw);
+			jsonw_int_field(d->jw, "cpu", i);
+			jsonw_name(d->jw, "value");
+			ret = btf_dumper_type(d, map_info->btf_value_type_id,
+					      value + i * step);
+			jsonw_end_object(d->jw);
+			if (ret)
+				break;
+		}
+		jsonw_end_array(d->jw);
+	}
+
+err_end_obj:
+	/* end of key-value pair */
+	jsonw_end_object(d->jw);
+
+	return ret;
+}
+
+static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
+{
+	struct bpf_btf_info btf_info = { 0 };
+	__u32 len = sizeof(btf_info);
+	__u32 last_size;
+	int btf_fd;
+	void *ptr;
+	int err;
+
+	err = 0;
+	*btf = NULL;
+	btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
+	if (btf_fd < 0)
+		return 0;
+
+	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+	 * let's start with a sane default - 4KiB here - and resize it only if
+	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
+	 */
+	btf_info.btf_size = 4096;
+	last_size = btf_info.btf_size;
+	ptr = malloc(last_size);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_free;
+	}
+
+	bzero(ptr, last_size);
+	btf_info.btf = ptr_to_u64(ptr);
+	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+	if (!err && btf_info.btf_size > last_size) {
+		void *temp_ptr;
+
+		last_size = btf_info.btf_size;
+		temp_ptr = realloc(ptr, last_size);
+		if (!temp_ptr) {
+			err = -ENOMEM;
+			goto exit_free;
+		}
+		ptr = temp_ptr;
+		bzero(ptr, last_size);
+		btf_info.btf = ptr_to_u64(ptr);
+		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+	}
+
+	if (err || btf_info.btf_size > last_size) {
+		err = errno;
+		goto exit_free;
+	}
+
+	*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
+	if (IS_ERR(*btf)) {
+		err = PTR_ERR(*btf);
+		*btf = NULL;
+	}
+
+exit_free:
+	close(btf_fd);
+	free(ptr);
+
+	return err;
+}
+
+static json_writer_t *get_btf_writer(void)
+{
+	json_writer_t *jw = jsonw_new(stdout);
+
+	if (!jw)
+		return NULL;
+	jsonw_pretty(jw, true);
+
+	return jw;
+}
+
 static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
-			     unsigned char *value)
+			     unsigned char *value, struct btf *btf)
 {
 	jsonw_start_object(json_wtr);
 
@@ -216,10 +300,21 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
 		print_hex_data_json(key, info->key_size);
 		jsonw_name(json_wtr, "value");
 		print_hex_data_json(value, info->value_size);
+		if (btf) {
+			struct btf_dumper d = {
+				.btf = btf,
+				.jw = json_wtr,
+				.is_plain_text = false,
+			};
+
+			jsonw_name(json_wtr, "formatted");
+			do_dump_btf(&d, info, key, value);
+		}
 	} else {
-		unsigned int i, n;
+		unsigned int i, n, step;
 
 		n = get_possible_cpus();
+		step = round_up(info->value_size, 8);
 
 		jsonw_name(json_wtr, "key");
 		print_hex_data_json(key, info->key_size);
@@ -232,17 +327,46 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
 			jsonw_int_field(json_wtr, "cpu", i);
 
 			jsonw_name(json_wtr, "value");
-			print_hex_data_json(value + i * info->value_size,
+			print_hex_data_json(value + i * step,
 					    info->value_size);
 
 			jsonw_end_object(json_wtr);
 		}
 		jsonw_end_array(json_wtr);
+		if (btf) {
+			struct btf_dumper d = {
+				.btf = btf,
+				.jw = json_wtr,
+				.is_plain_text = false,
+			};
+
+			jsonw_name(json_wtr, "formatted");
+			do_dump_btf(&d, info, key, value);
+		}
 	}
 
 	jsonw_end_object(json_wtr);
 }
 
+static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
+			      const char *value)
+{
+	int value_size = strlen(value);
+	bool single_line, break_names;
+
+	break_names = info->key_size > 16 || value_size > 16;
+	single_line = info->key_size + value_size <= 24 && !break_names;
+
+	printf("key:%c", break_names ? '\n' : ' ');
+	fprint_hex(stdout, key, info->key_size, " ");
+
+	printf(single_line ? "  " : "\n");
+
+	printf("value:%c%s", break_names ? '\n' : ' ', value);
+
+	printf("\n");
+}
+
 static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 			      unsigned char *value)
 {
@@ -263,9 +387,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 
 		printf("\n");
 	} else {
-		unsigned int i, n;
+		unsigned int i, n, step;
 
 		n = get_possible_cpus();
+		step = round_up(info->value_size, 8);
 
 		printf("key:\n");
 		fprint_hex(stdout, key, info->key_size, " ");
@@ -273,7 +398,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 		for (i = 0; i < n; i++) {
 			printf("value (CPU %02d):%c",
 			       i, info->value_size > 16 ? '\n' : ' ');
-			fprint_hex(stdout, value + i * info->value_size,
+			fprint_hex(stdout, value + i * step,
 				   info->value_size, " ");
 			printf("\n");
 		}
@@ -283,11 +408,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 static char **parse_bytes(char **argv, const char *name, unsigned char *val,
 			  unsigned int n)
 {
-	unsigned int i = 0;
+	unsigned int i = 0, base = 0;
 	char *endptr;
 
+	if (is_prefix(*argv, "hex")) {
+		base = 16;
+		argv++;
+	}
+
 	while (i < n && argv[i]) {
-		val[i] = strtoul(argv[i], &endptr, 0);
+		val[i] = strtoul(argv[i], &endptr, base);
 		if (*endptr) {
 			p_err("error parsing byte: %s", argv[i]);
 			return NULL;
@@ -559,12 +689,62 @@ static int do_show(int argc, char **argv)
 	return errno == ENOENT ? 0 : -1;
 }
 
+static int dump_map_elem(int fd, void *key, void *value,
+			 struct bpf_map_info *map_info, struct btf *btf,
+			 json_writer_t *btf_wtr)
+{
+	int num_elems = 0;
+	int lookup_errno;
+
+	if (!bpf_map_lookup_elem(fd, key, value)) {
+		if (json_output) {
+			print_entry_json(map_info, key, value, btf);
+		} else {
+			if (btf) {
+				struct btf_dumper d = {
+					.btf = btf,
+					.jw = btf_wtr,
+					.is_plain_text = true,
+				};
+
+				do_dump_btf(&d, map_info, key, value);
+			} else {
+				print_entry_plain(map_info, key, value);
+			}
+			num_elems++;
+		}
+		return num_elems;
+	}
+
+	/* lookup error handling */
+	lookup_errno = errno;
+
+	if (map_is_map_of_maps(map_info->type) ||
+	    map_is_map_of_progs(map_info->type))
+		return 0;
+
+	if (json_output) {
+		jsonw_name(json_wtr, "key");
+		print_hex_data_json(key, map_info->key_size);
+		jsonw_name(json_wtr, "value");
+		jsonw_start_object(json_wtr);
+		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
+		jsonw_end_object(json_wtr);
+	} else {
+		print_entry_error(map_info, key, strerror(lookup_errno));
+	}
+
+	return 0;
+}
+
 static int do_dump(int argc, char **argv)
 {
+	struct bpf_map_info info = {};
 	void *key, *value, *prev_key;
 	unsigned int num_elems = 0;
-	struct bpf_map_info info = {};
 	__u32 len = sizeof(info);
+	json_writer_t *btf_wtr;
+	struct btf *btf = NULL;
 	int err;
 	int fd;
 
@@ -575,12 +755,6 @@ static int do_dump(int argc, char **argv)
 	if (fd < 0)
 		return -1;
 
-	if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) {
-		p_err("Dumping maps of maps and program maps not supported");
-		close(fd);
-		return -1;
-	}
-
 	key = malloc(info.key_size);
 	value = alloc_value(&info);
 	if (!key || !value) {
@@ -590,8 +764,27 @@ static int do_dump(int argc, char **argv)
 	}
 
 	prev_key = NULL;
+
+	err = get_btf(&info, &btf);
+	if (err) {
+		p_err("failed to get btf");
+		goto exit_free;
+	}
+
 	if (json_output)
 		jsonw_start_array(json_wtr);
+	else
+		if (btf) {
+			btf_wtr = get_btf_writer();
+			if (!btf_wtr) {
+				p_info("failed to create json writer for btf. falling back to plain output");
+				btf__free(btf);
+				btf = NULL;
+			} else {
+				jsonw_start_array(btf_wtr);
+			}
+		}
+
 	while (true) {
 		err = bpf_map_get_next_key(fd, prev_key, key);
 		if (err) {
@@ -599,42 +792,25 @@ static int do_dump(int argc, char **argv)
 				err = 0;
 			break;
 		}
-
-		if (!bpf_map_lookup_elem(fd, key, value)) {
-			if (json_output)
-				print_entry_json(&info, key, value);
-			else
-				print_entry_plain(&info, key, value);
-		} else {
-			if (json_output) {
-				jsonw_name(json_wtr, "key");
-				print_hex_data_json(key, info.key_size);
-				jsonw_name(json_wtr, "value");
-				jsonw_start_object(json_wtr);
-				jsonw_string_field(json_wtr, "error",
-						   "can't lookup element");
-				jsonw_end_object(json_wtr);
-			} else {
-				p_info("can't lookup element with key: ");
-				fprint_hex(stderr, key, info.key_size, " ");
-				fprintf(stderr, "\n");
-			}
-		}
-
+		num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr);
 		prev_key = key;
-		num_elems++;
 	}
 
 	if (json_output)
 		jsonw_end_array(json_wtr);
-	else
+	else if (btf) {
+		jsonw_end_array(btf_wtr);
+		jsonw_destroy(&btf_wtr);
+	} else {
 		printf("Found %u element%s\n", num_elems,
 		       num_elems != 1 ? "s" : "");
+	}
 
 exit_free:
 	free(key);
 	free(value);
 	close(fd);
+	btf__free(btf);
 
 	return err;
 }
@@ -690,6 +866,8 @@ static int do_lookup(int argc, char **argv)
 {
 	struct bpf_map_info info = {};
 	__u32 len = sizeof(info);
+	json_writer_t *btf_wtr;
+	struct btf *btf = NULL;
 	void *key, *value;
 	int err;
 	int fd;
@@ -714,27 +892,60 @@ static int do_lookup(int argc, char **argv)
 		goto exit_free;
 
 	err = bpf_map_lookup_elem(fd, key, value);
-	if (!err) {
-		if (json_output)
-			print_entry_json(&info, key, value);
-		else
+	if (err) {
+		if (errno == ENOENT) {
+			if (json_output) {
+				jsonw_null(json_wtr);
+			} else {
+				printf("key:\n");
+				fprint_hex(stdout, key, info.key_size, " ");
+				printf("\n\nNot found\n");
+			}
+		} else {
+			p_err("lookup failed: %s", strerror(errno));
+		}
+
+		goto exit_free;
+	}
+
+	/* here means bpf_map_lookup_elem() succeeded */
+	err = get_btf(&info, &btf);
+	if (err) {
+		p_err("failed to get btf");
+		goto exit_free;
+	}
+
+	if (json_output) {
+		print_entry_json(&info, key, value, btf);
+	} else if (btf) {
+		/* if here json_wtr wouldn't have been initialised,
+		 * so let's create separate writer for btf
+		 */
+		btf_wtr = get_btf_writer();
+		if (!btf_wtr) {
+			p_info("failed to create json writer for btf. falling back to plain output");
+			btf__free(btf);
+			btf = NULL;
 			print_entry_plain(&info, key, value);
-	} else if (errno == ENOENT) {
-		if (json_output) {
-			jsonw_null(json_wtr);
 		} else {
-			printf("key:\n");
-			fprint_hex(stdout, key, info.key_size, " ");
-			printf("\n\nNot found\n");
+			struct btf_dumper d = {
+				.btf = btf,
+				.jw = btf_wtr,
+				.is_plain_text = true,
+			};
+
+			do_dump_btf(&d, &info, key, value);
+			jsonw_destroy(&btf_wtr);
 		}
 	} else {
-		p_err("lookup failed: %s", strerror(errno));
+		print_entry_plain(&info, key, value);
 	}
 
 exit_free:
 	free(key);
 	free(value);
 	close(fd);
+	btf__free(btf);
 
 	return err;
 }
@@ -859,6 +1070,92 @@ static int do_pin(int argc, char **argv)
 	return err;
 }
 
+static int do_create(int argc, char **argv)
+{
+	struct bpf_create_map_attr attr = { NULL, };
+	const char *pinfile;
+	int err, fd;
+
+	if (!REQ_ARGS(7))
+		return -1;
+	pinfile = GET_ARG();
+
+	while (argc) {
+		if (!REQ_ARGS(2))
+			return -1;
+
+		if (is_prefix(*argv, "type")) {
+			NEXT_ARG();
+
+			if (attr.map_type) {
+				p_err("map type already specified");
+				return -1;
+			}
+
+			attr.map_type = map_type_from_str(*argv);
+			if ((int)attr.map_type < 0) {
+				p_err("unrecognized map type: %s", *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "name")) {
+			NEXT_ARG();
+			attr.name = GET_ARG();
+		} else if (is_prefix(*argv, "key")) {
+			if (parse_u32_arg(&argc, &argv, &attr.key_size,
+					  "key size"))
+				return -1;
+		} else if (is_prefix(*argv, "value")) {
+			if (parse_u32_arg(&argc, &argv, &attr.value_size,
+					  "value size"))
+				return -1;
+		} else if (is_prefix(*argv, "entries")) {
+			if (parse_u32_arg(&argc, &argv, &attr.max_entries,
+					  "max entries"))
+				return -1;
+		} else if (is_prefix(*argv, "flags")) {
+			if (parse_u32_arg(&argc, &argv, &attr.map_flags,
+					  "flags"))
+				return -1;
+		} else if (is_prefix(*argv, "dev")) {
+			NEXT_ARG();
+
+			if (attr.map_ifindex) {
+				p_err("offload device already specified");
+				return -1;
+			}
+
+			attr.map_ifindex = if_nametoindex(*argv);
+			if (!attr.map_ifindex) {
+				p_err("unrecognized netdevice '%s': %s",
+				      *argv, strerror(errno));
+				return -1;
+			}
+			NEXT_ARG();
+		}
+	}
+
+	if (!attr.name) {
+		p_err("map name not specified");
+		return -1;
+	}
+
+	fd = bpf_create_map_xattr(&attr);
+	if (fd < 0) {
+		p_err("map create failed: %s", strerror(errno));
+		return -1;
+	}
+
+	err = do_pin_fd(fd, pinfile);
+	close(fd);
+	if (err)
+		return err;
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -868,23 +1165,34 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %s %s { show | list }   [MAP]\n"
-		"       %s %s dump    MAP\n"
-		"       %s %s update  MAP  key BYTES value VALUE [UPDATE_FLAGS]\n"
-		"       %s %s lookup  MAP  key BYTES\n"
-		"       %s %s getnext MAP [key BYTES]\n"
-		"       %s %s delete  MAP  key BYTES\n"
-		"       %s %s pin     MAP  FILE\n"
+		"       %s %s create     FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+		"                              entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+		"                              [dev NAME]\n"
+		"       %s %s dump       MAP\n"
+		"       %s %s update     MAP  key DATA value VALUE [UPDATE_FLAGS]\n"
+		"       %s %s lookup     MAP  key DATA\n"
+		"       %s %s getnext    MAP [key DATA]\n"
+		"       %s %s delete     MAP  key DATA\n"
+		"       %s %s pin        MAP  FILE\n"
+		"       %s %s event_pipe MAP [cpu N index M]\n"
 		"       %s %s help\n"
 		"\n"
-		"       MAP := { id MAP_ID | pinned FILE }\n"
+		"       " HELP_SPEC_MAP "\n"
+		"       DATA := { [hex] BYTES }\n"
 		"       " HELP_SPEC_PROGRAM "\n"
-		"       VALUE := { BYTES | MAP | PROG }\n"
+		"       VALUE := { DATA | MAP | PROG }\n"
 		"       UPDATE_FLAGS := { any | exist | noexist }\n"
+		"       TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n"
+		"                 percpu_array | stack_trace | cgroup_array | lru_hash |\n"
+		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
+		"                 devmap | sockmap | cpumap | xskmap | sockhash |\n"
+		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2]);
 
 	return 0;
 }
@@ -899,6 +1207,8 @@ static const struct cmd cmds[] = {
 	{ "getnext",	do_getnext },
 	{ "delete",	do_delete },
 	{ "pin",	do_pin },
+	{ "event_pipe",	do_event_pipe },
+	{ "create",	do_create },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
new file mode 100644
index 000000000000..bdaf4062e26e
--- /dev/null
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <libbpf.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include <bpf.h>
+#include <perf-sys.h>
+
+#include "main.h"
+
+#define MMAP_PAGE_CNT	16
+
+static bool stop;
+
+struct event_ring_info {
+	int fd;
+	int key;
+	unsigned int cpu;
+	void *mem;
+};
+
+struct perf_event_sample {
+	struct perf_event_header header;
+	u64 time;
+	__u32 size;
+	unsigned char data[];
+};
+
+static void int_exit(int signo)
+{
+	fprintf(stderr, "Stopping...\n");
+	stop = true;
+}
+
+static enum bpf_perf_event_ret
+print_bpf_output(struct perf_event_header *event, void *private_data)
+{
+	struct perf_event_sample *e = container_of(event, struct perf_event_sample,
+						   header);
+	struct event_ring_info *ring = private_data;
+	struct {
+		struct perf_event_header header;
+		__u64 id;
+		__u64 lost;
+	} *lost = (typeof(lost))event;
+
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "type");
+		jsonw_uint(json_wtr, e->header.type);
+		jsonw_name(json_wtr, "cpu");
+		jsonw_uint(json_wtr, ring->cpu);
+		jsonw_name(json_wtr, "index");
+		jsonw_uint(json_wtr, ring->key);
+		if (e->header.type == PERF_RECORD_SAMPLE) {
+			jsonw_name(json_wtr, "timestamp");
+			jsonw_uint(json_wtr, e->time);
+			jsonw_name(json_wtr, "data");
+			print_data_json(e->data, e->size);
+		} else if (e->header.type == PERF_RECORD_LOST) {
+			jsonw_name(json_wtr, "lost");
+			jsonw_start_object(json_wtr);
+			jsonw_name(json_wtr, "id");
+			jsonw_uint(json_wtr, lost->id);
+			jsonw_name(json_wtr, "count");
+			jsonw_uint(json_wtr, lost->lost);
+			jsonw_end_object(json_wtr);
+		}
+		jsonw_end_object(json_wtr);
+	} else {
+		if (e->header.type == PERF_RECORD_SAMPLE) {
+			printf("== @%lld.%09lld CPU: %d index: %d =====\n",
+			       e->time / 1000000000ULL, e->time % 1000000000ULL,
+			       ring->cpu, ring->key);
+			fprint_hex(stdout, e->data, e->size, " ");
+			printf("\n");
+		} else if (e->header.type == PERF_RECORD_LOST) {
+			printf("lost %lld events\n", lost->lost);
+		} else {
+			printf("unknown event type=%d size=%d\n",
+			       e->header.type, e->header.size);
+		}
+	}
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void
+perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
+{
+	enum bpf_perf_event_ret ret;
+
+	ret = bpf_perf_event_read_simple(ring->mem,
+					 MMAP_PAGE_CNT * get_page_size(),
+					 get_page_size(), buf, buf_len,
+					 print_bpf_output, ring);
+	if (ret != LIBBPF_PERF_EVENT_CONT) {
+		fprintf(stderr, "perf read loop failed with %d\n", ret);
+		stop = true;
+	}
+}
+
+static int perf_mmap_size(void)
+{
+	return get_page_size() * (MMAP_PAGE_CNT + 1);
+}
+
+static void *perf_event_mmap(int fd)
+{
+	int mmap_size = perf_mmap_size();
+	void *base;
+
+	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		p_err("event mmap failed: %s\n", strerror(errno));
+		return NULL;
+	}
+
+	return base;
+}
+
+static void perf_event_unmap(void *mem)
+{
+	if (munmap(mem, perf_mmap_size()))
+		fprintf(stderr, "Can't unmap ring memory!\n");
+}
+
+static int bpf_perf_event_open(int map_fd, int key, int cpu)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+	};
+	int pmu_fd;
+
+	pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+	if (pmu_fd < 0) {
+		p_err("failed to open perf event %d for CPU %d", key, cpu);
+		return -1;
+	}
+
+	if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
+		p_err("failed to update map for event %d for CPU %d", key, cpu);
+		goto err_close;
+	}
+	if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+		p_err("failed to enable event %d for CPU %d", key, cpu);
+		goto err_close;
+	}
+
+	return pmu_fd;
+
+err_close:
+	close(pmu_fd);
+	return -1;
+}
+
+int do_event_pipe(int argc, char **argv)
+{
+	int i, nfds, map_fd, index = -1, cpu = -1;
+	struct bpf_map_info map_info = {};
+	struct event_ring_info *rings;
+	size_t tmp_buf_sz = 0;
+	void *tmp_buf = NULL;
+	struct pollfd *pfds;
+	__u32 map_info_len;
+	bool do_all = true;
+
+	map_info_len = sizeof(map_info);
+	map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
+	if (map_fd < 0)
+		return -1;
+
+	if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		p_err("map is not a perf event array");
+		goto err_close_map;
+	}
+
+	while (argc) {
+		if (argc < 2) {
+			BAD_ARG();
+			goto err_close_map;
+		}
+
+		if (is_prefix(*argv, "cpu")) {
+			char *endptr;
+
+			NEXT_ARG();
+			cpu = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as CPU ID", **argv);
+				goto err_close_map;
+			}
+
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "index")) {
+			char *endptr;
+
+			NEXT_ARG();
+			index = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as index", **argv);
+				goto err_close_map;
+			}
+
+			NEXT_ARG();
+		} else {
+			BAD_ARG();
+			goto err_close_map;
+		}
+
+		do_all = false;
+	}
+
+	if (!do_all) {
+		if (index == -1 || cpu == -1) {
+			p_err("cpu and index must be specified together");
+			goto err_close_map;
+		}
+
+		nfds = 1;
+	} else {
+		nfds = min(get_possible_cpus(), map_info.max_entries);
+		cpu = 0;
+		index = 0;
+	}
+
+	rings = calloc(nfds, sizeof(rings[0]));
+	if (!rings)
+		goto err_close_map;
+
+	pfds = calloc(nfds, sizeof(pfds[0]));
+	if (!pfds)
+		goto err_free_rings;
+
+	for (i = 0; i < nfds; i++) {
+		rings[i].cpu = cpu + i;
+		rings[i].key = index + i;
+
+		rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
+						  rings[i].cpu);
+		if (rings[i].fd < 0)
+			goto err_close_fds_prev;
+
+		rings[i].mem = perf_event_mmap(rings[i].fd);
+		if (!rings[i].mem)
+			goto err_close_fds_current;
+
+		pfds[i].fd = rings[i].fd;
+		pfds[i].events = POLLIN;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGHUP, int_exit);
+	signal(SIGTERM, int_exit);
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+
+	while (!stop) {
+		poll(pfds, nfds, 200);
+		for (i = 0; i < nfds; i++)
+			perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+	}
+	free(tmp_buf);
+
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	for (i = 0; i < nfds; i++) {
+		perf_event_unmap(rings[i].mem);
+		close(rings[i].fd);
+	}
+	free(pfds);
+	free(rings);
+	close(map_fd);
+
+	return 0;
+
+err_close_fds_prev:
+	while (i--) {
+		perf_event_unmap(rings[i].mem);
+err_close_fds_current:
+		close(rings[i].fd);
+	}
+	free(pfds);
+err_free_rings:
+	free(rings);
+err_close_map:
+	close(map_fd);
+	return -1;
+}
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
new file mode 100644
index 000000000000..d441bb7035ca
--- /dev/null
+++ b/tools/bpf/bpftool/net.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libbpf.h>
+#include <net/if.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_bpf.h>
+#include <sys/socket.h>
+
+#include <bpf.h>
+#include <nlattr.h>
+#include "main.h"
+#include "netlink_dumper.h"
+
+struct ip_devname_ifindex {
+	char	devname[64];
+	int	ifindex;
+};
+
+struct bpf_netdev_t {
+	struct ip_devname_ifindex *devices;
+	int	used_len;
+	int	array_len;
+	int	filter_idx;
+};
+
+struct tc_kind_handle {
+	char	kind[64];
+	int	handle;
+};
+
+struct bpf_tcinfo_t {
+	struct tc_kind_handle	*handle_array;
+	int			used_len;
+	int			array_len;
+	bool			is_qdisc;
+};
+
+struct bpf_filter_t {
+	const char	*kind;
+	const char	*devname;
+	int		ifindex;
+};
+
+static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+	struct bpf_netdev_t *netinfo = cookie;
+	struct ifinfomsg *ifinfo = msg;
+
+	if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index)
+		return 0;
+
+	if (netinfo->used_len == netinfo->array_len) {
+		netinfo->devices = realloc(netinfo->devices,
+			(netinfo->array_len + 16) *
+			sizeof(struct ip_devname_ifindex));
+		if (!netinfo->devices)
+			return -ENOMEM;
+
+		netinfo->array_len += 16;
+	}
+	netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index;
+	snprintf(netinfo->devices[netinfo->used_len].devname,
+		 sizeof(netinfo->devices[netinfo->used_len].devname),
+		 "%s",
+		 tb[IFLA_IFNAME]
+			 ? libbpf_nla_getattr_str(tb[IFLA_IFNAME])
+			 : "");
+	netinfo->used_len++;
+
+	return do_xdp_dump(ifinfo, tb);
+}
+
+static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+	struct bpf_tcinfo_t *tcinfo = cookie;
+	struct tcmsg *info = msg;
+
+	if (tcinfo->is_qdisc) {
+		/* skip clsact qdisc */
+		if (tb[TCA_KIND] &&
+		    strcmp(libbpf_nla_data(tb[TCA_KIND]), "clsact") == 0)
+			return 0;
+		if (info->tcm_handle == 0)
+			return 0;
+	}
+
+	if (tcinfo->used_len == tcinfo->array_len) {
+		tcinfo->handle_array = realloc(tcinfo->handle_array,
+			(tcinfo->array_len + 16) * sizeof(struct tc_kind_handle));
+		if (!tcinfo->handle_array)
+			return -ENOMEM;
+
+		tcinfo->array_len += 16;
+	}
+	tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle;
+	snprintf(tcinfo->handle_array[tcinfo->used_len].kind,
+		 sizeof(tcinfo->handle_array[tcinfo->used_len].kind),
+		 "%s",
+		 tb[TCA_KIND]
+			 ? libbpf_nla_getattr_str(tb[TCA_KIND])
+			 : "unknown");
+	tcinfo->used_len++;
+
+	return 0;
+}
+
+static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+	const struct bpf_filter_t *filter_info = cookie;
+
+	return do_filter_dump((struct tcmsg *)msg, tb, filter_info->kind,
+			      filter_info->devname, filter_info->ifindex);
+}
+
+static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
+			   struct ip_devname_ifindex *dev)
+{
+	struct bpf_filter_t filter_info;
+	struct bpf_tcinfo_t tcinfo;
+	int i, handle, ret = 0;
+
+	tcinfo.handle_array = NULL;
+	tcinfo.used_len = 0;
+	tcinfo.array_len = 0;
+
+	tcinfo.is_qdisc = false;
+	ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex,
+				  dump_class_qdisc_nlmsg, &tcinfo);
+	if (ret)
+		goto out;
+
+	tcinfo.is_qdisc = true;
+	ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex,
+				  dump_class_qdisc_nlmsg, &tcinfo);
+	if (ret)
+		goto out;
+
+	filter_info.devname = dev->devname;
+	filter_info.ifindex = dev->ifindex;
+	for (i = 0; i < tcinfo.used_len; i++) {
+		filter_info.kind = tcinfo.handle_array[i].kind;
+		ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex,
+					   tcinfo.handle_array[i].handle,
+					   dump_filter_nlmsg, &filter_info);
+		if (ret)
+			goto out;
+	}
+
+	/* root, ingress and egress handle */
+	handle = TC_H_ROOT;
+	filter_info.kind = "root";
+	ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+				   dump_filter_nlmsg, &filter_info);
+	if (ret)
+		goto out;
+
+	handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+	filter_info.kind = "clsact/ingress";
+	ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+				   dump_filter_nlmsg, &filter_info);
+	if (ret)
+		goto out;
+
+	handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
+	filter_info.kind = "clsact/egress";
+	ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+				   dump_filter_nlmsg, &filter_info);
+	if (ret)
+		goto out;
+
+out:
+	free(tcinfo.handle_array);
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int i, sock, ret, filter_idx = -1;
+	struct bpf_netdev_t dev_array;
+	unsigned int nl_pid;
+	char err_buf[256];
+
+	if (argc == 2) {
+		if (strcmp(argv[0], "dev") != 0)
+			usage();
+		filter_idx = if_nametoindex(argv[1]);
+		if (filter_idx == 0) {
+			fprintf(stderr, "invalid dev name %s\n", argv[1]);
+			return -1;
+		}
+	} else if (argc != 0) {
+		usage();
+	}
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0) {
+		fprintf(stderr, "failed to open netlink sock\n");
+		return -1;
+	}
+
+	dev_array.devices = NULL;
+	dev_array.used_len = 0;
+	dev_array.array_len = 0;
+	dev_array.filter_idx = filter_idx;
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	NET_START_OBJECT;
+	NET_START_ARRAY("xdp", "%s:\n");
+	ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
+	NET_END_ARRAY("\n");
+
+	if (!ret) {
+		NET_START_ARRAY("tc", "%s:\n");
+		for (i = 0; i < dev_array.used_len; i++) {
+			ret = show_dev_tc_bpf(sock, nl_pid,
+					      &dev_array.devices[i]);
+			if (ret)
+				break;
+		}
+		NET_END_ARRAY("\n");
+	}
+	NET_END_OBJECT;
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	if (ret) {
+		if (json_output)
+			jsonw_null(json_wtr);
+		libbpf_strerror(ret, err_buf, sizeof(err_buf));
+		fprintf(stderr, "Error: %s\n", err_buf);
+	}
+	free(dev_array.devices);
+	close(sock);
+	return ret;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s %s { show | list } [dev <devname>]\n"
+		"       %s %s help\n"
+		"Note: Only xdp and tc attachments are supported now.\n"
+		"      For progs attached to cgroups, use \"bpftool cgroup\"\n"
+		"      to dump program attachments. For program types\n"
+		"      sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
+		"      consult iproute2.\n",
+		bin_name, argv[-2], bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_net(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c
new file mode 100644
index 000000000000..4e9f4531269f
--- /dev/null
+++ b/tools/bpf/bpftool/netlink_dumper.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <libbpf.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_bpf.h>
+
+#include <nlattr.h>
+#include "main.h"
+#include "netlink_dumper.h"
+
+static void xdp_dump_prog_id(struct nlattr **tb, int attr,
+			     const char *mode,
+			     bool new_json_object)
+{
+	if (!tb[attr])
+		return;
+
+	if (new_json_object)
+		NET_START_OBJECT
+	NET_DUMP_STR("mode", " %s", mode);
+	NET_DUMP_UINT("id", " id %u", libbpf_nla_getattr_u32(tb[attr]))
+	if (new_json_object)
+		NET_END_OBJECT
+}
+
+static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex,
+			   const char *name)
+{
+	struct nlattr *tb[IFLA_XDP_MAX + 1];
+	unsigned char mode;
+
+	if (libbpf_nla_parse_nested(tb, IFLA_XDP_MAX, attr, NULL) < 0)
+		return -1;
+
+	if (!tb[IFLA_XDP_ATTACHED])
+		return 0;
+
+	mode = libbpf_nla_getattr_u8(tb[IFLA_XDP_ATTACHED]);
+	if (mode == XDP_ATTACHED_NONE)
+		return 0;
+
+	NET_START_OBJECT;
+	if (name)
+		NET_DUMP_STR("devname", "%s", name);
+	NET_DUMP_UINT("ifindex", "(%d)", ifindex);
+
+	if (mode == XDP_ATTACHED_MULTI) {
+		if (json_output) {
+			jsonw_name(json_wtr, "multi_attachments");
+			jsonw_start_array(json_wtr);
+		}
+		xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic", true);
+		xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "driver", true);
+		xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload", true);
+		if (json_output)
+			jsonw_end_array(json_wtr);
+	} else if (mode == XDP_ATTACHED_DRV) {
+		xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "driver", false);
+	} else if (mode == XDP_ATTACHED_SKB) {
+		xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "generic", false);
+	} else if (mode == XDP_ATTACHED_HW) {
+		xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "offload", false);
+	}
+
+	NET_END_OBJECT_FINAL;
+	return 0;
+}
+
+int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb)
+{
+	if (!tb[IFLA_XDP])
+		return 0;
+
+	return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index,
+			       libbpf_nla_getattr_str(tb[IFLA_IFNAME]));
+}
+
+static int do_bpf_dump_one_act(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+
+	if (libbpf_nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	if (!tb[TCA_ACT_BPF_PARMS])
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	NET_START_OBJECT_NESTED2;
+	if (tb[TCA_ACT_BPF_NAME])
+		NET_DUMP_STR("name", "%s",
+			     libbpf_nla_getattr_str(tb[TCA_ACT_BPF_NAME]));
+	if (tb[TCA_ACT_BPF_ID])
+		NET_DUMP_UINT("id", " id %u",
+			      libbpf_nla_getattr_u32(tb[TCA_ACT_BPF_ID]));
+	NET_END_OBJECT_NESTED;
+	return 0;
+}
+
+static int do_dump_one_act(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_ACT_MAX + 1];
+
+	if (!attr)
+		return 0;
+
+	if (libbpf_nla_parse_nested(tb, TCA_ACT_MAX, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	if (tb[TCA_ACT_KIND] &&
+	    strcmp(libbpf_nla_data(tb[TCA_ACT_KIND]), "bpf") == 0)
+		return do_bpf_dump_one_act(tb[TCA_ACT_OPTIONS]);
+
+	return 0;
+}
+
+static int do_bpf_act_dump(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int act, ret;
+
+	if (libbpf_nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	NET_START_ARRAY("act", " %s [");
+	for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) {
+		ret = do_dump_one_act(tb[act]);
+		if (ret)
+			break;
+	}
+	NET_END_ARRAY("] ");
+
+	return ret;
+}
+
+static int do_bpf_filter_dump(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_BPF_MAX + 1];
+	int ret;
+
+	if (libbpf_nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	if (tb[TCA_BPF_NAME])
+		NET_DUMP_STR("name", " %s",
+			     libbpf_nla_getattr_str(tb[TCA_BPF_NAME]));
+	if (tb[TCA_BPF_ID])
+		NET_DUMP_UINT("id", " id %u",
+			      libbpf_nla_getattr_u32(tb[TCA_BPF_ID]));
+	if (tb[TCA_BPF_ACT]) {
+		ret = do_bpf_act_dump(tb[TCA_BPF_ACT]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind,
+		   const char *devname, int ifindex)
+{
+	int ret = 0;
+
+	if (tb[TCA_OPTIONS] &&
+	    strcmp(libbpf_nla_data(tb[TCA_KIND]), "bpf") == 0) {
+		NET_START_OBJECT;
+		if (devname[0] != '\0')
+			NET_DUMP_STR("devname", "%s", devname);
+		NET_DUMP_UINT("ifindex", "(%u)", ifindex);
+		NET_DUMP_STR("kind", " %s", kind);
+		ret = do_bpf_filter_dump(tb[TCA_OPTIONS]);
+		NET_END_OBJECT_FINAL;
+	}
+
+	return ret;
+}
diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h
new file mode 100644
index 000000000000..e3516b586a34
--- /dev/null
+++ b/tools/bpf/bpftool/netlink_dumper.h
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#ifndef _NETLINK_DUMPER_H_
+#define _NETLINK_DUMPER_H_
+
+#define NET_START_OBJECT				\
+{							\
+	if (json_output)				\
+		jsonw_start_object(json_wtr);		\
+}
+
+#define NET_START_OBJECT_NESTED(name)			\
+{							\
+	if (json_output) {				\
+		jsonw_name(json_wtr, name);		\
+		jsonw_start_object(json_wtr);		\
+	} else {					\
+		fprintf(stdout, "%s {", name);		\
+	}						\
+}
+
+#define NET_START_OBJECT_NESTED2			\
+{							\
+	if (json_output)				\
+		jsonw_start_object(json_wtr);		\
+	else						\
+		fprintf(stdout, "{");			\
+}
+
+#define NET_END_OBJECT_NESTED				\
+{							\
+	if (json_output)				\
+		jsonw_end_object(json_wtr);		\
+	else						\
+		fprintf(stdout, "}");			\
+}
+
+#define NET_END_OBJECT					\
+{							\
+	if (json_output)				\
+		jsonw_end_object(json_wtr);		\
+}
+
+#define NET_END_OBJECT_FINAL				\
+{							\
+	if (json_output)				\
+		jsonw_end_object(json_wtr);		\
+	else						\
+		fprintf(stdout, "\n");			\
+}
+
+#define NET_START_ARRAY(name, fmt_str)			\
+{							\
+	if (json_output) {				\
+		jsonw_name(json_wtr, name);		\
+		jsonw_start_array(json_wtr);		\
+	} else {					\
+		fprintf(stdout, fmt_str, name);		\
+	}						\
+}
+
+#define NET_END_ARRAY(endstr)				\
+{							\
+	if (json_output)				\
+		jsonw_end_array(json_wtr);		\
+	else						\
+		fprintf(stdout, "%s", endstr);		\
+}
+
+#define NET_DUMP_UINT(name, fmt_str, val)		\
+{							\
+	if (json_output)				\
+		jsonw_uint_field(json_wtr, name, val);	\
+	else						\
+		fprintf(stdout, fmt_str, val);		\
+}
+
+#define NET_DUMP_STR(name, fmt_str, str)		\
+{							\
+	if (json_output)				\
+		jsonw_string_field(json_wtr, name, str);\
+	else						\
+		fprintf(stdout, fmt_str, str);		\
+}
+
+#define NET_DUMP_STR_ONLY(str)				\
+{							\
+	if (json_output)				\
+		jsonw_string(json_wtr, str);		\
+	else						\
+		fprintf(stdout, "%s ", str);		\
+}
+
+#endif
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 000000000000..b76b77dcfd1f
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+/* 0: undecided, 1: supported, 2: not supported */
+static int perf_query_supported;
+static bool has_perf_query_support(void)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	char buf[256];
+	int fd;
+
+	if (perf_query_supported)
+		goto out;
+
+	fd = open("/", O_RDONLY);
+	if (fd < 0) {
+		p_err("perf_query_support: cannot open directory \"/\" (%s)",
+		      strerror(errno));
+		goto out;
+	}
+
+	/* the following query will fail as no bpf attachment,
+	 * the expected errno is ENOTSUPP
+	 */
+	errno = 0;
+	len = sizeof(buf);
+	bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
+			  &fd_type, &probe_offset, &probe_addr);
+
+	if (errno == 524 /* ENOTSUPP */) {
+		perf_query_supported = 1;
+		goto close_fd;
+	}
+
+	perf_query_supported = 2;
+	p_err("perf_query_support: %s", strerror(errno));
+	fprintf(stderr,
+		"HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
+
+close_fd:
+	close(fd);
+out:
+	return perf_query_supported == 1;
+}
+
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
+			    char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	jsonw_start_object(json_wtr);
+	jsonw_int_field(json_wtr, "pid", pid);
+	jsonw_int_field(json_wtr, "fd", fd);
+	jsonw_uint_field(json_wtr, "prog_id", prog_id);
+	switch (fd_type) {
+	case BPF_FD_TYPE_RAW_TRACEPOINT:
+		jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_FD_TYPE_TRACEPOINT:
+		jsonw_string_field(json_wtr, "fd_type", "tracepoint");
+		jsonw_string_field(json_wtr, "tracepoint", buf);
+		break;
+	case BPF_FD_TYPE_KPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "kprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_FD_TYPE_KRETPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "kretprobe");
+		if (buf[0] != '\0') {
+			jsonw_string_field(json_wtr, "func", buf);
+			jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		} else {
+			jsonw_lluint_field(json_wtr, "addr", probe_addr);
+		}
+		break;
+	case BPF_FD_TYPE_UPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "uprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	case BPF_FD_TYPE_URETPROBE:
+		jsonw_string_field(json_wtr, "fd_type", "uretprobe");
+		jsonw_string_field(json_wtr, "filename", buf);
+		jsonw_lluint_field(json_wtr, "offset", probe_offset);
+		break;
+	}
+	jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
+			     char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+	printf("pid %d  fd %d: prog_id %u  ", pid, fd, prog_id);
+	switch (fd_type) {
+	case BPF_FD_TYPE_RAW_TRACEPOINT:
+		printf("raw_tracepoint  %s\n", buf);
+		break;
+	case BPF_FD_TYPE_TRACEPOINT:
+		printf("tracepoint  %s\n", buf);
+		break;
+	case BPF_FD_TYPE_KPROBE:
+		if (buf[0] != '\0')
+			printf("kprobe  func %s  offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kprobe  addr %llu\n", probe_addr);
+		break;
+	case BPF_FD_TYPE_KRETPROBE:
+		if (buf[0] != '\0')
+			printf("kretprobe  func %s  offset %llu\n", buf,
+			       probe_offset);
+		else
+			printf("kretprobe  addr %llu\n", probe_addr);
+		break;
+	case BPF_FD_TYPE_UPROBE:
+		printf("uprobe  filename %s  offset %llu\n", buf, probe_offset);
+		break;
+	case BPF_FD_TYPE_URETPROBE:
+		printf("uretprobe  filename %s  offset %llu\n", buf,
+		       probe_offset);
+		break;
+	}
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+		     int tflag, struct FTW *ftwbuf)
+{
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	int err, pid = 0, fd = 0;
+	const char *pch;
+	char buf[4096];
+
+	/* prefix always /proc */
+	pch = fpath + 5;
+	if (*pch == '\0')
+		return 0;
+
+	/* pid should be all numbers */
+	pch++;
+	while (isdigit(*pch)) {
+		pid = pid * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd directory */
+	pch++;
+	if (strncmp(pch, "fd", 2))
+		return FTW_SKIP_SUBTREE;
+	pch += 2;
+	if (*pch == '\0')
+		return 0;
+	if (*pch != '/')
+		return FTW_SKIP_SUBTREE;
+
+	/* check /proc/<pid>/fd/<fd_num> */
+	pch++;
+	while (isdigit(*pch)) {
+		fd = fd * 10 + *pch - '0';
+		pch++;
+	}
+	if (*pch != '\0')
+		return FTW_SKIP_SUBTREE;
+
+	/* query (pid, fd) for potential perf events */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
+				&probe_offset, &probe_addr);
+	if (err < 0)
+		return 0;
+
+	if (json_output)
+		print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
+				probe_addr);
+	else
+		print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
+				 probe_addr);
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+	int err = 0, nopenfd = 16;
+
+	if (!has_perf_query_support())
+		return -1;
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+		p_err("%s", strerror(errno));
+		err = -1;
+	}
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s { show | list | help }\n"
+		"",
+		bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f7a810897eac..ccee180dfb76 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,8 +31,7 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
+#define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
@@ -41,9 +40,12 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <net/if.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <linux/err.h>
+
 #include <bpf.h>
 #include <libbpf.h>
 
@@ -68,8 +70,33 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SOCK_OPS]	= "sock_ops",
 	[BPF_PROG_TYPE_SK_SKB]		= "sk_skb",
 	[BPF_PROG_TYPE_CGROUP_DEVICE]	= "cgroup_device",
+	[BPF_PROG_TYPE_SK_MSG]		= "sk_msg",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT]	= "raw_tracepoint",
+	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+	[BPF_PROG_TYPE_LIRC_MODE2]	= "lirc_mode2",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR]	= "flow_dissector",
+};
+
+static const char * const attach_type_strings[] = {
+	[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
+	[BPF_SK_MSG_VERDICT] = "msg_verdict",
+	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
+enum bpf_attach_type parse_attach_type(const char *str)
+{
+	enum bpf_attach_type type;
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		if (attach_type_strings[type] &&
+		    is_prefix(str, attach_type_strings[type]))
+			return type;
+	}
+
+	return __MAX_BPF_ATTACH_TYPE;
+}
+
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 {
 	struct timespec real_time_ts, boot_time_ts;
@@ -86,14 +113,19 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 	}
 
 	wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
-		nsecs / 1000000000;
+		(real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
+		1000000000;
+
 
 	if (!localtime_r(&wallclock_secs, &load_tm)) {
 		snprintf(buf, size, "%llu", nsecs / 1000000000);
 		return;
 	}
 
-	strftime(buf, size, "%b %d/%H:%M", &load_tm);
+	if (json_output)
+		strftime(buf, size, "%s", &load_tm);
+	else
+		strftime(buf, size, "%FT%T%z", &load_tm);
 }
 
 static int prog_fd_by_tag(unsigned char *tag)
@@ -232,6 +264,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		     info->tag[0], info->tag[1], info->tag[2], info->tag[3],
 		     info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
 
+	jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
+
 	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
 
 	if (info->load_time) {
@@ -240,7 +274,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		print_boot_time(info->load_time, buf, sizeof(buf));
 
 		/* Piggy back on load_time, since 0 uid is a valid one */
-		jsonw_string_field(json_wtr, "loaded_at", buf);
+		jsonw_name(json_wtr, "loaded_at");
+		jsonw_printf(json_wtr, "%s", buf);
 		jsonw_uint_field(json_wtr, "uid", info->created_by_uid);
 	}
 
@@ -292,6 +327,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	printf("tag ");
 	fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
 	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+	printf("%s", info->gpl_compatible ? "  gpl" : "");
 	printf("\n");
 
 	if (info->load_time) {
@@ -321,10 +357,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	if (!hash_empty(prog_table.table)) {
 		struct pinned_obj *obj;
 
-		printf("\n");
 		hash_for_each_possible(prog_table.table, obj, hash, info->id) {
 			if (obj->id == info->id)
-				printf("\tpinned %s\n", obj->path);
+				printf("\n\tpinned %s", obj->path);
 		}
 	}
 
@@ -410,7 +445,12 @@ static int do_show(int argc, char **argv)
 
 static int do_dump(int argc, char **argv)
 {
+	unsigned long *func_ksyms = NULL;
 	struct bpf_prog_info info = {};
+	unsigned int *func_lens = NULL;
+	const char *disasm_opt = NULL;
+	unsigned int nr_func_ksyms;
+	unsigned int nr_func_lens;
 	struct dump_data dd = {};
 	__u32 len = sizeof(info);
 	unsigned int buf_size;
@@ -486,10 +526,34 @@ static int do_dump(int argc, char **argv)
 		return -1;
 	}
 
+	nr_func_ksyms = info.nr_jited_ksyms;
+	if (nr_func_ksyms) {
+		func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
+		if (!func_ksyms) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
+	nr_func_lens = info.nr_jited_func_lens;
+	if (nr_func_lens) {
+		func_lens = malloc(nr_func_lens * sizeof(__u32));
+		if (!func_lens) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	*member_ptr = ptr_to_u64(buf);
 	*member_len = buf_size;
+	info.jited_ksyms = ptr_to_u64(func_ksyms);
+	info.nr_jited_ksyms = nr_func_ksyms;
+	info.jited_func_lens = ptr_to_u64(func_lens);
+	info.nr_jited_func_lens = nr_func_lens;
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	close(fd);
@@ -503,6 +567,16 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.nr_jited_ksyms > nr_func_ksyms) {
+		p_err("too many addresses returned");
+		goto err_free;
+	}
+
+	if (info.nr_jited_func_lens > nr_func_lens) {
+		p_err("too many values returned");
+		goto err_free;
+	}
+
 	if ((member_len == &info.jited_prog_len &&
 	     info.jited_prog_insns == 0) ||
 	    (member_len == &info.xlated_prog_len &&
@@ -533,14 +607,67 @@ static int do_dump(int argc, char **argv)
 		const char *name = NULL;
 
 		if (info.ifindex) {
-			name = ifindex_to_bfd_name_ns(info.ifindex,
-						      info.netns_dev,
-						      info.netns_ino);
+			name = ifindex_to_bfd_params(info.ifindex,
+						     info.netns_dev,
+						     info.netns_ino,
+						     &disasm_opt);
 			if (!name)
 				goto err_free;
 		}
 
-		disasm_print_insn(buf, *member_len, opcodes, name);
+		if (info.nr_jited_func_lens && info.jited_func_lens) {
+			struct kernel_sym *sym = NULL;
+			char sym_name[SYM_MAX_NAME];
+			unsigned char *img = buf;
+			__u64 *ksyms = NULL;
+			__u32 *lens;
+			__u32 i;
+
+			if (info.nr_jited_ksyms) {
+				kernel_syms_load(&dd);
+				ksyms = (__u64 *) info.jited_ksyms;
+			}
+
+			if (json_output)
+				jsonw_start_array(json_wtr);
+
+			lens = (__u32 *) info.jited_func_lens;
+			for (i = 0; i < info.nr_jited_func_lens; i++) {
+				if (ksyms) {
+					sym = kernel_syms_search(&dd, ksyms[i]);
+					if (sym)
+						sprintf(sym_name, "%s", sym->name);
+					else
+						sprintf(sym_name, "0x%016llx", ksyms[i]);
+				} else {
+					strcpy(sym_name, "unknown");
+				}
+
+				if (json_output) {
+					jsonw_start_object(json_wtr);
+					jsonw_name(json_wtr, "name");
+					jsonw_string(json_wtr, sym_name);
+					jsonw_name(json_wtr, "insns");
+				} else {
+					printf("%s:\n", sym_name);
+				}
+
+				disasm_print_insn(img, lens[i], opcodes, name,
+						  disasm_opt);
+				img += lens[i];
+
+				if (json_output)
+					jsonw_end_object(json_wtr);
+				else
+					printf("\n");
+			}
+
+			if (json_output)
+				jsonw_end_array(json_wtr);
+		} else {
+			disasm_print_insn(buf, *member_len, opcodes, name,
+					  disasm_opt);
+		}
 	} else if (visual) {
 		if (json_output)
 			jsonw_null(json_wtr);
@@ -548,6 +675,9 @@ static int do_dump(int argc, char **argv)
 			dump_xlated_cfg(buf, *member_len);
 	} else {
 		kernel_syms_load(&dd);
+		dd.nr_jited_ksyms = info.nr_jited_ksyms;
+		dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+
 		if (json_output)
 			dump_xlated_json(&dd, buf, *member_len, opcodes);
 		else
@@ -556,10 +686,14 @@ static int do_dump(int argc, char **argv)
 	}
 
 	free(buf);
+	free(func_ksyms);
+	free(func_lens);
 	return 0;
 
 err_free:
 	free(buf);
+	free(func_ksyms);
+	free(func_lens);
 	return -1;
 }
 
@@ -573,28 +707,323 @@ static int do_pin(int argc, char **argv)
 	return err;
 }
 
+struct map_replace {
+	int idx;
+	int fd;
+	char *name;
+};
+
+int map_replace_compar(const void *p1, const void *p2)
+{
+	const struct map_replace *a = p1, *b = p2;
+
+	return a->idx - b->idx;
+}
+
+static int do_attach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, mapfd, progfd;
+
+	if (!REQ_ARGS(5)) {
+		p_err("too few parameters for map attach");
+		return -EINVAL;
+	}
+
+	progfd = prog_parse_fd(&argc, &argv);
+	if (progfd < 0)
+		return progfd;
+
+	attach_type = parse_attach_type(*argv);
+	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type");
+		return -EINVAL;
+	}
+	NEXT_ARG();
+
+	mapfd = map_parse_fd(&argc, &argv);
+	if (mapfd < 0)
+		return mapfd;
+
+	err = bpf_prog_attach(progfd, mapfd, attach_type, 0);
+	if (err) {
+		p_err("failed prog attach to map");
+		return -EINVAL;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
+static int do_detach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, mapfd, progfd;
+
+	if (!REQ_ARGS(5)) {
+		p_err("too few parameters for map detach");
+		return -EINVAL;
+	}
+
+	progfd = prog_parse_fd(&argc, &argv);
+	if (progfd < 0)
+		return progfd;
+
+	attach_type = parse_attach_type(*argv);
+	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type");
+		return -EINVAL;
+	}
+	NEXT_ARG();
+
+	mapfd = map_parse_fd(&argc, &argv);
+	if (mapfd < 0)
+		return mapfd;
+
+	err = bpf_prog_detach2(progfd, mapfd, attach_type);
+	if (err) {
+		p_err("failed prog detach from map");
+		return -EINVAL;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
 static int do_load(int argc, char **argv)
 {
+	enum bpf_attach_type expected_attach_type;
+	struct bpf_object_open_attr attr = {
+		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	};
+	struct map_replace *map_replace = NULL;
+	unsigned int old_map_fds = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
-	int prog_fd;
+	struct bpf_map *map;
+	const char *pinfile;
+	unsigned int i, j;
+	__u32 ifindex = 0;
+	int idx, err;
 
-	if (argc != 2)
-		usage();
-
-	if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
-		p_err("failed to load program");
+	if (!REQ_ARGS(2))
 		return -1;
+	attr.file = GET_ARG();
+	pinfile = GET_ARG();
+
+	while (argc) {
+		if (is_prefix(*argv, "type")) {
+			char *type;
+
+			NEXT_ARG();
+
+			if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
+				p_err("program type already specified");
+				goto err_free_reuse_maps;
+			}
+			if (!REQ_ARGS(1))
+				goto err_free_reuse_maps;
+
+			/* Put a '/' at the end of type to appease libbpf */
+			type = malloc(strlen(*argv) + 2);
+			if (!type) {
+				p_err("mem alloc failed");
+				goto err_free_reuse_maps;
+			}
+			*type = 0;
+			strcat(type, *argv);
+			strcat(type, "/");
+
+			err = libbpf_prog_type_by_name(type, &attr.prog_type,
+						       &expected_attach_type);
+			free(type);
+			if (err < 0) {
+				p_err("unknown program type '%s'", *argv);
+				goto err_free_reuse_maps;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "map")) {
+			void *new_map_replace;
+			char *endptr, *name;
+			int fd;
+
+			NEXT_ARG();
+
+			if (!REQ_ARGS(4))
+				goto err_free_reuse_maps;
+
+			if (is_prefix(*argv, "idx")) {
+				NEXT_ARG();
+
+				idx = strtoul(*argv, &endptr, 0);
+				if (*endptr) {
+					p_err("can't parse %s as IDX", *argv);
+					goto err_free_reuse_maps;
+				}
+				name = NULL;
+			} else if (is_prefix(*argv, "name")) {
+				NEXT_ARG();
+
+				name = *argv;
+				idx = -1;
+			} else {
+				p_err("expected 'idx' or 'name', got: '%s'?",
+				      *argv);
+				goto err_free_reuse_maps;
+			}
+			NEXT_ARG();
+
+			fd = map_parse_fd(&argc, &argv);
+			if (fd < 0)
+				goto err_free_reuse_maps;
+
+			new_map_replace = reallocarray(map_replace,
+						       old_map_fds + 1,
+						       sizeof(*map_replace));
+			if (!new_map_replace) {
+				p_err("mem alloc failed");
+				goto err_free_reuse_maps;
+			}
+			map_replace = new_map_replace;
+
+			map_replace[old_map_fds].idx = idx;
+			map_replace[old_map_fds].name = name;
+			map_replace[old_map_fds].fd = fd;
+			old_map_fds++;
+		} else if (is_prefix(*argv, "dev")) {
+			NEXT_ARG();
+
+			if (ifindex) {
+				p_err("offload device already specified");
+				goto err_free_reuse_maps;
+			}
+			if (!REQ_ARGS(1))
+				goto err_free_reuse_maps;
+
+			ifindex = if_nametoindex(*argv);
+			if (!ifindex) {
+				p_err("unrecognized netdevice '%s': %s",
+				      *argv, strerror(errno));
+				goto err_free_reuse_maps;
+			}
+			NEXT_ARG();
+		} else {
+			p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
+			      *argv);
+			goto err_free_reuse_maps;
+		}
 	}
 
-	if (do_pin_fd(prog_fd, argv[1])) {
-		p_err("failed to pin program");
-		return -1;
+	obj = __bpf_object__open_xattr(&attr, bpf_flags);
+	if (IS_ERR_OR_NULL(obj)) {
+		p_err("failed to open object file");
+		goto err_free_reuse_maps;
+	}
+
+	prog = bpf_program__next(NULL, obj);
+	if (!prog) {
+		p_err("object file doesn't contain any bpf program");
+		goto err_close_obj;
 	}
 
+	bpf_program__set_ifindex(prog, ifindex);
+	if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+		const char *sec_name = bpf_program__title(prog, false);
+
+		err = libbpf_prog_type_by_name(sec_name, &attr.prog_type,
+					       &expected_attach_type);
+		if (err < 0) {
+			p_err("failed to guess program type based on section name %s\n",
+			      sec_name);
+			goto err_close_obj;
+		}
+	}
+	bpf_program__set_type(prog, attr.prog_type);
+	bpf_program__set_expected_attach_type(prog, expected_attach_type);
+
+	qsort(map_replace, old_map_fds, sizeof(*map_replace),
+	      map_replace_compar);
+
+	/* After the sort maps by name will be first on the list, because they
+	 * have idx == -1.  Resolve them.
+	 */
+	j = 0;
+	while (j < old_map_fds && map_replace[j].name) {
+		i = 0;
+		bpf_map__for_each(map, obj) {
+			if (!strcmp(bpf_map__name(map), map_replace[j].name)) {
+				map_replace[j].idx = i;
+				break;
+			}
+			i++;
+		}
+		if (map_replace[j].idx == -1) {
+			p_err("unable to find map '%s'", map_replace[j].name);
+			goto err_close_obj;
+		}
+		j++;
+	}
+	/* Resort if any names were resolved */
+	if (j)
+		qsort(map_replace, old_map_fds, sizeof(*map_replace),
+		      map_replace_compar);
+
+	/* Set ifindex and name reuse */
+	j = 0;
+	idx = 0;
+	bpf_map__for_each(map, obj) {
+		if (!bpf_map__is_offload_neutral(map))
+			bpf_map__set_ifindex(map, ifindex);
+
+		if (j < old_map_fds && idx == map_replace[j].idx) {
+			err = bpf_map__reuse_fd(map, map_replace[j++].fd);
+			if (err) {
+				p_err("unable to set up map reuse: %d", err);
+				goto err_close_obj;
+			}
+
+			/* Next reuse wants to apply to the same map */
+			if (j < old_map_fds && map_replace[j].idx == idx) {
+				p_err("replacement for map idx %d specified more than once",
+				      idx);
+				goto err_close_obj;
+			}
+		}
+
+		idx++;
+	}
+	if (j < old_map_fds) {
+		p_err("map idx '%d' not used", map_replace[j].idx);
+		goto err_close_obj;
+	}
+
+	err = bpf_object__load(obj);
+	if (err) {
+		p_err("failed to load object file");
+		goto err_close_obj;
+	}
+
+	if (do_pin_fd(bpf_program__fd(prog), pinfile))
+		goto err_close_obj;
+
 	if (json_output)
 		jsonw_null(json_wtr);
 
+	bpf_object__close(obj);
+	for (i = 0; i < old_map_fds; i++)
+		close(map_replace[i].fd);
+	free(map_replace);
+
 	return 0;
+
+err_close_obj:
+	bpf_object__close(obj);
+err_free_reuse_maps:
+	for (i = 0; i < old_map_fds; i++)
+		close(map_replace[i].fd);
+	free(map_replace);
+	return -1;
 }
 
 static int do_help(int argc, char **argv)
@@ -609,14 +1038,27 @@ static int do_help(int argc, char **argv)
 		"       %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
 		"       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
 		"       %s %s pin   PROG FILE\n"
-		"       %s %s load  OBJ  FILE\n"
+		"       %s %s load  OBJ  FILE [type TYPE] [dev NAME] \\\n"
+		"                         [map { idx IDX | name NAME } MAP]\n"
+		"       %s %s attach PROG ATTACH_TYPE MAP\n"
+		"       %s %s detach PROG ATTACH_TYPE MAP\n"
 		"       %s %s help\n"
 		"\n"
+		"       " HELP_SPEC_MAP "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
+		"       TYPE := { socket | kprobe | kretprobe | classifier | action |\n"
+		"                 tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n"
+		"                 cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n"
+		"                 lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n"
+		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
+		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
+		"                 cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
+		"       ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2]);
 
 	return 0;
 }
@@ -628,6 +1070,8 @@ static const struct cmd cmds[] = {
 	{ "dump",	do_dump },
 	{ "pin",	do_pin },
 	{ "load",	do_load },
+	{ "attach",	do_attach },
+	{ "detach",	do_detach },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7a3173b76c16..3284759df98a 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -35,6 +35,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#define _GNU_SOURCE
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -66,9 +67,8 @@ void kernel_syms_load(struct dump_data *dd)
 	while (!feof(fp)) {
 		if (!fgets(buff, sizeof(buff), fp))
 			break;
-		tmp = realloc(dd->sym_mapping,
-			      (dd->sym_count + 1) *
-			      sizeof(*dd->sym_mapping));
+		tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
+				   sizeof(*dd->sym_mapping));
 		if (!tmp) {
 out:
 			free(dd->sym_mapping);
@@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
 	free(dd->sym_mapping);
 }
 
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
-					     unsigned long key)
+struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+				      unsigned long key)
 {
 	struct kernel_sym sym = {
 		.address = key,
@@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
 				    unsigned long address,
 				    const struct bpf_insn *insn)
 {
-	if (sym)
+	if (!dd->nr_jited_ksyms)
+		/* Do not show address for interpreted programs */
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			"%+d", insn->off);
+	else if (sym)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "%+d#%s", insn->off, sym->name);
 	else
@@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
 	unsigned long address = dd->address_call_base + insn->imm;
 	struct kernel_sym *sym;
 
+	if (insn->src_reg == BPF_PSEUDO_CALL &&
+	    (__u32) insn->imm < dd->nr_jited_ksyms)
+		address = dd->jited_ksyms[insn->imm];
+
 	sym = kernel_syms_search(dd, address);
 	if (insn->src_reg == BPF_PSEUDO_CALL)
 		return print_call_pcrel(dd, sym, address, insn);
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index b34affa7ef2d..33d86e2b369b 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,11 +49,14 @@ struct dump_data {
 	unsigned long address_call_base;
 	struct kernel_sym *sym_mapping;
 	__u32 sym_count;
+	__u64 *jited_ksyms;
+	__u32 nr_jited_ksyms;
 	char scratch_buff[SYM_MAX_NAME + 8];
 };
 
 void kernel_syms_load(struct dump_data *dd);
 void kernel_syms_destroy(struct dump_data *dd);
+struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
 void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
 		      bool opcodes);
 void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
diff --git a/tools/build/Build.include b/tools/build/Build.include
index a4bbb984941d..9ec01f4454f9 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -63,8 +63,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
            $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;           \
            rm -f $(depfile);                                                    \
            mv -f $(dot-target).tmp $(dot-target).cmd,                           \
-           printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
-           printf '\# using basic dep data\n\n' >> $(dot-target).cmd;           \
+           printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
+           printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd;           \
            cat $(depfile) >> $(dot-target).cmd;                                 \
            printf '\n%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
 
@@ -98,4 +98,4 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
 ###
 ## HOSTCC C flags
 
-host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj))
+host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 5eb4b5ad79cb..727050c40f09 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -43,7 +43,7 @@ $(OUTPUT)fixdep-in.o: FORCE
 	$(Q)$(MAKE) $(build)=fixdep
 
 $(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o
-	$(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $<
+	$(QUIET_LINK)$(HOSTCC) $(KBUILD_HOSTLDFLAGS) -o $@ $<
 
 FORCE:
 
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 5b6dda3b1ca8..d74bb9414d7c 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -33,6 +33,7 @@ FEATURE_TESTS_BASIC :=                  \
         dwarf_getlocations              \
         fortify-source                  \
         sync-compare-and-swap           \
+        get_current_dir_name            \
         glibc                           \
         gtk2                            \
         gtk2-infobar                    \
@@ -57,6 +58,7 @@ FEATURE_TESTS_BASIC :=                  \
         libunwind-aarch64               \
         pthread-attr-setaffinity-np     \
         pthread-barrier     		\
+        reallocarray                    \
         stackprotector-all              \
         timerfd                         \
         libdw-dwarf-unwind              \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index dac9563b5470..304b984f11b9 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -7,6 +7,7 @@ FILES=                                          \
          test-dwarf_getlocations.bin            \
          test-fortify-source.bin                \
          test-sync-compare-and-swap.bin         \
+         test-get_current_dir_name.bin          \
          test-glibc.bin                         \
          test-gtk2.bin                          \
          test-gtk2-infobar.bin                  \
@@ -14,6 +15,7 @@ FILES=                                          \
          test-libaudit.bin                      \
          test-libbfd.bin                        \
          test-disassembler-four-args.bin        \
+         test-reallocarray.bin			\
          test-liberty.bin                       \
          test-liberty-z.bin                     \
          test-cplus-demangle.bin                \
@@ -100,6 +102,9 @@ $(OUTPUT)test-bionic.bin:
 $(OUTPUT)test-libelf.bin:
 	$(BUILD) -lelf
 
+$(OUTPUT)test-get_current_dir_name.bin:
+	$(BUILD)
+
 $(OUTPUT)test-glibc.bin:
 	$(BUILD)
 
@@ -204,6 +209,9 @@ $(OUTPUT)test-libbfd.bin:
 $(OUTPUT)test-disassembler-four-args.bin:
 	$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
 
+$(OUTPUT)test-reallocarray.bin:
+	$(BUILD)
+
 $(OUTPUT)test-liberty.bin:
 	$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
 
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 8dc20a61341f..56722bfe6bdd 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -34,6 +34,10 @@
 # include "test-libelf-mmap.c"
 #undef main
 
+#define main main_test_get_current_dir_name
+# include "test-get_current_dir_name.c"
+#undef main
+
 #define main main_test_glibc
 # include "test-glibc.c"
 #undef main
@@ -174,6 +178,7 @@ int main(int argc, char *argv[])
 	main_test_hello();
 	main_test_libelf();
 	main_test_libelf_mmap();
+	main_test_get_current_dir_name();
 	main_test_glibc();
 	main_test_dwarf();
 	main_test_dwarf_getlocations();
diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c
new file mode 100644
index 000000000000..573000f93212
--- /dev/null
+++ b/tools/build/feature/test-get_current_dir_name.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+
+int main(void)
+{
+	free(get_current_dir_name());
+	return 0;
+}
diff --git a/tools/build/feature/test-reallocarray.c b/tools/build/feature/test-reallocarray.c
new file mode 100644
index 000000000000..8170de35150d
--- /dev/null
+++ b/tools/build/feature/test-reallocarray.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdlib.h>
+
+int main(void)
+{
+	return !!reallocarray(NULL, 1, 1);
+}
diff --git a/tools/crypto/getstat.c b/tools/crypto/getstat.c
new file mode 100644
index 000000000000..24115173a483
--- /dev/null
+++ b/tools/crypto/getstat.c
@@ -0,0 +1,294 @@
+/* Heavily copied from libkcapi 2015 - 2017, Stephan Mueller <smueller@chronox.de> */
+#include <errno.h>
+#include <linux/cryptouser.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#define CR_RTA(x)  ((struct rtattr *)(((char *)(x)) + NLMSG_ALIGN(sizeof(struct crypto_user_alg))))
+
+static int get_stat(const char *drivername)
+{
+	struct {
+		struct nlmsghdr n;
+		struct crypto_user_alg cru;
+	} req;
+	struct sockaddr_nl nl;
+	int sd = 0, ret;
+	socklen_t addr_len;
+	struct iovec iov;
+	struct msghdr msg;
+	char buf[4096];
+	struct nlmsghdr *res_n = (struct nlmsghdr *)buf;
+	struct crypto_user_alg *cru_res = NULL;
+	int res_len = 0;
+	struct rtattr *tb[CRYPTOCFGA_MAX + 1];
+	struct rtattr *rta;
+	struct nlmsgerr *errmsg;
+
+	memset(&req, 0, sizeof(req));
+	memset(&buf, 0, sizeof(buf));
+	memset(&msg, 0, sizeof(msg));
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.cru));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = CRYPTO_MSG_GETSTAT;
+	req.n.nlmsg_seq = time(NULL);
+
+	strncpy(req.cru.cru_driver_name, drivername, strlen(drivername));
+
+	sd =  socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
+	if (sd < 0) {
+		fprintf(stderr, "Netlink error: cannot open netlink socket");
+		return -errno;
+	}
+	memset(&nl, 0, sizeof(nl));
+	nl.nl_family = AF_NETLINK;
+	if (bind(sd, (struct sockaddr *)&nl, sizeof(nl)) < 0) {
+		ret = -errno;
+		fprintf(stderr, "Netlink error: cannot bind netlink socket");
+		goto out;
+	}
+
+	/* sanity check that netlink socket was successfully opened */
+	addr_len = sizeof(nl);
+	if (getsockname(sd, (struct sockaddr *)&nl, &addr_len) < 0) {
+		ret = -errno;
+		printf("Netlink error: cannot getsockname");
+		goto out;
+	}
+	if (addr_len != sizeof(nl)) {
+		ret = -errno;
+		printf("Netlink error: wrong address length %d", addr_len);
+		goto out;
+	}
+	if (nl.nl_family != AF_NETLINK) {
+		ret = -errno;
+		printf("Netlink error: wrong address family %d",
+				nl.nl_family);
+		goto out;
+	}
+
+	memset(&nl, 0, sizeof(nl));
+	nl.nl_family = AF_NETLINK;
+	iov.iov_base = (void *)&req.n;
+	iov.iov_len = req.n.nlmsg_len;
+	msg.msg_name = &nl;
+	msg.msg_namelen = sizeof(nl);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	if (sendmsg(sd, &msg, 0) < 0) {
+		ret = -errno;
+		printf("Netlink error: sendmsg failed");
+		goto out;
+	}
+	memset(buf, 0, sizeof(buf));
+	iov.iov_base = buf;
+	while (1) {
+		iov.iov_len = sizeof(buf);
+		ret = recvmsg(sd, &msg, 0);
+		if (ret < 0) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			ret = -errno;
+			printf("Netlink error: netlink receive error");
+			goto out;
+		}
+		if (ret == 0) {
+			ret = -errno;
+			printf("Netlink error: no data");
+			goto out;
+		}
+		if (ret > sizeof(buf)) {
+			ret = -errno;
+			printf("Netlink error: received too much data");
+			goto out;
+		}
+		break;
+	}
+
+	ret = -EFAULT;
+	res_len = res_n->nlmsg_len;
+	if (res_n->nlmsg_type == NLMSG_ERROR) {
+		errmsg = NLMSG_DATA(res_n);
+		fprintf(stderr, "Fail with %d\n", errmsg->error);
+		ret = errmsg->error;
+		goto out;
+	}
+
+	if (res_n->nlmsg_type == CRYPTO_MSG_GETSTAT) {
+		cru_res = NLMSG_DATA(res_n);
+		res_len -= NLMSG_SPACE(sizeof(*cru_res));
+	}
+	if (res_len < 0) {
+		printf("Netlink error: nlmsg len %d\n", res_len);
+		goto out;
+	}
+
+	if (!cru_res) {
+		ret = -EFAULT;
+		printf("Netlink error: no cru_res\n");
+		goto out;
+	}
+
+	rta = CR_RTA(cru_res);
+	memset(tb, 0, sizeof(struct rtattr *) * (CRYPTOCFGA_MAX + 1));
+	while (RTA_OK(rta, res_len)) {
+		if ((rta->rta_type <= CRYPTOCFGA_MAX) && (!tb[rta->rta_type]))
+			tb[rta->rta_type] = rta;
+		rta = RTA_NEXT(rta, res_len);
+	}
+	if (res_len) {
+		printf("Netlink error: unprocessed data %d",
+				res_len);
+		goto out;
+	}
+
+	if (tb[CRYPTOCFGA_STAT_HASH]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_HASH];
+		struct crypto_stat *rhash =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tHash\n\tHash: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rhash->stat_hash_cnt, rhash->stat_hash_tlen,
+			rhash->stat_hash_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_COMPRESS]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_COMPRESS];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tCompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_compress_cnt, rblk->stat_compress_tlen,
+			rblk->stat_decompress_cnt, rblk->stat_decompress_tlen,
+			rblk->stat_compress_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_ACOMP]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_ACOMP];
+		struct crypto_stat *rcomp =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tACompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rcomp->stat_compress_cnt, rcomp->stat_compress_tlen,
+			rcomp->stat_decompress_cnt, rcomp->stat_decompress_tlen,
+			rcomp->stat_compress_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_AEAD]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AEAD];
+		struct crypto_stat *raead =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tAEAD\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			raead->stat_encrypt_cnt, raead->stat_encrypt_tlen,
+			raead->stat_decrypt_cnt, raead->stat_decrypt_tlen,
+			raead->stat_aead_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_BLKCIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_BLKCIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tCipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_cipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_AKCIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AKCIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tAkcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tSign: %u\n\tVerify: %u\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_sign_cnt, rblk->stat_verify_cnt,
+			rblk->stat_akcipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_CIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_CIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_cipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_RNG]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_RNG];
+		struct crypto_stat *rrng =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tRNG\n\tSeed: %u\n\tGenerate: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rrng->stat_seed_cnt,
+			rrng->stat_generate_cnt, rrng->stat_generate_tlen,
+			rrng->stat_rng_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_KPP]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_KPP];
+		struct crypto_stat *rkpp =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tKPP\n\tSetsecret: %u\n\tGenerate public key: %u\n\tCompute_shared_secret: %u\n\tErrors: %u\n",
+			drivername,
+			rkpp->stat_setsecret_cnt,
+			rkpp->stat_generate_public_key_cnt,
+			rkpp->stat_compute_shared_secret_cnt,
+			rkpp->stat_kpp_err_cnt);
+	} else {
+		fprintf(stderr, "%s is of an unknown algorithm\n", drivername);
+	}
+	ret = 0;
+out:
+	close(sd);
+	return ret;
+}
+
+int main(int argc, const char *argv[])
+{
+	char buf[4096];
+	FILE *procfd;
+	int i, lastspace;
+	int ret;
+
+	procfd = fopen("/proc/crypto", "r");
+	if (!procfd) {
+		ret = errno;
+		fprintf(stderr, "Cannot open /proc/crypto %s\n", strerror(errno));
+		return ret;
+	}
+	if (argc > 1) {
+		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+			printf("Usage: %s [-h|--help] display this help\n", argv[0]);
+			printf("Usage: %s display all crypto statistics\n", argv[0]);
+			printf("Usage: %s drivername1 drivername2 ... = display crypto statistics about drivername1 ...\n", argv[0]);
+			return 0;
+		}
+		for (i = 1; i < argc; i++) {
+			ret = get_stat(argv[i]);
+			if (ret) {
+				fprintf(stderr, "Failed with %s\n", strerror(-ret));
+				return ret;
+			}
+		}
+		return 0;
+	}
+
+	while (fgets(buf, sizeof(buf), procfd)) {
+		if (!strncmp(buf, "driver", 6)) {
+			lastspace = 0;
+			i = 0;
+			while (i < strlen(buf)) {
+				i++;
+				if (buf[i] == ' ')
+					lastspace = i;
+			}
+			buf[strlen(buf) - 1] = '\0';
+			ret = get_stat(buf + lastspace + 1);
+			if (ret) {
+				fprintf(stderr, "Failed with %s\n", strerror(-ret));
+				goto out;
+			}
+		}
+	}
+out:
+	fclose(procfd);
+	return ret;
+}
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index d78aed86af09..8ff8cb1a11f4 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -234,6 +234,7 @@ int main(int argc, char *argv[])
 			break;
 
 		default:
+			error = HV_E_FAIL;
 			syslog(LOG_ERR, "Unknown operation: %d",
 				buffer.hdr.operation);
 
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index dbf6e8bd98ba..bbb2a8ef367c 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -286,7 +286,7 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size)
 		 * Found a match; just move the remaining
 		 * entries up.
 		 */
-		if (i == num_records) {
+		if (i == (num_records - 1)) {
 			kvp_file_info[pool].num_records--;
 			kvp_update_file(pool);
 			return 0;
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 34031a297f02..b13300172762 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -36,6 +36,8 @@
 #include <linux/hyperv.h>
 #include <syslog.h>
 #include <getopt.h>
+#include <stdbool.h>
+#include <dirent.h>
 
 /* Don't use syslog() in the function since that can cause write to disk */
 static int vss_do_freeze(char *dir, unsigned int cmd)
@@ -68,6 +70,55 @@ static int vss_do_freeze(char *dir, unsigned int cmd)
 	return !!ret;
 }
 
+static bool is_dev_loop(const char *blkname)
+{
+	char *buffer;
+	DIR *dir;
+	struct dirent *entry;
+	bool ret = false;
+
+	buffer = malloc(PATH_MAX);
+	if (!buffer) {
+		syslog(LOG_ERR, "Can't allocate memory!");
+		exit(1);
+	}
+
+	snprintf(buffer, PATH_MAX, "%s/loop", blkname);
+	if (!access(buffer, R_OK | X_OK)) {
+		ret = true;
+		goto free_buffer;
+	} else if (errno != ENOENT) {
+		syslog(LOG_ERR, "Can't access: %s; error:%d %s!",
+		       buffer, errno, strerror(errno));
+	}
+
+	snprintf(buffer, PATH_MAX, "%s/slaves", blkname);
+	dir = opendir(buffer);
+	if (!dir) {
+		if (errno != ENOENT)
+			syslog(LOG_ERR, "Can't opendir: %s; error:%d %s!",
+			       buffer, errno, strerror(errno));
+		goto free_buffer;
+	}
+
+	while ((entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name, ".") == 0 ||
+		    strcmp(entry->d_name, "..") == 0)
+			continue;
+
+		snprintf(buffer, PATH_MAX, "%s/slaves/%s", blkname,
+			 entry->d_name);
+		if (is_dev_loop(buffer)) {
+			ret = true;
+			break;
+		}
+	}
+	closedir(dir);
+free_buffer:
+	free(buffer);
+	return ret;
+}
+
 static int vss_operate(int operation)
 {
 	char match[] = "/dev/";
@@ -75,6 +126,7 @@ static int vss_operate(int operation)
 	struct mntent *ent;
 	struct stat sb;
 	char errdir[1024] = {0};
+	char blkdir[23]; /* /sys/dev/block/XXX:XXX */
 	unsigned int cmd;
 	int error = 0, root_seen = 0, save_errno = 0;
 
@@ -96,10 +148,15 @@ static int vss_operate(int operation)
 	while ((ent = getmntent(mounts))) {
 		if (strncmp(ent->mnt_fsname, match, strlen(match)))
 			continue;
-		if (stat(ent->mnt_fsname, &sb) == -1)
-			continue;
-		if (S_ISBLK(sb.st_mode) && major(sb.st_rdev) == LOOP_MAJOR)
-			continue;
+		if (stat(ent->mnt_fsname, &sb)) {
+			syslog(LOG_ERR, "Can't stat: %s; error:%d %s!",
+			       ent->mnt_fsname, errno, strerror(errno));
+		} else {
+			sprintf(blkdir, "/sys/dev/block/%d:%d",
+				major(sb.st_rdev), minor(sb.st_rdev));
+			if (is_dev_loop(blkdir))
+				continue;
+		}
 		if (hasmntopt(ent, MNTOPT_RO) != NULL)
 			continue;
 		if (strcmp(ent->mnt_type, "vfat") == 0)
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
index 353e56768df8..55e7374bade0 100644
--- a/tools/hv/lsvmbus
+++ b/tools/hv/lsvmbus
@@ -17,7 +17,7 @@ if options.verbose is not None:
 
 vmbus_sys_path = '/sys/bus/vmbus/devices'
 if not os.path.isdir(vmbus_sys_path):
-	print "%s doesn't exist: exiting..." % vmbus_sys_path
+	print("%s doesn't exist: exiting..." % vmbus_sys_path)
 	exit(-1)
 
 vmbus_dev_dict = {
@@ -93,11 +93,11 @@ format2 = '%2s: Class_ID = %s - %s\n\tDevice_ID = %s\n\tSysfs path: %s\n%s'
 
 for d in vmbus_dev_list:
 	if verbose == 0:
-		print ('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc)
+		print(('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc))
 	elif verbose == 1:
-		print ('VMBUS ID ' + format1) %	\
-			(d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping)
+		print (('VMBUS ID ' + format1) %	\
+			(d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping))
 	else:
-		print ('VMBUS ID ' + format2) % \
+		print (('VMBUS ID ' + format2) % \
 			(d.vmbus_id, d.class_id, d.dev_desc, \
-			d.device_id, d.sysfs_path, d.chn_vp_mapping)
+			d.device_id, d.sysfs_path, d.chn_vp_mapping))
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index b61245e1181d..ac2de6b7e89f 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -58,6 +58,8 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_PH] = "ph",
 	[IIO_UVINDEX] = "uvindex",
 	[IIO_GRAVITY] = "gravity",
+	[IIO_POSITIONRELATIVE] = "positionrelative",
+	[IIO_PHASE] = "phase",
 };
 
 static const char * const iio_ev_type_text[] = {
@@ -96,6 +98,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_LIGHT_GREEN] = "green",
 	[IIO_MOD_LIGHT_BLUE] = "blue",
 	[IIO_MOD_LIGHT_UV] = "uv",
+	[IIO_MOD_LIGHT_DUV] = "duv",
 	[IIO_MOD_QUATERNION] = "quaternion",
 	[IIO_MOD_TEMP_AMBIENT] = "ambient",
 	[IIO_MOD_TEMP_OBJECT] = "object",
@@ -151,6 +154,8 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_PH:
 	case IIO_UVINDEX:
 	case IIO_GRAVITY:
+	case IIO_POSITIONRELATIVE:
+	case IIO_PHASE:
 		break;
 	default:
 		return false;
@@ -178,6 +183,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_MOD_LIGHT_GREEN:
 	case IIO_MOD_LIGHT_BLUE:
 	case IIO_MOD_LIGHT_UV:
+	case IIO_MOD_LIGHT_DUV:
 	case IIO_MOD_QUATERNION:
 	case IIO_MOD_TEMP_AMBIENT:
 	case IIO_MOD_TEMP_OBJECT:
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index f0c6f54a8b2f..3040830d7797 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -248,7 +248,7 @@ void print_usage(void)
 		"Capture, convert and output data from IIO device buffer\n"
 		"  -a         Auto-activate all available channels\n"
 		"  -A         Force-activate ALL channels\n"
-		"  -c <n>     Do n conversions\n"
+		"  -c <n>     Do n conversions, or loop forever if n < 0\n"
 		"  -e         Disable wait for event (new data)\n"
 		"  -g         Use trigger-less mode\n"
 		"  -l <n>     Set buffer length to n samples\n"
@@ -330,11 +330,14 @@ static const struct option longopts[] = {
 
 int main(int argc, char **argv)
 {
-	unsigned long num_loops = 2;
+	unsigned long long num_loops = 2;
 	unsigned long timedelay = 1000000;
 	unsigned long buf_len = 128;
 
-	int ret, c, i, j, toread;
+	ssize_t i;
+	unsigned long long j;
+	unsigned long toread;
+	int ret, c;
 	int fp = -1;
 
 	int num_channels = 0;
@@ -366,7 +369,7 @@ int main(int argc, char **argv)
 			break;	
 		case 'c':
 			errno = 0;
-			num_loops = strtoul(optarg, &dummy, 10);
+			num_loops = strtoll(optarg, &dummy, 10);
 			if (errno) {
 				ret = -errno;
 				goto error;
@@ -634,7 +637,7 @@ int main(int argc, char **argv)
 		goto error;
 	}
 
-	for (j = 0; j < num_loops; j++) {
+	for (j = 0; j < num_loops || num_loops < 0; j++) {
 		if (!noevents) {
 			struct pollfd pfd = {
 				.fd = fp,
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 9bce3b56b5e7..5d2ab38965cc 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -27,5 +27,6 @@
 #include <asm-generic/bitops/hweight.h>
 
 #include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 
 #endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
index 21c41ccd1266..2f6ea28764a7 100644
--- a/tools/include/asm-generic/bitops/atomic.h
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -15,13 +15,4 @@ static inline void clear_bit(int nr, unsigned long *addr)
 	addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
 }
 
-static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
-{
-	return ((1UL << (nr % __BITS_PER_LONG)) &
-		(((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
-}
-
-#define __set_bit(nr, addr)	set_bit(nr, addr)
-#define __clear_bit(nr, addr)	clear_bit(nr, addr)
-
 #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
diff --git a/tools/include/asm-generic/bitops/non-atomic.h b/tools/include/asm-generic/bitops/non-atomic.h
new file mode 100644
index 000000000000..7e10c4b50c5d
--- /dev/null
+++ b/tools/include/asm-generic/bitops/non-atomic.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+#include <asm/types.h>
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p  |= mask;
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p &= ~mask;
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p ^= mask;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr,
+					    volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 391d942536e5..8d378c57cb01 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/compiler.h>
 #if defined(__i386__) || defined(__x86_64__)
 #include "../../arch/x86/include/asm/barrier.h"
 #elif defined(__arm__)
@@ -26,3 +27,37 @@
 #else
 #include <asm-generic/barrier.h>
 #endif
+
+/*
+ * Generic fallback smp_*() definitions for archs that haven't
+ * been updated yet.
+ */
+
+#ifndef smp_rmb
+# define smp_rmb()	rmb()
+#endif
+
+#ifndef smp_wmb
+# define smp_wmb()	wmb()
+#endif
+
+#ifndef smp_mb
+# define smp_mb()	mb()
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v)		\
+do {						\
+	smp_mb();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	smp_mb();				\
+	___p1;					\
+})
+#endif
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 63440cc8d618..05dca5c203f3 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -15,6 +15,7 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, int bits);
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, unsigned int bits);
+void bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
 
@@ -97,6 +98,23 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
 }
 
 /**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ */
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old;
+
+	old = *p;
+	*p = old & ~mask;
+
+	return (old & mask) != 0;
+}
+
+/**
  * bitmap_alloc - Allocate bitmap
  * @nbits: Number of bits
  */
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index acc704bd3998..0b0ef3abc966 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -3,8 +3,6 @@
 #define _TOOLS_LINUX_BITOPS_H_
 
 #include <asm/types.h>
-#include <linux/compiler.h>
-
 #ifndef __WORDSIZE
 #define __WORDSIZE (__SIZEOF_LONG__ * 8)
 #endif
@@ -12,10 +10,9 @@
 #ifndef BITS_PER_LONG
 # define BITS_PER_LONG __WORDSIZE
 #endif
+#include <linux/bits.h>
+#include <linux/compiler.h>
 
-#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BITS_PER_BYTE		8
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #define BITS_TO_U64(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
 #define BITS_TO_U32(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
new file mode 100644
index 000000000000..2b7b532c1d51
--- /dev/null
+++ b/tools/include/linux/bits.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITS_H
+#define __LINUX_BITS_H
+#include <asm/bitsperlong.h>
+
+#define BIT(nr)			(1UL << (nr))
+#define BIT_ULL(nr)		(1ULL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
+#define BITS_PER_BYTE		8
+
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l) \
+	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif	/* __LINUX_BITS_H */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index a3a4427441bf..0d35f18006a1 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -21,6 +21,9 @@
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 
+#ifndef __pure
+#define  __pure		__attribute__((pure))
+#endif
 #define  noinline	__attribute__((noinline))
 #ifndef __packed
 #define __packed	__attribute__((packed))
@@ -33,3 +36,7 @@
 #endif
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 #define __scanf(a, b)	__attribute__((format(scanf, a, b)))
+
+#if GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
index 7a8b61ad44cb..094649667bae 100644
--- a/tools/include/linux/err.h
+++ b/tools/include/linux/err.h
@@ -52,4 +52,11 @@ static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
 	return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
 }
 
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+	else
+		return 0;
+}
 #endif /* _LINUX_ERR_H */
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index c5e512da8d8a..af55acf73e75 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -263,6 +263,16 @@
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
 	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
 
+/* Relative call */
+
+#define BPF_CALL_REL(TGT)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_CALL,			\
+		.dst_reg = 0,					\
+		.src_reg = BPF_PSEUDO_CALL,			\
+		.off   = 0,					\
+		.imm   = TGT })
+
 /* Program exit */
 
 #define BPF_EXIT_INSN()						\
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad884452c5c..6935ef94e77a 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -70,6 +70,7 @@
 #define BUG_ON(cond) assert(!(cond))
 #endif
 #endif
+#define BUG()	BUG_ON(1)
 
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define cpu_to_le16 bswap_16
diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h
index 6b0c36a58fcb..e56997288f2b 100644
--- a/tools/include/linux/lockdep.h
+++ b/tools/include/linux/lockdep.h
@@ -30,9 +30,12 @@ struct task_struct {
 	struct held_lock held_locks[MAX_LOCK_DEPTH];
 	gfp_t lockdep_reclaim_gfp;
 	int pid;
+	int state;
 	char comm[17];
 };
 
+#define TASK_RUNNING 0
+
 extern struct task_struct *__curr(void);
 
 #define current (__curr())
diff --git a/tools/include/linux/nmi.h b/tools/include/linux/nmi.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/linux/nmi.h
diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h
new file mode 100644
index 000000000000..8712ff70995f
--- /dev/null
+++ b/tools/include/linux/overflow.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define is_signed_type(type)       (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	__builtin_add_overflow(__a, __b, __d);	\
+})
+
+#define check_sub_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	__builtin_sub_overflow(__a, __b, __d);	\
+})
+
+#define check_mul_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	__builtin_mul_overflow(__a, __b, __d);	\
+})
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = __a + __b;			\
+	*__d < __a;				\
+})
+#define __unsigned_sub_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = __a - __b;			\
+	__a < __b;				\
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);				\
+	typeof(b) __b = (b);				\
+	typeof(d) __d = (d);				\
+	(void) (&__a == &__b);				\
+	(void) (&__a == __d);				\
+	*__d = __a * __b;				\
+	__builtin_constant_p(__b) ?			\
+	  __b > 0 && __a > type_max(typeof(__a)) / __b : \
+	  __a > 0 && __b > type_max(typeof(__b)) / __a;	 \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = (u64)__a + (u64)__b;		\
+	(((~(__a ^ __b)) & (*__d ^ __a))	\
+		& type_min(typeof(__a))) != 0;	\
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = (u64)__a - (u64)__b;		\
+	((((__a ^ __b)) & (*__d ^ __a))		\
+		& type_min(typeof(__a))) != 0;	\
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({				\
+	typeof(a) __a = (a);						\
+	typeof(b) __b = (b);						\
+	typeof(d) __d = (d);						\
+	typeof(a) __tmax = type_max(typeof(a));				\
+	typeof(a) __tmin = type_min(typeof(a));				\
+	(void) (&__a == &__b);						\
+	(void) (&__a == __d);						\
+	*__d = (u64)__a * (u64)__b;					\
+	(__b > 0   && (__a > __tmax/__b || __a < __tmin/__b)) ||	\
+	(__b < (typeof(__b))-1  && (__a > __tmin/__b || __a < __tmax/__b)) || \
+	(__b == (typeof(__b))-1 && __a == __tmin);			\
+})
+
+
+#define check_add_overflow(a, b, d)					\
+	__builtin_choose_expr(is_signed_type(typeof(a)),		\
+			__signed_add_overflow(a, b, d),			\
+			__unsigned_add_overflow(a, b, d))
+
+#define check_sub_overflow(a, b, d)					\
+	__builtin_choose_expr(is_signed_type(typeof(a)),		\
+			__signed_sub_overflow(a, b, d),			\
+			__unsigned_sub_overflow(a, b, d))
+
+#define check_mul_overflow(a, b, d)					\
+	__builtin_choose_expr(is_signed_type(typeof(a)),		\
+			__signed_mul_overflow(a, b, d),			\
+			__unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(a, b, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(a, b, &bytes))
+		return SIZE_MAX;
+	if (check_mul_overflow(bytes, c, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(n, size, &bytes))
+		return SIZE_MAX;
+	if (check_add_overflow(bytes, c, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, n)					\
+	__ab_c_size(n,							\
+		    sizeof(*(p)->member) + __must_be_array((p)->member),\
+		    sizeof(*(p)))
+
+#endif /* __LINUX_OVERFLOW_H */
diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..9a083ae60473
--- /dev/null
+++ b/tools/include/linux/ring_buffer.h
@@ -0,0 +1,73 @@
+#ifndef _TOOLS_LINUX_RING_BUFFER_H_
+#define _TOOLS_LINUX_RING_BUFFER_H_
+
+#include <asm/barrier.h>
+
+/*
+ * Contract with kernel for walking the perf ring buffer from
+ * user space requires the following barrier pairing (quote
+ * from kernel/events/ring_buffer.c):
+ *
+ *   Since the mmap() consumer (userspace) can run on a
+ *   different CPU:
+ *
+ *   kernel                             user
+ *
+ *   if (LOAD ->data_tail) {            LOAD ->data_head
+ *                      (A)             smp_rmb()       (C)
+ *      STORE $data                     LOAD $data
+ *      smp_wmb()       (B)             smp_mb()        (D)
+ *      STORE ->data_head               STORE ->data_tail
+ *   }
+ *
+ *   Where A pairs with D, and B pairs with C.
+ *
+ *   In our case A is a control dependency that separates the
+ *   load of the ->data_tail and the stores of $data. In case
+ *   ->data_tail indicates there is no room in the buffer to
+ *   store $data we do not.
+ *
+ *   D needs to be a full barrier since it separates the data
+ *   READ from the tail WRITE.
+ *
+ *   For B a WMB is sufficient since it separates two WRITEs,
+ *   and for C an RMB is sufficient since it separates two READs.
+ *
+ * Note, instead of B, C, D we could also use smp_store_release()
+ * in B and D as well as smp_load_acquire() in C.
+ *
+ * However, this optimization does not make sense for all kernel
+ * supported architectures since for a fair number it would
+ * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
+ * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
+ *
+ * Thus for those smp_wmb() in B and smp_rmb() in C would still
+ * be less expensive. For the case of D this has either the same
+ * cost or is less expensive, for example, due to TSO x86 can
+ * avoid the CPU barrier entirely.
+ */
+
+static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
+{
+/*
+ * Architectures where smp_load_acquire() does not fallback to
+ * READ_ONCE() + smp_mb() pair.
+ */
+#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
+    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
+	return smp_load_acquire(&base->data_head);
+#else
+	u64 head = READ_ONCE(base->data_head);
+
+	smp_rmb();
+	return head;
+#endif
+}
+
+static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
+					  u64 tail)
+{
+	smp_store_release(&base->data_tail, tail);
+}
+
+#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 1738c0391da4..c934572d935c 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -8,8 +8,14 @@
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
 #define __SPIN_LOCK_UNLOCKED(x)	(pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
-#define spin_lock_init(x)      pthread_mutex_init(x, NULL)
-
+#define spin_lock_init(x)	pthread_mutex_init(x, NULL)
+
+#define spin_lock(x)			pthread_mutex_lock(x)
+#define spin_unlock(x)			pthread_mutex_unlock(x)
+#define spin_lock_bh(x)			pthread_mutex_lock(x)
+#define spin_unlock_bh(x)		pthread_mutex_unlock(x)
+#define spin_lock_irq(x)		pthread_mutex_lock(x)
+#define spin_unlock_irq(x)		pthread_mutex_unlock(x)
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
 
@@ -31,4 +37,6 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
 	return true;
 }
 
+#include <linux/lockdep.h>
+
 #endif
diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h
new file mode 100644
index 000000000000..e907ba6f15e5
--- /dev/null
+++ b/tools/include/tools/libc_compat.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: (LGPL-2.0+ OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+
+#ifndef __TOOLS_LIBC_COMPAT_H
+#define __TOOLS_LIBC_COMPAT_H
+
+#include <stdlib.h>
+#include <linux/overflow.h>
+
+#ifdef COMPAT_NEED_REALLOCARRAY
+static inline void *reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(nmemb, size, &bytes)))
+		return NULL;
+	return realloc(ptr, bytes);
+}
+#endif
+#endif
diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h
index 040651735662..cdc9f4ca8c27 100644
--- a/tools/include/uapi/asm-generic/ioctls.h
+++ b/tools/include/uapi/asm-generic/ioctls.h
@@ -79,6 +79,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define FIONCLEX	0x5450
 #define FIOCLEX		0x5451
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
new file mode 100644
index 000000000000..538546edbfbd
--- /dev/null
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -0,0 +1,787 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#include <asm/bitsperlong.h>
+
+/*
+ * This file contains the system call numbers, based on the
+ * layout of the x86-64 architecture, which embeds the
+ * pointer to the syscall in the table.
+ *
+ * As a basic principle, no duplication of functionality
+ * should be added, e.g. we don't use lseek when llseek
+ * is present. New architectures should use this file
+ * and implement the less feature-full calls in user space.
+ */
+
+#ifndef __SYSCALL
+#define __SYSCALL(x, y)
+#endif
+
+#if __BITS_PER_LONG == 32 || defined(__SYSCALL_COMPAT)
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32)
+#else
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64)
+#endif
+
+#ifdef __SYSCALL_COMPAT
+#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _comp)
+#define __SC_COMP_3264(_nr, _32, _64, _comp) __SYSCALL(_nr, _comp)
+#else
+#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys)
+#define __SC_COMP_3264(_nr, _32, _64, _comp) __SC_3264(_nr, _32, _64)
+#endif
+
+#define __NR_io_setup 0
+__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup)
+#define __NR_io_destroy 1
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_submit 2
+__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
+#define __NR_io_cancel 3
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_io_getevents 4
+__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents)
+
+/* fs/xattr.c */
+#define __NR_setxattr 5
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 6
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 7
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 8
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 9
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 10
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 11
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 12
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 13
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 14
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 15
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 16
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+
+/* fs/dcache.c */
+#define __NR_getcwd 17
+__SYSCALL(__NR_getcwd, sys_getcwd)
+
+/* fs/cookies.c */
+#define __NR_lookup_dcookie 18
+__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie)
+
+/* fs/eventfd.c */
+#define __NR_eventfd2 19
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+
+/* fs/eventpoll.c */
+#define __NR_epoll_create1 20
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_epoll_ctl 21
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_pwait 22
+__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait)
+
+/* fs/fcntl.c */
+#define __NR_dup 23
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_dup3 24
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR3264_fcntl 25
+__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64)
+
+/* fs/inotify_user.c */
+#define __NR_inotify_init1 26
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_inotify_add_watch 27
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 28
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+
+/* fs/ioctl.c */
+#define __NR_ioctl 29
+__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl)
+
+/* fs/ioprio.c */
+#define __NR_ioprio_set 30
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 31
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+
+/* fs/locks.c */
+#define __NR_flock 32
+__SYSCALL(__NR_flock, sys_flock)
+
+/* fs/namei.c */
+#define __NR_mknodat 33
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_mkdirat 34
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_unlinkat 35
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_symlinkat 36
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_linkat 37
+__SYSCALL(__NR_linkat, sys_linkat)
+#ifdef __ARCH_WANT_RENAMEAT
+/* renameat is superseded with flags by renameat2 */
+#define __NR_renameat 38
+__SYSCALL(__NR_renameat, sys_renameat)
+#endif /* __ARCH_WANT_RENAMEAT */
+
+/* fs/namespace.c */
+#define __NR_umount2 39
+__SYSCALL(__NR_umount2, sys_umount)
+#define __NR_mount 40
+__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+#define __NR_pivot_root 41
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+
+/* fs/nfsctl.c */
+#define __NR_nfsservctl 42
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
+
+/* fs/open.c */
+#define __NR3264_statfs 43
+__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \
+	       compat_sys_statfs64)
+#define __NR3264_fstatfs 44
+__SC_COMP_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs, \
+	       compat_sys_fstatfs64)
+#define __NR3264_truncate 45
+__SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \
+	       compat_sys_truncate64)
+#define __NR3264_ftruncate 46
+__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \
+	       compat_sys_ftruncate64)
+
+#define __NR_fallocate 47
+__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate)
+#define __NR_faccessat 48
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_chdir 49
+__SYSCALL(__NR_chdir, sys_chdir)
+#define __NR_fchdir 50
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_chroot 51
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_fchmod 52
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchmodat 53
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_fchownat 54
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_fchown 55
+__SYSCALL(__NR_fchown, sys_fchown)
+#define __NR_openat 56
+__SC_COMP(__NR_openat, sys_openat, compat_sys_openat)
+#define __NR_close 57
+__SYSCALL(__NR_close, sys_close)
+#define __NR_vhangup 58
+__SYSCALL(__NR_vhangup, sys_vhangup)
+
+/* fs/pipe.c */
+#define __NR_pipe2 59
+__SYSCALL(__NR_pipe2, sys_pipe2)
+
+/* fs/quota.c */
+#define __NR_quotactl 60
+__SYSCALL(__NR_quotactl, sys_quotactl)
+
+/* fs/readdir.c */
+#define __NR_getdents64 61
+__SYSCALL(__NR_getdents64, sys_getdents64)
+
+/* fs/read_write.c */
+#define __NR3264_lseek 62
+__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
+#define __NR_read 63
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 64
+__SYSCALL(__NR_write, sys_write)
+#define __NR_readv 65
+__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+#define __NR_writev 66
+__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+#define __NR_pread64 67
+__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
+#define __NR_pwrite64 68
+__SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
+#define __NR_preadv 69
+__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
+#define __NR_pwritev 70
+__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
+
+/* fs/sendfile.c */
+#define __NR3264_sendfile 71
+__SYSCALL(__NR3264_sendfile, sys_sendfile64)
+
+/* fs/select.c */
+#define __NR_pselect6 72
+__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6)
+#define __NR_ppoll 73
+__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll)
+
+/* fs/signalfd.c */
+#define __NR_signalfd4 74
+__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
+
+/* fs/splice.c */
+#define __NR_vmsplice 75
+__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+#define __NR_splice 76
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_tee 77
+__SYSCALL(__NR_tee, sys_tee)
+
+/* fs/stat.c */
+#define __NR_readlinkat 78
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
+#define __NR3264_fstatat 79
+__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
+#define __NR3264_fstat 80
+__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+#endif
+
+/* fs/sync.c */
+#define __NR_sync 81
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_fsync 82
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_fdatasync 83
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#ifdef __ARCH_WANT_SYNC_FILE_RANGE2
+#define __NR_sync_file_range2 84
+__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \
+	  compat_sys_sync_file_range2)
+#else
+#define __NR_sync_file_range 84
+__SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
+	  compat_sys_sync_file_range)
+#endif
+
+/* fs/timerfd.c */
+#define __NR_timerfd_create 85
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_timerfd_settime 86
+__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \
+	  compat_sys_timerfd_settime)
+#define __NR_timerfd_gettime 87
+__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \
+	  compat_sys_timerfd_gettime)
+
+/* fs/utimes.c */
+#define __NR_utimensat 88
+__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat)
+
+/* kernel/acct.c */
+#define __NR_acct 89
+__SYSCALL(__NR_acct, sys_acct)
+
+/* kernel/capability.c */
+#define __NR_capget 90
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 91
+__SYSCALL(__NR_capset, sys_capset)
+
+/* kernel/exec_domain.c */
+#define __NR_personality 92
+__SYSCALL(__NR_personality, sys_personality)
+
+/* kernel/exit.c */
+#define __NR_exit 93
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_exit_group 94
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_waitid 95
+__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid)
+
+/* kernel/fork.c */
+#define __NR_set_tid_address 96
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_unshare 97
+__SYSCALL(__NR_unshare, sys_unshare)
+
+/* kernel/futex.c */
+#define __NR_futex 98
+__SC_COMP(__NR_futex, sys_futex, compat_sys_futex)
+#define __NR_set_robust_list 99
+__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
+	  compat_sys_set_robust_list)
+#define __NR_get_robust_list 100
+__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
+	  compat_sys_get_robust_list)
+
+/* kernel/hrtimer.c */
+#define __NR_nanosleep 101
+__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep)
+
+/* kernel/itimer.c */
+#define __NR_getitimer 102
+__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer)
+#define __NR_setitimer 103
+__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer)
+
+/* kernel/kexec.c */
+#define __NR_kexec_load 104
+__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load)
+
+/* kernel/module.c */
+#define __NR_init_module 105
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 106
+__SYSCALL(__NR_delete_module, sys_delete_module)
+
+/* kernel/posix-timers.c */
+#define __NR_timer_create 107
+__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
+#define __NR_timer_gettime 108
+__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime)
+#define __NR_timer_getoverrun 109
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_settime 110
+__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime)
+#define __NR_timer_delete 111
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 112
+__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime)
+#define __NR_clock_gettime 113
+__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime)
+#define __NR_clock_getres 114
+__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres)
+#define __NR_clock_nanosleep 115
+__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \
+	  compat_sys_clock_nanosleep)
+
+/* kernel/printk.c */
+#define __NR_syslog 116
+__SYSCALL(__NR_syslog, sys_syslog)
+
+/* kernel/ptrace.c */
+#define __NR_ptrace 117
+__SYSCALL(__NR_ptrace, sys_ptrace)
+
+/* kernel/sched/core.c */
+#define __NR_sched_setparam 118
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_setscheduler 119
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 120
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_getparam 121
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setaffinity 122
+__SC_COMP(__NR_sched_setaffinity, sys_sched_setaffinity, \
+	  compat_sys_sched_setaffinity)
+#define __NR_sched_getaffinity 123
+__SC_COMP(__NR_sched_getaffinity, sys_sched_getaffinity, \
+	  compat_sys_sched_getaffinity)
+#define __NR_sched_yield 124
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 125
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 126
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 127
+__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \
+	  compat_sys_sched_rr_get_interval)
+
+/* kernel/signal.c */
+#define __NR_restart_syscall 128
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_kill 129
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_tkill 130
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_tgkill 131
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_sigaltstack 132
+__SC_COMP(__NR_sigaltstack, sys_sigaltstack, compat_sys_sigaltstack)
+#define __NR_rt_sigsuspend 133
+__SC_COMP(__NR_rt_sigsuspend, sys_rt_sigsuspend, compat_sys_rt_sigsuspend)
+#define __NR_rt_sigaction 134
+__SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
+#define __NR_rt_sigprocmask 135
+__SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
+#define __NR_rt_sigpending 136
+__SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 137
+__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \
+	  compat_sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 138
+__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
+	  compat_sys_rt_sigqueueinfo)
+#define __NR_rt_sigreturn 139
+__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn)
+
+/* kernel/sys.c */
+#define __NR_setpriority 140
+__SYSCALL(__NR_setpriority, sys_setpriority)
+#define __NR_getpriority 141
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_reboot 142
+__SYSCALL(__NR_reboot, sys_reboot)
+#define __NR_setregid 143
+__SYSCALL(__NR_setregid, sys_setregid)
+#define __NR_setgid 144
+__SYSCALL(__NR_setgid, sys_setgid)
+#define __NR_setreuid 145
+__SYSCALL(__NR_setreuid, sys_setreuid)
+#define __NR_setuid 146
+__SYSCALL(__NR_setuid, sys_setuid)
+#define __NR_setresuid 147
+__SYSCALL(__NR_setresuid, sys_setresuid)
+#define __NR_getresuid 148
+__SYSCALL(__NR_getresuid, sys_getresuid)
+#define __NR_setresgid 149
+__SYSCALL(__NR_setresgid, sys_setresgid)
+#define __NR_getresgid 150
+__SYSCALL(__NR_getresgid, sys_getresgid)
+#define __NR_setfsuid 151
+__SYSCALL(__NR_setfsuid, sys_setfsuid)
+#define __NR_setfsgid 152
+__SYSCALL(__NR_setfsgid, sys_setfsgid)
+#define __NR_times 153
+__SC_COMP(__NR_times, sys_times, compat_sys_times)
+#define __NR_setpgid 154
+__SYSCALL(__NR_setpgid, sys_setpgid)
+#define __NR_getpgid 155
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_getsid 156
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_setsid 157
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_getgroups 158
+__SYSCALL(__NR_getgroups, sys_getgroups)
+#define __NR_setgroups 159
+__SYSCALL(__NR_setgroups, sys_setgroups)
+#define __NR_uname 160
+__SYSCALL(__NR_uname, sys_newuname)
+#define __NR_sethostname 161
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setdomainname 162
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_getrlimit 163
+__SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit)
+#define __NR_setrlimit 164
+__SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit)
+#define __NR_getrusage 165
+__SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage)
+#define __NR_umask 166
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_prctl 167
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_getcpu 168
+__SYSCALL(__NR_getcpu, sys_getcpu)
+
+/* kernel/time.c */
+#define __NR_gettimeofday 169
+__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
+#define __NR_settimeofday 170
+__SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
+#define __NR_adjtimex 171
+__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex)
+
+/* kernel/timer.c */
+#define __NR_getpid 172
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_getppid 173
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getuid 174
+__SYSCALL(__NR_getuid, sys_getuid)
+#define __NR_geteuid 175
+__SYSCALL(__NR_geteuid, sys_geteuid)
+#define __NR_getgid 176
+__SYSCALL(__NR_getgid, sys_getgid)
+#define __NR_getegid 177
+__SYSCALL(__NR_getegid, sys_getegid)
+#define __NR_gettid 178
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_sysinfo 179
+__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
+
+/* ipc/mqueue.c */
+#define __NR_mq_open 180
+__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
+#define __NR_mq_unlink 181
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 182
+__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend)
+#define __NR_mq_timedreceive 183
+__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \
+	  compat_sys_mq_timedreceive)
+#define __NR_mq_notify 184
+__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
+#define __NR_mq_getsetattr 185
+__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr)
+
+/* ipc/msg.c */
+#define __NR_msgget 186
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 187
+__SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl)
+#define __NR_msgrcv 188
+__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv)
+#define __NR_msgsnd 189
+__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
+
+/* ipc/sem.c */
+#define __NR_semget 190
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 191
+__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
+#define __NR_semtimedop 192
+__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop)
+#define __NR_semop 193
+__SYSCALL(__NR_semop, sys_semop)
+
+/* ipc/shm.c */
+#define __NR_shmget 194
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 195
+__SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl)
+#define __NR_shmat 196
+__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat)
+#define __NR_shmdt 197
+__SYSCALL(__NR_shmdt, sys_shmdt)
+
+/* net/socket.c */
+#define __NR_socket 198
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_socketpair 199
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_bind 200
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_listen 201
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 202
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_connect 203
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_getsockname 204
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 205
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_sendto 206
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recvfrom 207
+__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom)
+#define __NR_setsockopt 208
+__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt)
+#define __NR_getsockopt 209
+__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt)
+#define __NR_shutdown 210
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_sendmsg 211
+__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg)
+#define __NR_recvmsg 212
+__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg)
+
+/* mm/filemap.c */
+#define __NR_readahead 213
+__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead)
+
+/* mm/nommu.c, also with MMU */
+#define __NR_brk 214
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_munmap 215
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_mremap 216
+__SYSCALL(__NR_mremap, sys_mremap)
+
+/* security/keys/keyctl.c */
+#define __NR_add_key 217
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 218
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 219
+__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
+
+/* arch/example/kernel/sys_example.c */
+#define __NR_clone 220
+__SYSCALL(__NR_clone, sys_clone)
+#define __NR_execve 221
+__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
+
+#define __NR3264_mmap 222
+__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
+/* mm/fadvise.c */
+#define __NR3264_fadvise64 223
+__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64)
+
+/* mm/, CONFIG_MMU only */
+#ifndef __ARCH_NOMMU
+#define __NR_swapon 224
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_swapoff 225
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_mprotect 226
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_msync 227
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_mlock 228
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 229
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 230
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 231
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_mincore 232
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 233
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_remap_file_pages 234
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+#define __NR_mbind 235
+__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+#define __NR_get_mempolicy 236
+__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+#define __NR_set_mempolicy 237
+__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+#define __NR_migrate_pages 238
+__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+#define __NR_move_pages 239
+__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+#endif
+
+#define __NR_rt_tgsigqueueinfo 240
+__SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
+	  compat_sys_rt_tgsigqueueinfo)
+#define __NR_perf_event_open 241
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_accept4 242
+__SYSCALL(__NR_accept4, sys_accept4)
+#define __NR_recvmmsg 243
+__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg)
+
+/*
+ * Architectures may provide up to 16 syscalls of their own
+ * starting with this value.
+ */
+#define __NR_arch_specific_syscall 244
+
+#define __NR_wait4 260
+__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
+#define __NR_prlimit64 261
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_fanotify_init 262
+__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
+#define __NR_fanotify_mark 263
+__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
+#define __NR_name_to_handle_at         264
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at         265
+__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
+	  compat_sys_open_by_handle_at)
+#define __NR_clock_adjtime 266
+__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime)
+#define __NR_syncfs 267
+__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 268
+__SYSCALL(__NR_setns, sys_setns)
+#define __NR_sendmmsg 269
+__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
+#define __NR_process_vm_readv 270
+__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
+          compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 271
+__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
+          compat_sys_process_vm_writev)
+#define __NR_kcmp 272
+__SYSCALL(__NR_kcmp, sys_kcmp)
+#define __NR_finit_module 273
+__SYSCALL(__NR_finit_module, sys_finit_module)
+#define __NR_sched_setattr 274
+__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
+#define __NR_sched_getattr 275
+__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
+#define __NR_renameat2 276
+__SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
+#define __NR_getrandom 278
+__SYSCALL(__NR_getrandom, sys_getrandom)
+#define __NR_memfd_create 279
+__SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 280
+__SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_execveat 281
+__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_userfaultfd 282
+__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
+#define __NR_membarrier 283
+__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_mlock2 284
+__SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_copy_file_range 285
+__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
+#define __NR_preadv2 286
+__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
+#define __NR_pwritev2 287
+__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
+#define __NR_pkey_mprotect 288
+__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
+#define __NR_pkey_alloc 289
+__SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
+#define __NR_pkey_free 290
+__SYSCALL(__NR_pkey_free,     sys_pkey_free)
+#define __NR_statx 291
+__SYSCALL(__NR_statx,     sys_statx)
+#define __NR_io_pgetevents 292
+__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_rseq 293
+__SYSCALL(__NR_rseq, sys_rseq)
+
+#undef __NR_syscalls
+#define __NR_syscalls 294
+
+/*
+ * 32 bit systems traditionally used different
+ * syscalls for off_t and loff_t arguments, while
+ * 64 bit systems only need the off_t version.
+ * For new 32 bit platforms, there is no need to
+ * implement the old 32 bit off_t syscalls, so
+ * they take different names.
+ * Here we map the numbers so that both versions
+ * use the same syscall table layout.
+ */
+#if __BITS_PER_LONG == 64 && !defined(__SYSCALL_COMPAT)
+#define __NR_fcntl __NR3264_fcntl
+#define __NR_statfs __NR3264_statfs
+#define __NR_fstatfs __NR3264_fstatfs
+#define __NR_truncate __NR3264_truncate
+#define __NR_ftruncate __NR3264_ftruncate
+#define __NR_lseek __NR3264_lseek
+#define __NR_sendfile __NR3264_sendfile
+#define __NR_newfstatat __NR3264_fstatat
+#define __NR_fstat __NR3264_fstat
+#define __NR_mmap __NR3264_mmap
+#define __NR_fadvise64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat __NR3264_stat
+#define __NR_lstat __NR3264_lstat
+#endif
+#else
+#define __NR_fcntl64 __NR3264_fcntl
+#define __NR_statfs64 __NR3264_statfs
+#define __NR_fstatfs64 __NR3264_fstatfs
+#define __NR_truncate64 __NR3264_truncate
+#define __NR_ftruncate64 __NR3264_ftruncate
+#define __NR_llseek __NR3264_lseek
+#define __NR_sendfile64 __NR3264_sendfile
+#define __NR_fstatat64 __NR3264_fstatat
+#define __NR_fstat64 __NR3264_fstat
+#define __NR_mmap2 __NR3264_mmap
+#define __NR_fadvise64_64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat64 __NR3264_stat
+#define __NR_lstat64 __NR3264_lstat
+#endif
+#endif
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..8dd6aefdafa4
--- /dev/null
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/bitsperlong.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/uapi/asm/bitsperlong.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/uapi/asm/bitsperlong.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/bitsperlong.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/bitsperlong.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
+#else
+#include <asm-generic/bitsperlong.h>
+#endif
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
new file mode 100644
index 000000000000..ce3c5945a1c4
--- /dev/null
+++ b/tools/include/uapi/asm/errno.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/errno.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/errno.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/uapi/asm/errno.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/errno.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/errno.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#else
+#include <asm-generic/errno.h>
+#endif
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 6fdff5945c8a..300f336633f2 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -680,6 +680,22 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_ATOMIC	3
 
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO    4
+
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. Depends on client
+ * also supporting DRM_CLIENT_CAP_ATOMIC
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
+
 /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..a4446f452040 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
 
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * Reports true when writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * Reports false when writes via mmap_gtt are indeterminately delayed in an in
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when reporting false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8c317737ba3f..72c453a8bf50 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
 	__u8	data[0];	/* Arbitrary size */
 };
 
+struct bpf_cgroup_storage_key {
+	__u64	cgroup_inode_id;	/* cgroup inode id */
+	__u32	attach_type;		/* program attach type */
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -95,6 +100,10 @@ enum bpf_cmd {
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
+	BPF_BTF_LOAD,
+	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
+	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -115,6 +124,13 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_DEVMAP,
 	BPF_MAP_TYPE_SOCKMAP,
 	BPF_MAP_TYPE_CPUMAP,
+	BPF_MAP_TYPE_XSKMAP,
+	BPF_MAP_TYPE_SOCKHASH,
+	BPF_MAP_TYPE_CGROUP_STORAGE,
+	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	BPF_MAP_TYPE_QUEUE,
+	BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -137,6 +153,10 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	BPF_PROG_TYPE_LWT_SEG6LOCAL,
+	BPF_PROG_TYPE_LIRC_MODE2,
+	BPF_PROG_TYPE_SK_REUSEPORT,
+	BPF_PROG_TYPE_FLOW_DISSECTOR,
 };
 
 enum bpf_attach_type {
@@ -154,6 +174,10 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_CONNECT,
 	BPF_CGROUP_INET4_POST_BIND,
 	BPF_CGROUP_INET6_POST_BIND,
+	BPF_CGROUP_UDP4_SENDMSG,
+	BPF_CGROUP_UDP6_SENDMSG,
+	BPF_LIRC_MODE2,
+	BPF_FLOW_DISSECTOR,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -279,6 +303,9 @@ union bpf_attr {
 					 */
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
+		__u32	btf_fd;		/* fd pointing to a BTF type data */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -339,6 +366,7 @@ union bpf_attr {
 			__u32		start_id;
 			__u32		prog_id;
 			__u32		map_id;
+			__u32		btf_id;
 		};
 		__u32		next_id;
 		__u32		open_flags;
@@ -363,398 +391,1880 @@ union bpf_attr {
 		__u64 name;
 		__u32 prog_fd;
 	} raw_tracepoint;
+
+	struct { /* anonymous struct for BPF_BTF_LOAD */
+		__aligned_u64	btf;
+		__aligned_u64	btf_log_buf;
+		__u32		btf_size;
+		__u32		btf_log_size;
+		__u32		btf_log_level;
+	};
+
+	struct {
+		__u32		pid;		/* input: pid */
+		__u32		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input/output: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prod_id */
+		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
-/* BPF helper function descriptions:
- *
- * void *bpf_map_lookup_elem(&map, &key)
- *     Return: Map value or NULL
- *
- * int bpf_map_update_elem(&map, &key, &value, flags)
- *     Return: 0 on success or negative error
- *
- * int bpf_map_delete_elem(&map, &key)
- *     Return: 0 on success or negative error
- *
- * int bpf_probe_read(void *dst, int size, void *src)
- *     Return: 0 on success or negative error
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ *     $ ./scripts/bpf_helpers_doc.py \
+ *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ *     $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * 	Description
+ * 		Perform a lookup in *map* for an entry associated to *key*.
+ * 	Return
+ * 		Map value associated to *key*, or **NULL** if no entry was
+ * 		found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * 	Description
+ * 		Add or update the value of the entry associated to *key* in
+ * 		*map* with *value*. *flags* is one of:
+ *
+ * 		**BPF_NOEXIST**
+ * 			The entry for *key* must not exist in the map.
+ * 		**BPF_EXIST**
+ * 			The entry for *key* must already exist in the map.
+ * 		**BPF_ANY**
+ * 			No condition on the existence of the entry for *key*.
+ *
+ * 		Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * 		**BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY**  (all
+ * 		elements always exist), the helper would return an error.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * 	Description
+ * 		Delete entry with *key* from *map*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * 	Description
+ * 		Push an element *value* in *map*. *flags* is one of:
+ *
+ * 		**BPF_EXIST**
+ * 		If the queue/stack is full, the oldest element is removed to
+ * 		make room for this.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * 	Description
+ * 		Pop an element from *map*.
+ * Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * 	Description
+ * 		Get an element from *map* without removing it.
+ * Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		For tracing programs, safely attempt to read *size* bytes from
+ * 		address *src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_ktime_get_ns(void)
- *     Return: current ktime
- *
- * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
- *     Return: length of buffer written or negative error
- *
- * u32 bpf_prandom_u32(void)
- *     Return: random value
- *
- * u32 bpf_raw_smp_processor_id(void)
- *     Return: SMP processor ID
- *
- * int bpf_skb_store_bytes(skb, offset, from, len, flags)
- *     store bytes into packet
- *     @skb: pointer to skb
- *     @offset: offset within packet from skb->mac_header
- *     @from: pointer where to copy bytes from
- *     @len: number of bytes to store into packet
- *     @flags: bit 0 - if true, recompute skb->csum
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l3_csum_replace(skb, offset, from, to, flags)
- *     recompute IP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where IP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_l4_csum_replace(skb, offset, from, to, flags)
- *     recompute TCP/UDP checksum
- *     @skb: pointer to skb
- *     @offset: offset within packet where TCP/UDP checksum is located
- *     @from: old value of header field
- *     @to: new value of header field
- *     @flags: bits 0-3 - size of header field
- *             bit 4 - is pseudo header
- *             other bits - reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_tail_call(ctx, prog_array_map, index)
- *     jump into another BPF program
- *     @ctx: context pointer passed to next program
- *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: 32-bit index inside array that selects specific program to run
- *     Return: 0 on success or negative error
- *
- * int bpf_clone_redirect(skb, ifindex, flags)
- *     redirect to another netdev
- *     @skb: pointer to skb
- *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: 0 on success or negative error
+ * 	Description
+ * 		Return the time elapsed since system boot, in nanoseconds.
+ * 	Return
+ * 		Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * 	Description
+ * 		This helper is a "printk()-like" facility for debugging. It
+ * 		prints a message defined by format *fmt* (of size *fmt_size*)
+ * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * 		available. It can take up to three additional **u64**
+ * 		arguments (as an eBPF helpers, the total number of arguments is
+ * 		limited to five).
+ *
+ * 		Each time the helper is called, it appends a line to the trace.
+ * 		The format of the trace is customizable, and the exact output
+ * 		one will get depends on the options set in
+ * 		*\/sys/kernel/debug/tracing/trace_options* (see also the
+ * 		*README* file under the same directory). However, it usually
+ * 		defaults to something like:
+ *
+ * 		::
+ *
+ * 			telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * 		In the above:
+ *
+ * 			* ``telnet`` is the name of the current task.
+ * 			* ``470`` is the PID of the current task.
+ * 			* ``001`` is the CPU number on which the task is
+ * 			  running.
+ * 			* In ``.N..``, each character refers to a set of
+ * 			  options (whether irqs are enabled, scheduling
+ * 			  options, whether hard/softirqs are running, level of
+ * 			  preempt_disabled respectively). **N** means that
+ * 			  **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * 			  are set.
+ * 			* ``419421.045894`` is a timestamp.
+ * 			* ``0x00000001`` is a fake value used by BPF for the
+ * 			  instruction pointer register.
+ * 			* ``<formatted msg>`` is the message formatted with
+ * 			  *fmt*.
+ *
+ * 		The conversion specifiers supported by *fmt* are similar, but
+ * 		more limited than for printk(). They are **%d**, **%i**,
+ * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * 		of field, padding with zeroes, etc.) is available, and the
+ * 		helper will return **-EINVAL** (but print nothing) if it
+ * 		encounters an unknown specifier.
+ *
+ * 		Also, note that **bpf_trace_printk**\ () is slow, and should
+ * 		only be used for debugging purposes. For this reason, a notice
+ * 		bloc (spanning several lines) is printed to kernel logs and
+ * 		states that the helper should not be used "for production use"
+ * 		the first time this helper is used (or more precisely, when
+ * 		**trace_printk**\ () buffers are allocated). For passing values
+ * 		to user space, perf events should be preferred.
+ * 	Return
+ * 		The number of bytes written to the buffer, or a negative error
+ * 		in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * 	Description
+ * 		Get a pseudo-random number.
+ *
+ * 		From a security point of view, this helper uses its own
+ * 		pseudo-random internal state, and cannot be used to infer the
+ * 		seed of other random functions in the kernel. However, it is
+ * 		essential to note that the generator used by the helper is not
+ * 		cryptographically secure.
+ * 	Return
+ * 		A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * 	Description
+ * 		Get the SMP (symmetric multiprocessing) processor id. Note that
+ * 		all programs run with preemption disabled, which means that the
+ * 		SMP processor id is stable during all the execution of the
+ * 		program.
+ * 	Return
+ * 		The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * 	Description
+ * 		Store *len* bytes from address *from* into the packet
+ * 		associated to *skb*, at *offset*. *flags* are a combination of
+ * 		**BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * 		checksum for the packet after storing the bytes) and
+ * 		**BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * 		**->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * 	Description
+ * 		Recompute the layer 3 (e.g. IP) checksum for the packet
+ * 		associated to *skb*. Computation is incremental, so the helper
+ * 		must know the former value of the header field that was
+ * 		modified (*from*), the new value of this field (*to*), and the
+ * 		number of bytes (2 or 4) for this field, stored in *size*.
+ * 		Alternatively, it is possible to store the difference between
+ * 		the previous and the new values of the header field in *to*, by
+ * 		setting *from* and *size* to 0. For both methods, *offset*
+ * 		indicates the location of the IP checksum within the packet.
+ *
+ * 		This helper works in combination with **bpf_csum_diff**\ (),
+ * 		which does not update the checksum in-place, but offers more
+ * 		flexibility and can handle sizes larger than 2 or 4 for the
+ * 		checksum to update.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * 	Description
+ * 		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * 		packet associated to *skb*. Computation is incremental, so the
+ * 		helper must know the former value of the header field that was
+ * 		modified (*from*), the new value of this field (*to*), and the
+ * 		number of bytes (2 or 4) for this field, stored on the lowest
+ * 		four bits of *flags*. Alternatively, it is possible to store
+ * 		the difference between the previous and the new values of the
+ * 		header field in *to*, by setting *from* and the four lowest
+ * 		bits of *flags* to 0. For both methods, *offset* indicates the
+ * 		location of the IP checksum within the packet. In addition to
+ * 		the size of the field, *flags* can be added (bitwise OR) actual
+ * 		flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * 		untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * 		for updates resulting in a null checksum the value is set to
+ * 		**CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * 		the checksum is to be computed against a pseudo-header.
+ *
+ * 		This helper works in combination with **bpf_csum_diff**\ (),
+ * 		which does not update the checksum in-place, but offers more
+ * 		flexibility and can handle sizes larger than 2 or 4 for the
+ * 		checksum to update.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * 	Description
+ * 		This special helper is used to trigger a "tail call", or in
+ * 		other words, to jump into another eBPF program. The same stack
+ * 		frame is used (but values on stack and in registers for the
+ * 		caller are not accessible to the callee). This mechanism allows
+ * 		for program chaining, either for raising the maximum number of
+ * 		available eBPF instructions, or to execute given programs in
+ * 		conditional blocks. For security reasons, there is an upper
+ * 		limit to the number of successive tail calls that can be
+ * 		performed.
+ *
+ * 		Upon call of this helper, the program attempts to jump into a
+ * 		program referenced at index *index* in *prog_array_map*, a
+ * 		special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * 		*ctx*, a pointer to the context.
+ *
+ * 		If the call succeeds, the kernel immediately runs the first
+ * 		instruction of the new program. This is not a function call,
+ * 		and it never returns to the previous program. If the call
+ * 		fails, then the helper has no effect, and the caller continues
+ * 		to run its subsequent instructions. A call can fail if the
+ * 		destination program for the jump does not exist (i.e. *index*
+ * 		is superior to the number of entries in *prog_array_map*), or
+ * 		if the maximum number of tail calls has been reached for this
+ * 		chain of programs. This limit is defined in the kernel by the
+ * 		macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * 		which is currently set to 32.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * 	Description
+ * 		Clone and redirect the packet associated to *skb* to another
+ * 		net device of index *ifindex*. Both ingress and egress
+ * 		interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * 		value in *flags* is used to make the distinction (ingress path
+ * 		is selected if the flag is present, egress path otherwise).
+ * 		This is the only flag supported for now.
+ *
+ * 		In comparison with **bpf_redirect**\ () helper,
+ * 		**bpf_clone_redirect**\ () has the associated cost of
+ * 		duplicating the packet buffer, but this can be executed out of
+ * 		the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * 		efficient, but it is handled through an action code where the
+ * 		redirection happens only after the eBPF program has returned.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  *
  * u64 bpf_get_current_pid_tgid(void)
- *     Return: current->tgid << 32 | current->pid
+ * 	Return
+ * 		A 64-bit integer containing the current tgid and pid, and
+ * 		created as such:
+ * 		*current_task*\ **->tgid << 32 \|**
+ * 		*current_task*\ **->pid**.
  *
  * u64 bpf_get_current_uid_gid(void)
- *     Return: current_gid << 32 | current_uid
- *
- * int bpf_get_current_comm(char *buf, int size_of_buf)
- *     stores current->comm into buf
- *     Return: 0 on success or negative error
- *
- * u32 bpf_get_cgroup_classid(skb)
- *     retrieve a proc's classid
- *     @skb: pointer to skb
- *     Return: classid if != 0
- *
- * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_vlan_pop(skb)
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_get_tunnel_key(skb, key, size, flags)
- * int bpf_skb_set_tunnel_key(skb, key, size, flags)
- *     retrieve or populate tunnel metadata
- *     @skb: pointer to skb
- *     @key: pointer to 'struct bpf_tunnel_key'
- *     @size: size of 'struct bpf_tunnel_key'
- *     @flags: room for future extensions
- *     Return: 0 on success or negative error
- *
- * u64 bpf_perf_event_read(map, flags)
- *     read perf event counter value
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     Return: value of perf event counter read or error code
- *
- * int bpf_redirect(ifindex, flags)
- *     redirect to another netdev
- *     @ifindex: ifindex of the net device
- *     @flags:
- *	  cls_bpf:
- *          bit 0 - if set, redirect to ingress instead of egress
- *          other bits - reserved
- *	  xdp_bpf:
- *	    all bits - reserved
- *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- * int bpf_redirect_map(map, key, flags)
- *     redirect to endpoint in map
- *     @map: pointer to dev map
- *     @key: index in map to lookup
- *     @flags: --
- *     Return: XDP_REDIRECT on success or XDP_ABORT on error
- *
- * u32 bpf_get_route_realm(skb)
- *     retrieve a dst's tclassid
- *     @skb: pointer to skb
- *     Return: realm if != 0
- *
- * int bpf_perf_event_output(ctx, map, flags, data, size)
- *     output perf raw sample
- *     @ctx: struct pt_regs*
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @data: data on stack to be output as raw data
- *     @size: size of data
- *     Return: 0 on success or negative error
- *
- * int bpf_get_stackid(ctx, map, flags)
- *     walk user or kernel stack and return id
- *     @ctx: struct pt_regs*
- *     @map: pointer to stack_trace map
- *     @flags: bits 0-7 - numer of stack frames to skip
- *             bit 8 - collect user stack instead of kernel
- *             bit 9 - compare stacks by hash only
- *             bit 10 - if two different stacks hash into the same stackid
- *                      discard old
- *             other bits - reserved
- *     Return: >= 0 stackid on success or negative error
- *
- * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
- *     calculate csum diff
- *     @from: raw from buffer
- *     @from_size: length of from buffer
- *     @to: raw to buffer
- *     @to_size: length of to buffer
- *     @seed: optional seed
- *     Return: csum result or negative error code
- *
- * int bpf_skb_get_tunnel_opt(skb, opt, size)
- *     retrieve tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: option size
- *
- * int bpf_skb_set_tunnel_opt(skb, opt, size)
- *     populate tunnel options metadata
- *     @skb: pointer to skb
- *     @opt: pointer to raw tunnel option data
- *     @size: size of @opt
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_proto(skb, proto, flags)
- *     Change protocol of the skb. Currently supported is v4 -> v6,
- *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
- *     program is expected to fill the new headers via skb_store_bytes
- *     and lX_csum_replace.
- *     @skb: pointer to skb
- *     @proto: new skb->protocol type
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_change_type(skb, type)
- *     Change packet type of skb.
- *     @skb: pointer to skb
- *     @type: new skb->pkt_type type
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_under_cgroup(skb, map, index)
- *     Check cgroup2 membership of skb
- *     @skb: pointer to skb
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 skb failed the cgroup2 descendant test
- *       == 1 skb succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * u32 bpf_get_hash_recalc(skb)
- *     Retrieve and possibly recalculate skb->hash.
- *     @skb: pointer to skb
- *     Return: hash
+ * 	Return
+ * 		A 64-bit integer containing the current GID and UID, and
+ * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * 	Description
+ * 		Copy the **comm** attribute of the current task into *buf* of
+ * 		*size_of_buf*. The **comm** attribute contains the name of
+ * 		the executable (excluding the path) for the current task. The
+ * 		*size_of_buf* must be strictly positive. On success, the
+ * 		helper makes sure that the *buf* is NUL-terminated. On failure,
+ * 		it is filled with zeroes.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the classid for the current task, i.e. for the net_cls
+ * 		cgroup to which *skb* belongs.
+ *
+ * 		This helper can be used on TC egress path, but not on ingress.
+ *
+ * 		The net_cls cgroup provides an interface to tag network packets
+ * 		based on a user-provided identifier for all traffic coming from
+ * 		the tasks belonging to the related cgroup. See also the related
+ * 		kernel documentation, available from the Linux sources in file
+ * 		*Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * 		The Linux kernel has two versions for cgroups: there are
+ * 		cgroups v1 and cgroups v2. Both are available to users, who can
+ * 		use a mixture of them, but note that the net_cls cgroup is for
+ * 		cgroup v1 only. This makes it incompatible with BPF programs
+ * 		run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * 		only hold data for one version of cgroups at a time).
+ *
+ * 		This helper is only available is the kernel was compiled with
+ * 		the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * 		"**y**" or to "**m**".
+ * 	Return
+ * 		The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * 	Description
+ * 		Push a *vlan_tci* (VLAN tag control information) of protocol
+ * 		*vlan_proto* to the packet associated to *skb*, then update
+ * 		the checksum. Note that if *vlan_proto* is different from
+ * 		**ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * 		be **ETH_P_8021Q**.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * 	Description
+ * 		Pop a VLAN header from the packet associated to *skb*.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * 	Description
+ * 		Get tunnel metadata. This helper takes a pointer *key* to an
+ * 		empty **struct bpf_tunnel_key** of **size**, that will be
+ * 		filled with tunnel metadata for the packet associated to *skb*.
+ * 		The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * 		indicates that the tunnel is based on IPv6 protocol instead of
+ * 		IPv4.
+ *
+ * 		The **struct bpf_tunnel_key** is an object that generalizes the
+ * 		principal parameters used by various tunneling protocols into a
+ * 		single struct. This way, it can be used to easily make a
+ * 		decision based on the contents of the encapsulation header,
+ * 		"summarized" in this struct. In particular, it holds the IP
+ * 		address of the remote end (IPv4 or IPv6, depending on the case)
+ * 		in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * 		this struct exposes the *key*\ **->tunnel_id**, which is
+ * 		generally mapped to a VNI (Virtual Network Identifier), making
+ * 		it programmable together with the **bpf_skb_set_tunnel_key**\
+ * 		() helper.
+ *
+ * 		Let's imagine that the following code is part of a program
+ * 		attached to the TC ingress interface, on one end of a GRE
+ * 		tunnel, and is supposed to filter out all messages coming from
+ * 		remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * 		::
+ *
+ * 			int ret;
+ * 			struct bpf_tunnel_key key = {};
+ * 			
+ * 			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * 			if (ret < 0)
+ * 				return TC_ACT_SHOT;	// drop packet
+ * 			
+ * 			if (key.remote_ipv4 != 0x0a000001)
+ * 				return TC_ACT_SHOT;	// drop packet
+ * 			
+ * 			return TC_ACT_OK;		// accept packet
+ *
+ * 		This interface can also be used with all encapsulation devices
+ * 		that can operate in "collect metadata" mode: instead of having
+ * 		one network device per specific configuration, the "collect
+ * 		metadata" mode only requires a single device where the
+ * 		configuration can be extracted from this helper.
+ *
+ * 		This can be used together with various tunnels such as VXLan,
+ * 		Geneve, GRE or IP in IP (IPIP).
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * 	Description
+ * 		Populate tunnel metadata for packet associated to *skb.* The
+ * 		tunnel metadata is set to the contents of *key*, of *size*. The
+ * 		*flags* can be set to a combination of the following values:
+ *
+ * 		**BPF_F_TUNINFO_IPV6**
+ * 			Indicate that the tunnel is based on IPv6 protocol
+ * 			instead of IPv4.
+ * 		**BPF_F_ZERO_CSUM_TX**
+ * 			For IPv4 packets, add a flag to tunnel metadata
+ * 			indicating that checksum computation should be skipped
+ * 			and checksum set to zeroes.
+ * 		**BPF_F_DONT_FRAGMENT**
+ * 			Add a flag to tunnel metadata indicating that the
+ * 			packet should not be fragmented.
+ * 		**BPF_F_SEQ_NUMBER**
+ * 			Add a flag to tunnel metadata indicating that a
+ * 			sequence number should be added to tunnel header before
+ * 			sending the packet. This flag was added for GRE
+ * 			encapsulation, but might be used with other protocols
+ * 			as well in the future.
+ *
+ * 		Here is a typical usage on the transmit path:
+ *
+ * 		::
+ *
+ * 			struct bpf_tunnel_key key;
+ * 			     populate key ...
+ * 			bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * 			bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * 		See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * 		helper for additional information.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * 	Description
+ * 		Read the value of a perf event counter. This helper relies on a
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * 		the perf event counter is selected when *map* is updated with
+ * 		perf event file descriptors. The *map* is an array whose size
+ * 		is the number of available CPUs, and each cell contains a value
+ * 		relative to one CPU. The value to retrieve is indicated by
+ * 		*flags*, that contains the index of the CPU to look up, masked
+ * 		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ * 		current CPU should be retrieved.
+ *
+ * 		Note that before Linux 4.13, only hardware perf event can be
+ * 		retrieved.
+ *
+ * 		Also, be aware that the newer helper
+ * 		**bpf_perf_event_read_value**\ () is recommended over
+ * 		**bpf_perf_event_read**\ () in general. The latter has some ABI
+ * 		quirks where error and counter value are used as a return code
+ * 		(which is wrong to do since ranges may overlap). This issue is
+ * 		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * 		time provides more features over the **bpf_perf_event_read**\
+ * 		() interface. Please refer to the description of
+ * 		**bpf_perf_event_read_value**\ () for details.
+ * 	Return
+ * 		The value of the perf event counter read from the map, or a
+ * 		negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * 	Description
+ * 		Redirect the packet to another net device of index *ifindex*.
+ * 		This helper is somewhat similar to **bpf_clone_redirect**\
+ * 		(), except that the packet is not cloned, which provides
+ * 		increased performance.
+ *
+ * 		Except for XDP, both ingress and egress interfaces can be used
+ * 		for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * 		to make the distinction (ingress path is selected if the flag
+ * 		is present, egress path otherwise). Currently, XDP only
+ * 		supports redirection to the egress interface, and accepts no
+ * 		flag at all.
+ *
+ * 		The same effect can be attained with the more generic
+ * 		**bpf_redirect_map**\ (), which requires specific maps to be
+ * 		used but offers better performance.
+ * 	Return
+ * 		For XDP, the helper returns **XDP_REDIRECT** on success or
+ * 		**XDP_ABORTED** on error. For other program types, the values
+ * 		are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * 		error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the realm or the route, that is to say the
+ * 		**tclassid** field of the destination for the *skb*. The
+ * 		indentifier retrieved is a user-provided tag, similar to the
+ * 		one used with the net_cls cgroup (see description for
+ * 		**bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * 		held by a route (a destination entry), not by a task.
+ *
+ * 		Retrieving this identifier works with the clsact TC egress hook
+ * 		(see also **tc-bpf(8)**), or alternatively on conventional
+ * 		classful egress qdiscs, but not on TC ingress path. In case of
+ * 		clsact TC egress hook, this has the advantage that, internally,
+ * 		the destination entry has not been dropped yet in the transmit
+ * 		path. Therefore, the destination entry does not need to be
+ * 		artificially held via **netif_keep_dst**\ () for a classful
+ * 		qdisc until the *skb* is freed.
+ *
+ * 		This helper is available only if the kernel was compiled with
+ * 		**CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * 	Return
+ * 		The realm of the route for the packet associated to *skb*, or 0
+ * 		if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * 	Description
+ * 		Write raw *data* blob into a special BPF perf event held by
+ * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * 		event must have the following attributes: **PERF_SAMPLE_RAW**
+ * 		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * 		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * 		The *flags* are used to indicate the index in *map* for which
+ * 		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * 		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * 		to indicate that the index of the current CPU core should be
+ * 		used.
+ *
+ * 		The value to write, of *size*, is passed through eBPF stack and
+ * 		pointed by *data*.
+ *
+ * 		The context of the program *ctx* needs also be passed to the
+ * 		helper.
+ *
+ * 		On user space, a program willing to read the values needs to
+ * 		call **perf_event_open**\ () on the perf event (either for
+ * 		one or for all CPUs) and to store the file descriptor into the
+ * 		*map*. This must be done before the eBPF program can send data
+ * 		into it. An example is available in file
+ * 		*samples/bpf/trace_output_user.c* in the Linux kernel source
+ * 		tree (the eBPF program counterpart is in
+ * 		*samples/bpf/trace_output_kern.c*).
+ *
+ * 		**bpf_perf_event_output**\ () achieves better performance
+ * 		than **bpf_trace_printk**\ () for sharing data with user
+ * 		space, and is much better suitable for streaming data from eBPF
+ * 		programs.
+ *
+ * 		Note that this helper is not restricted to tracing use cases
+ * 		and can be used with programs attached to TC or XDP as well,
+ * 		where it allows for passing data to user space listeners. Data
+ * 		can be:
+ *
+ * 		* Only custom structs,
+ * 		* Only the packet payload, or
+ * 		* A combination of both.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * 	Description
+ * 		This helper was provided as an easy way to load data from a
+ * 		packet. It can be used to load *len* bytes from *offset* from
+ * 		the packet associated to *skb*, into the buffer pointed by
+ * 		*to*.
+ *
+ * 		Since Linux 4.7, usage of this helper has mostly been replaced
+ * 		by "direct packet access", enabling packet data to be
+ * 		manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * 		pointing respectively to the first byte of packet data and to
+ * 		the byte after the last byte of packet data. However, it
+ * 		remains useful if one wishes to read large quantities of data
+ * 		at once from a packet into the eBPF stack.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * 	Description
+ * 		Walk a user or a kernel stack and return its id. To achieve
+ * 		this, the helper needs *ctx*, which is a pointer to the context
+ * 		on which the tracing program is executed, and a pointer to a
+ * 		*map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		a combination of the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_FAST_STACK_CMP**
+ * 			Compare stacks by hash only.
+ * 		**BPF_F_REUSE_STACKID**
+ * 			If two different stacks hash into the same *stackid*,
+ * 			discard the old one.
+ *
+ * 		The stack id retrieved is a 32 bit long integer handle which
+ * 		can be further combined with other data (including other stack
+ * 		ids) and used as a key into maps. This can be useful for
+ * 		generating a variety of graphs (such as flame graphs or off-cpu
+ * 		graphs).
+ *
+ * 		For walking a stack, this helper is an improvement over
+ * 		**bpf_probe_read**\ (), which can be used with unrolled loops
+ * 		but is not efficient and consumes a lot of eBPF instructions.
+ * 		Instead, **bpf_get_stackid**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ * 	Return
+ * 		The positive or null stack id on success, or a negative error
+ * 		in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * 	Description
+ * 		Compute a checksum difference, from the raw buffer pointed by
+ * 		*from*, of length *from_size* (that must be a multiple of 4),
+ * 		towards the raw buffer pointed by *to*, of size *to_size*
+ * 		(same remark). An optional *seed* can be added to the value
+ * 		(this can be cascaded, the seed may come from a previous call
+ * 		to the helper).
+ *
+ * 		This is flexible enough to be used in several ways:
+ *
+ * 		* With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * 		  checksum, it can be used when pushing new data.
+ * 		* With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * 		  checksum, it can be used when removing data from a packet.
+ * 		* With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * 		  can be used to compute a diff. Note that *from_size* and
+ * 		  *to_size* do not need to be equal.
+ *
+ * 		This helper can be used in combination with
+ * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * 		which one can feed in the difference computed with
+ * 		**bpf_csum_diff**\ ().
+ * 	Return
+ * 		The checksum result, or a negative error code in case of
+ * 		failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * 	Description
+ * 		Retrieve tunnel options metadata for the packet associated to
+ * 		*skb*, and store the raw tunnel option data to the buffer *opt*
+ * 		of *size*.
+ *
+ * 		This helper can be used with encapsulation devices that can
+ * 		operate in "collect metadata" mode (please refer to the related
+ * 		note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * 		more details). A particular example where this can be used is
+ * 		in combination with the Geneve encapsulation protocol, where it
+ * 		allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * 		and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * 		the eBPF program. This allows for full customization of these
+ * 		headers.
+ * 	Return
+ * 		The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * 	Description
+ * 		Set tunnel options metadata for the packet associated to *skb*
+ * 		to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * 		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * 		helper for additional information.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * 	Description
+ * 		Change the protocol of the *skb* to *proto*. Currently
+ * 		supported are transition from IPv4 to IPv6, and from IPv6 to
+ * 		IPv4. The helper takes care of the groundwork for the
+ * 		transition, including resizing the socket buffer. The eBPF
+ * 		program is expected to fill the new headers, if any, via
+ * 		**skb_store_bytes**\ () and to recompute the checksums with
+ * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * 		(). The main case for this helper is to perform NAT64
+ * 		operations out of an eBPF program.
+ *
+ * 		Internally, the GSO type is marked as dodgy so that headers are
+ * 		checked and segments are recalculated by the GSO/GRO engine.
+ * 		The size for GSO target is adapted as well.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * 	Description
+ * 		Change the packet type for the packet associated to *skb*. This
+ * 		comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * 		the eBPF program does not have a write access to *skb*\
+ * 		**->pkt_type** beside this helper. Using a helper here allows
+ * 		for graceful handling of errors.
+ *
+ * 		The major use case is to change incoming *skb*s to
+ * 		**PACKET_HOST** in a programmatic way instead of having to
+ * 		recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * 		example.
+ *
+ * 		Note that *type* only allows certain values. At this time, they
+ * 		are:
+ *
+ * 		**PACKET_HOST**
+ * 			Packet is for us.
+ * 		**PACKET_BROADCAST**
+ * 			Send packet to all.
+ * 		**PACKET_MULTICAST**
+ * 			Send packet to group.
+ * 		**PACKET_OTHERHOST**
+ * 			Send packet to someone else.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * 	Description
+ * 		Check whether *skb* is a descendant of the cgroup2 held by
+ * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * 	Return
+ * 		The return value depends on the result of the test, and can be:
+ *
+ * 		* 0, if the *skb* failed the cgroup2 descendant test.
+ * 		* 1, if the *skb* succeeded the cgroup2 descendant test.
+ * 		* A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * 	Description
+ * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * 		not set, in particular if the hash was cleared due to mangling,
+ * 		recompute this hash. Later accesses to the hash can be done
+ * 		directly with *skb*\ **->hash**.
+ *
+ * 		Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * 		prototype with **bpf_skb_change_proto**\ (), or calling
+ * 		**bpf_skb_store_bytes**\ () with the
+ * 		**BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * 		the hash and to trigger a new computation for the next call to
+ * 		**bpf_get_hash_recalc**\ ().
+ * 	Return
+ * 		The 32-bit hash.
  *
  * u64 bpf_get_current_task(void)
- *     Returns current task_struct
- *     Return: current
- *
- * int bpf_probe_write_user(void *dst, void *src, int len)
- *     safely attempt to write to a location
- *     @dst: destination address in userspace
- *     @src: source address on stack
- *     @len: number of bytes to copy
- *     Return: 0 on success or negative error
- *
- * int bpf_current_task_under_cgroup(map, index)
- *     Check cgroup2 membership of current task
- *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
- *     @index: index of the cgroup in the bpf_map
- *     Return:
- *       == 0 current failed the cgroup2 descendant test
- *       == 1 current succeeded the cgroup2 descendant test
- *        < 0 error
- *
- * int bpf_skb_change_tail(skb, len, flags)
- *     The helper will resize the skb to the given new size, to be used f.e.
- *     with control messages.
- *     @skb: pointer to skb
- *     @len: new skb length
- *     @flags: reserved
- *     Return: 0 on success or negative error
- *
- * int bpf_skb_pull_data(skb, len)
- *     The helper will pull in non-linear data in case the skb is non-linear
- *     and not all of len are part of the linear section. Only needed for
- *     read/write with direct packet access.
- *     @skb: pointer to skb
- *     @len: len to make read/writeable
- *     Return: 0 on success or negative error
- *
- * s64 bpf_csum_update(skb, csum)
- *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
- *     @skb: pointer to skb
- *     @csum: csum to add
- *     Return: csum on success or negative error
- *
- * void bpf_set_hash_invalid(skb)
- *     Invalidate current skb->hash.
- *     @skb: pointer to skb
- *
- * int bpf_get_numa_node_id()
- *     Return: Id of current NUMA node.
- *
- * int bpf_skb_change_head()
- *     Grows headroom of skb and adjusts MAC header offset accordingly.
- *     Will extends/reallocae as required automatically.
- *     May change skb data pointer and will thus invalidate any check
- *     performed for direct packet access.
- *     @skb: pointer to skb
- *     @len: length of header to be pushed in front
- *     @flags: Flags (unused for now)
- *     Return: 0 on success or negative error
- *
- * int bpf_xdp_adjust_head(xdp_md, delta)
- *     Adjust the xdp_md.data by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data
- *     Return: 0 on success or negative on error
+ * 	Return
+ * 		A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * 	Description
+ * 		Attempt in a safe way to write *len* bytes from the buffer
+ * 		*src* to *dst* in memory. It only works for threads that are in
+ * 		user context, and *dst* must be a valid user space address.
+ *
+ * 		This helper should not be used to implement any kind of
+ * 		security mechanism because of TOC-TOU attacks, but rather to
+ * 		debug, divert, and manipulate execution of semi-cooperative
+ * 		processes.
+ *
+ * 		Keep in mind that this feature is meant for experiments, and it
+ * 		has a risk of crashing the system and running programs.
+ * 		Therefore, when an eBPF program using this helper is attached,
+ * 		a warning including PID and process name is printed to kernel
+ * 		logs.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * 	Description
+ * 		Check whether the probe is being run is the context of a given
+ * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * 	Return
+ * 		The return value depends on the result of the test, and can be:
+ *
+ * 		* 0, if the *skb* task belongs to the cgroup2.
+ * 		* 1, if the *skb* task does not belong to the cgroup2.
+ * 		* A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * 	Description
+ * 		Resize (trim or grow) the packet associated to *skb* to the
+ * 		new *len*. The *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		The basic idea is that the helper performs the needed work to
+ * 		change the size of the packet, then the eBPF program rewrites
+ * 		the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * 		**bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * 		and others. This helper is a slow path utility intended for
+ * 		replies with control messages. And because it is targeted for
+ * 		slow path, the helper itself can afford to be slow: it
+ * 		implicitly linearizes, unclones and drops offloads from the
+ * 		*skb*.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * 	Description
+ * 		Pull in non-linear data in case the *skb* is non-linear and not
+ * 		all of *len* are part of the linear section. Make *len* bytes
+ * 		from *skb* readable and writable. If a zero value is passed for
+ * 		*len*, then the whole length of the *skb* is pulled.
+ *
+ * 		This helper is only needed for reading and writing with direct
+ * 		packet access.
+ *
+ * 		For direct packet access, testing that offsets to access
+ * 		are within packet boundaries (test on *skb*\ **->data_end**) is
+ * 		susceptible to fail if offsets are invalid, or if the requested
+ * 		data is in non-linear parts of the *skb*. On failure the
+ * 		program can just bail out, or in the case of a non-linear
+ * 		buffer, use a helper to make the data available. The
+ * 		**bpf_skb_load_bytes**\ () helper is a first solution to access
+ * 		the data. Another one consists in using **bpf_skb_pull_data**
+ * 		to pull in once the non-linear parts, then retesting and
+ * 		eventually access the data.
+ *
+ * 		At the same time, this also makes sure the *skb* is uncloned,
+ * 		which is a necessary condition for direct write. As this needs
+ * 		to be an invariant for the write part only, the verifier
+ * 		detects writes and adds a prologue that is calling
+ * 		**bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * 		the very beginning in case it is indeed cloned.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * 	Description
+ * 		Add the checksum *csum* into *skb*\ **->csum** in case the
+ * 		driver has supplied a checksum for the entire packet into that
+ * 		field. Return an error otherwise. This helper is intended to be
+ * 		used in combination with **bpf_csum_diff**\ (), in particular
+ * 		when the checksum needs to be updated after data has been
+ * 		written into the packet through direct packet access.
+ * 	Return
+ * 		The checksum on success, or a negative error code in case of
+ * 		failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * 	Description
+ * 		Invalidate the current *skb*\ **->hash**. It can be used after
+ * 		mangling on headers through direct packet access, in order to
+ * 		indicate that the hash is outdated and to trigger a
+ * 		recalculation the next time the kernel tries to access this
+ * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * 	Description
+ * 		Return the id of the current NUMA node. The primary use case
+ * 		for this helper is the selection of sockets for the local NUMA
+ * 		node, when the program is attached to sockets using the
+ * 		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * 		but the helper is also available to other eBPF program types,
+ * 		similarly to **bpf_get_smp_processor_id**\ ().
+ * 	Return
+ * 		The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * 	Description
+ * 		Grows headroom of packet associated to *skb* and adjusts the
+ * 		offset of the MAC header accordingly, adding *len* bytes of
+ * 		space. It automatically extends and reallocates memory as
+ * 		required.
+ *
+ * 		This helper can be used on a layer 3 *skb* to push a MAC header
+ * 		for redirection into a layer 2 device.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * 		it is possible to use a negative value for *delta*. This helper
+ * 		can be used to prepare the packet for pushing or popping
+ * 		headers.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
  *
  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
- *     Copy a NUL terminated string from unsafe address. In case the string
- *     length is smaller than size, the target is not padded with further NUL
- *     bytes. In case the string length is larger than size, just count-1
- *     bytes are copied and the last byte is set to NUL.
- *     @dst: destination address
- *     @size: maximum number of bytes to copy, including the trailing NUL
- *     @unsafe_ptr: unsafe address
- *     Return:
- *       > 0 length of the string including the trailing NUL on success
- *       < 0 error
- *
- * u64 bpf_get_socket_cookie(skb)
- *     Get the cookie for the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
- *     field is missing inside sk_buff
- *
- * u32 bpf_get_socket_uid(skb)
- *     Get the owner uid of the socket stored inside sk_buff.
- *     @skb: pointer to skb
- *     Return: uid of the socket owner on success or overflowuid if failed.
- *
- * u32 bpf_set_hash(skb, hash)
- *     Set full skb->hash.
- *     @skb: pointer to skb
- *     @hash: hash to set
- *
- * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls setsockopt. Not all opts are available, only those with
- *     integer optvals plus TCP_CONGESTION.
- *     Supported levels: SOL_SOCKET and IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: SOL_SOCKET or IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
- *     Calls getsockopt. Not all opts are available.
- *     Supported levels: IPPROTO_TCP
- *     @bpf_socket: pointer to bpf_socket
- *     @level: IPPROTO_TCP
- *     @optname: option name
- *     @optval: pointer to option value
- *     @optlen: length of optval in bytes
- *     Return: 0 or negative error
- *
- * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
- *     Set callback flags for sock_ops
- *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
- *     @flags: flags value
- *     Return: 0 for no error
- *             -EINVAL if there is no full tcp socket
- *             bits in flags that are not supported by current kernel
- *
- * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
- *     Grow or shrink room in sk_buff.
- *     @skb: pointer to skb
- *     @len_diff: (signed) amount of room to grow/shrink
- *     @mode: operation mode (enum bpf_adj_room_mode)
- *     @flags: reserved for future use
- *     Return: 0 on success or negative error code
- *
- * int bpf_sk_redirect_map(map, key, flags)
- *     Redirect skb to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_sock_map_update(skops, map, key, flags)
- *	@skops: pointer to bpf_sock_ops
- *	@map: pointer to sockmap to update
- *	@key: key to insert/update sock in map
- *	@flags: same flags as map update elem
- *
- * int bpf_xdp_adjust_meta(xdp_md, delta)
- *     Adjust the xdp_md.data_meta by delta
- *     @xdp_md: pointer to xdp_md
- *     @delta: An positive/negative integer to be added to xdp_md.data_meta
- *     Return: 0 on success or negative on error
- *
- * int bpf_perf_event_read_value(map, flags, buf, buf_size)
- *     read perf event counter value and perf event enabled/running time
- *     @map: pointer to perf_event_array map
- *     @flags: index of event in the map or bitmask flags
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return: 0 on success or negative error code
- *
- * int bpf_perf_prog_read_value(ctx, buf, buf_size)
- *     read perf prog attached perf event counter and enabled/running time
- *     @ctx: pointer to ctx
- *     @buf: buf to fill
- *     @buf_size: size of the buf
- *     Return : 0 on success or negative error code
- *
- * int bpf_override_return(pt_regs, rc)
- *	@pt_regs: pointer to struct pt_regs
- *	@rc: the return value to set
- *
- * int bpf_msg_redirect_map(map, key, flags)
- *     Redirect msg to a sock in map using key as a lookup key for the
- *     sock in map.
- *     @map: pointer to sockmap
- *     @key: key to lookup sock in map
- *     @flags: reserved for future use
- *     Return: SK_PASS
- *
- * int bpf_bind(ctx, addr, addr_len)
- *     Bind socket to address. Only binding to IP is supported, no port can be
- *     set in addr.
- *     @ctx: pointer to context of type bpf_sock_addr
- *     @addr: pointer to struct sockaddr to bind socket to
- *     @addr_len: length of sockaddr structure
- *     Return: 0 on success or negative error code
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
+ * 				                             ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read()** helper here instead
+ * 		to read the string would require to estimate the length at
+ * 		compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * 	Description
+ * 		If the **struct sk_buff** pointed by *skb* has a known socket,
+ * 		retrieve the cookie (generated by the kernel) of this socket.
+ * 		If no cookie has been set yet, generate a new cookie. Once
+ * 		generated, the socket cookie remains stable for the life of the
+ * 		socket. This helper can be useful for monitoring per socket
+ * 		networking traffic statistics as it provides a unique socket
+ * 		identifier per namespace.
+ * 	Return
+ * 		A 8-byte long non-decreasing number on success, or 0 if the
+ * 		socket field is missing inside *skb*.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * 	Return
+ * 		The owner UID of the socket associated to *skb*. If the socket
+ * 		is **NULL**, or if it is not a full socket (i.e. if it is a
+ * 		time-wait or a request socket instead), **overflowuid** value
+ * 		is returned (note that **overflowuid** might also be the actual
+ * 		UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * 	Description
+ * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * 		to value *hash*.
+ * 	Return
+ * 		0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * 	Description
+ * 		Emulate a call to **setsockopt()** on the socket associated to
+ * 		*bpf_socket*, which must be a full socket. The *level* at
+ * 		which the option resides and the name *optname* of the option
+ * 		must be specified, see **setsockopt(2)** for more information.
+ * 		The option value of length *optlen* is pointed by *optval*.
+ *
+ * 		This helper actually implements a subset of **setsockopt()**.
+ * 		It supports the following *level*\ s:
+ *
+ * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
+ * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * 		  **TCP_BPF_SNDCWND_CLAMP**.
+ * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * 	Description
+ * 		Grow or shrink the room for data in the packet associated to
+ * 		*skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * 		There is a single supported mode at this time:
+ *
+ * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * 		  (room space is added or removed below the layer 3 header).
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		Redirect the packet to the endpoint referenced by *map* at
+ * 		index *key*. Depending on its type, this *map* can contain
+ * 		references to net devices (for forwarding packets through other
+ * 		ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * 		but this is only implemented for native XDP (with driver
+ * 		support) as of this writing).
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		When used to redirect packets to net devices, this helper
+ * 		provides a high performance increase over **bpf_redirect**\ ().
+ * 		This is due to various implementation details of the underlying
+ * 		mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * 		() tries to send packet as a "bulk" to the device.
+ * 	Return
+ * 		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		Redirect the packet to the socket referenced by *map* (of type
+ * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * 		egress interfaces can be used for redirection. The
+ * 		**BPF_F_INGRESS** value in *flags* is used to make the
+ * 		distinction (ingress path is selected if the flag is present,
+ * 		egress path otherwise). This is the only flag supported for now.
+ * 	Return
+ * 		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * 	Description
+ * 		Add an entry to, or update a *map* referencing sockets. The
+ * 		*skops* is used as a new value for the entry associated to
+ * 		*key*. *flags* is one of:
+ *
+ * 		**BPF_NOEXIST**
+ * 			The entry for *key* must not exist in the map.
+ * 		**BPF_EXIST**
+ * 			The entry for *key* must already exist in the map.
+ * 		**BPF_ANY**
+ * 			No condition on the existence of the entry for *key*.
+ *
+ * 		If the *map* has eBPF programs (parser and verdict), those will
+ * 		be inherited by the socket being added. If the socket is
+ * 		already attached to eBPF programs, this results in an error.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * 		*delta* (which can be positive or negative). Note that this
+ * 		operation modifies the address stored in *xdp_md*\ **->data**,
+ * 		so the latter must be loaded only after the helper has been
+ * 		called.
+ *
+ * 		The use of *xdp_md*\ **->data_meta** is optional and programs
+ * 		are not required to use it. The rationale is that when the
+ * 		packet is processed with XDP (e.g. as DoS filter), it is
+ * 		possible to push further meta data along with it before passing
+ * 		to the stack, and to give the guarantee that an ingress eBPF
+ * 		program attached as a TC classifier on the same device can pick
+ * 		this up for further post-processing. Since TC works with socket
+ * 		buffers, it remains possible to set from XDP the **mark** or
+ * 		**priority** pointers, or other pointers for the socket buffer.
+ * 		Having this scratch space generic and programmable allows for
+ * 		more flexibility as the user is free to store whatever meta
+ * 		data they need.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * 	Description
+ * 		Read the value of a perf event counter, and store it into *buf*
+ * 		of size *buf_size*. This helper relies on a *map* of type
+ * 		**BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * 		counter is selected when *map* is updated with perf event file
+ * 		descriptors. The *map* is an array whose size is the number of
+ * 		available CPUs, and each cell contains a value relative to one
+ * 		CPU. The value to retrieve is indicated by *flags*, that
+ * 		contains the index of the CPU to look up, masked with
+ * 		**BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
+ * 		current CPU should be retrieved.
+ *
+ * 		This helper behaves in a way close to
+ * 		**bpf_perf_event_read**\ () helper, save that instead of
+ * 		just returning the value observed, it fills the *buf*
+ * 		structure. This allows for additional data to be retrieved: in
+ * 		particular, the enabled and running times (in *buf*\
+ * 		**->enabled** and *buf*\ **->running**, respectively) are
+ * 		copied. In general, **bpf_perf_event_read_value**\ () is
+ * 		recommended over **bpf_perf_event_read**\ (), which has some
+ * 		ABI issues and provides fewer functionalities.
+ *
+ * 		These values are interesting, because hardware PMU (Performance
+ * 		Monitoring Unit) counters are limited resources. When there are
+ * 		more PMU based perf events opened than available counters,
+ * 		kernel will multiplex these events so each event gets certain
+ * 		percentage (but not all) of the PMU time. In case that
+ * 		multiplexing happens, the number of samples or counter value
+ * 		will not reflect the case compared to when no multiplexing
+ * 		occurs. This makes comparison between different runs difficult.
+ * 		Typically, the counter value should be normalized before
+ * 		comparing to other experiments. The usual normalization is done
+ * 		as follows.
+ *
+ * 		::
+ *
+ * 			normalized_counter = counter * t_enabled / t_running
+ *
+ * 		Where t_enabled is the time enabled for event and t_running is
+ * 		the time running for event since last normalization. The
+ * 		enabled and running times are accumulated since the perf event
+ * 		open. To achieve scaling factor between two invocations of an
+ * 		eBPF program, users can can use CPU id as the key (which is
+ * 		typical for perf array usage model) to remember the previous
+ * 		value and do the calculation inside the eBPF program.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * 	Description
+ * 		For en eBPF program attached to a perf event, retrieve the
+ * 		value of the event counter associated to *ctx* and store it in
+ * 		the structure pointed by *buf* and of size *buf_size*. Enabled
+ * 		and running times are also stored in the structure (see
+ * 		description of helper **bpf_perf_event_read_value**\ () for
+ * 		more details).
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * 	Description
+ * 		Emulate a call to **getsockopt()** on the socket associated to
+ * 		*bpf_socket*, which must be a full socket. The *level* at
+ * 		which the option resides and the name *optname* of the option
+ * 		must be specified, see **getsockopt(2)** for more information.
+ * 		The retrieved value is stored in the structure pointed by
+ * 		*opval* and of length *optlen*.
+ *
+ * 		This helper actually implements a subset of **getsockopt()**.
+ * 		It supports the following *level*\ s:
+ *
+ * 		* **IPPROTO_TCP**, which supports *optname*
+ * 		  **TCP_CONGESTION**.
+ * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * 	Description
+ * 		Used for error injection, this helper uses kprobes to override
+ * 		the return value of the probed function, and to set it to *rc*.
+ * 		The first argument is the context *regs* on which the kprobe
+ * 		works.
+ *
+ * 		This helper works by setting setting the PC (program counter)
+ * 		to an override function which is run in place of the original
+ * 		probed function. This means the probed function is not run at
+ * 		all. The replacement function just returns with the required
+ * 		value.
+ *
+ * 		This helper has security implications, and thus is subject to
+ * 		restrictions. It is only available if the kernel was compiled
+ * 		with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * 		option, and in this case it only works on functions tagged with
+ * 		**ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * 		Also, the helper is only available for the architectures having
+ * 		the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * 		x86 architecture is the only one to support this feature.
+ * 	Return
+ * 		0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * 	Description
+ * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * 		for the full TCP socket associated to *bpf_sock_ops* to
+ * 		*argval*.
+ *
+ * 		The primary use of this field is to determine if there should
+ * 		be calls to eBPF programs of type
+ * 		**BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * 		code. A program of the same type can change its value, per
+ * 		connection and as necessary, when the connection is
+ * 		established. This field is directly accessible for reading, but
+ * 		this helper must be used for updates in order to return an
+ * 		error if an eBPF program tries to set a callback that is not
+ * 		supported in the current kernel.
+ *
+ * 		The supported callback values that *argval* can combine are:
+ *
+ * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * 		Here are some examples of where one could call such eBPF
+ * 		program:
+ *
+ * 		* When RTO fires.
+ * 		* When a packet is retransmitted.
+ * 		* When the connection terminates.
+ * 		* When a packet is sent.
+ * 		* When a packet is received.
+ * 	Return
+ * 		Code **-EINVAL** if the socket is not a full TCP socket;
+ * 		otherwise, a positive number containing the bits that could not
+ * 		be set is returned (which comes down to 0 if all bits were set
+ * 		as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * 	Description
+ * 		This helper is used in programs implementing policies at the
+ * 		socket level. If the message *msg* is allowed to pass (i.e. if
+ * 		the verdict eBPF program returns **SK_PASS**), redirect it to
+ * 		the socket referenced by *map* (of type
+ * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * 		egress interfaces can be used for redirection. The
+ * 		**BPF_F_INGRESS** value in *flags* is used to make the
+ * 		distinction (ingress path is selected if the flag is present,
+ * 		egress path otherwise). This is the only flag supported for now.
+ * 	Return
+ * 		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * 	Description
+ * 		For socket policies, apply the verdict of the eBPF program to
+ * 		the next *bytes* (number of bytes) of message *msg*.
+ *
+ * 		For example, this helper can be used in the following cases:
+ *
+ * 		* A single **sendmsg**\ () or **sendfile**\ () system call
+ * 		  contains multiple logical messages that the eBPF program is
+ * 		  supposed to read and for which it should apply a verdict.
+ * 		* An eBPF program only cares to read the first *bytes* of a
+ * 		  *msg*. If the message has a large payload, then setting up
+ * 		  and calling the eBPF program repeatedly for all bytes, even
+ * 		  though the verdict is already known, would create unnecessary
+ * 		  overhead.
+ *
+ * 		When called from within an eBPF program, the helper sets a
+ * 		counter internal to the BPF infrastructure, that is used to
+ * 		apply the last verdict to the next *bytes*. If *bytes* is
+ * 		smaller than the current data being processed from a
+ * 		**sendmsg**\ () or **sendfile**\ () system call, the first
+ * 		*bytes* will be sent and the eBPF program will be re-run with
+ * 		the pointer for start of data pointing to byte number *bytes*
+ * 		**+ 1**. If *bytes* is larger than the current data being
+ * 		processed, then the eBPF verdict will be applied to multiple
+ * 		**sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * 		consumed.
+ *
+ * 		Note that if a socket closes with the internal counter holding
+ * 		a non-zero value, this is not a problem because data is not
+ * 		being buffered for *bytes* and is sent as it is received.
+ * 	Return
+ * 		0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * 	Description
+ * 		For socket policies, prevent the execution of the verdict eBPF
+ * 		program for message *msg* until *bytes* (byte number) have been
+ * 		accumulated.
+ *
+ * 		This can be used when one needs a specific number of bytes
+ * 		before a verdict can be assigned, even if the data spans
+ * 		multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * 		case would be a user calling **sendmsg**\ () repeatedly with
+ * 		1-byte long message segments. Obviously, this is bad for
+ * 		performance, but it is still valid. If the eBPF program needs
+ * 		*bytes* bytes to validate a header, this helper can be used to
+ * 		prevent the eBPF program to be called again until *bytes* have
+ * 		been accumulated.
+ * 	Return
+ * 		0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * 	Description
+ * 		For socket policies, pull in non-linear data from user space
+ * 		for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * 		**->data_end** to *start* and *end* bytes offsets into *msg*,
+ * 		respectively.
+ *
+ * 		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * 		*msg* it can only parse data that the (**data**, **data_end**)
+ * 		pointers have already consumed. For **sendmsg**\ () hooks this
+ * 		is likely the first scatterlist element. But for calls relying
+ * 		on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * 		be the range (**0**, **0**) because the data is shared with
+ * 		user space and by default the objective is to avoid allowing
+ * 		user space to modify data while (or after) eBPF verdict is
+ * 		being decided. This helper can be used to pull in data and to
+ * 		set the start and end pointer to given values. Data will be
+ * 		copied if necessary (i.e. if data was not linear and if start
+ * 		and end pointers do not point to the same chunk).
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * 	Description
+ * 		Bind the socket associated to *ctx* to the address pointed by
+ * 		*addr*, of length *addr_len*. This allows for making outgoing
+ * 		connection from the desired IP address, which can be useful for
+ * 		example when all processes inside a cgroup should use one
+ * 		single IP address on a host that has multiple IP configured.
+ *
+ * 		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * 		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * 		**AF_INET6**). Looking for a free port to bind to can be
+ * 		expensive, therefore binding to port is not permitted by the
+ * 		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * 		must be set to zero.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * 	Description
+ * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * 		only possible to shrink the packet as of this writing,
+ * 		therefore *delta* must be a negative integer.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * 	Description
+ * 		Retrieve the XFRM state (IP transform framework, see also
+ * 		**ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * 		The retrieved value is stored in the **struct bpf_xfrm_state**
+ * 		pointed by *xfrm_state* and of length *size*.
+ *
+ * 		All values for *flags* are reserved for future usage, and must
+ * 		be left at zero.
+ *
+ * 		This helper is available only if the kernel was compiled with
+ * 		**CONFIG_XFRM** configuration option.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * 	Description
+ * 		Return a user or a kernel stack in bpf program provided buffer.
+ * 		To achieve this, the helper needs *ctx*, which is a pointer
+ * 		to the context on which the tracing program is executed.
+ * 		To store the stacktrace, the bpf program provides *buf* with
+ * 		a nonnegative *size*.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_USER_BUILD_ID**
+ * 			Collect buildid+offset instead of ips for user stack,
+ * 			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * 		**bpf_get_stack**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * 		to sufficient large buffer size. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ * 	Return
+ * 		A non-negative value equal to or less than *size* on success,
+ * 		or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * 	Description
+ * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * 		it provides an easy way to load *len* bytes from *offset*
+ * 		from the packet associated to *skb*, into the buffer pointed
+ * 		by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * 		a fifth argument *start_header* exists in order to select a
+ * 		base offset to start from. *start_header* can be one of:
+ *
+ * 		**BPF_HDR_START_MAC**
+ * 			Base offset to load data from is *skb*'s mac header.
+ * 		**BPF_HDR_START_NET**
+ * 			Base offset to load data from is *skb*'s network header.
+ *
+ * 		In general, "direct packet access" is the preferred method to
+ * 		access packet data, however, this helper is in particular useful
+ * 		in socket filters where *skb*\ **->data** does not always point
+ * 		to the start of the mac header and where "direct packet access"
+ * 		is not available.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ *	Description
+ *		Do FIB lookup in kernel tables using parameters in *params*.
+ *		If lookup is successful and result shows packet is to be
+ *		forwarded, the neighbor tables are searched for the nexthop.
+ *		If successful (ie., FIB lookup shows forwarding and nexthop
+ *		is resolved), the nexthop address is returned in ipv4_dst
+ *		or ipv6_dst based on family, smac is set to mac address of
+ *		egress device, dmac is set to nexthop mac address, rt_metric
+ *		is set to metric from route (IPv4/IPv6 only), and ifindex
+ *		is set to the device index of the nexthop from the FIB lookup.
+ *
+ *             *plen* argument is the size of the passed in struct.
+ *             *flags* argument can be a combination of one or more of the
+ *             following values:
+ *
+ *		**BPF_FIB_LOOKUP_DIRECT**
+ *			Do a direct table lookup vs full lookup using FIB
+ *			rules.
+ *		**BPF_FIB_LOOKUP_OUTPUT**
+ *			Perform lookup from an egress perspective (default is
+ *			ingress).
+ *
+ *             *ctx* is either **struct xdp_md** for XDP programs or
+ *             **struct sk_buff** tc cls_act programs.
+ *     Return
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (packet is forwarded, nexthop neighbor exists)
+ *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ *		  packet is not forwarded or needs assist from full stack
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Add an entry to, or update a sockhash *map* referencing sockets.
+ *		The *skops* is used as a new value for the entry associated to
+ *		*key*. *flags* is one of:
+ *
+ *		**BPF_NOEXIST**
+ *			The entry for *key* must not exist in the map.
+ *		**BPF_EXIST**
+ *			The entry for *key* must already exist in the map.
+ *		**BPF_ANY**
+ *			No condition on the existence of the entry for *key*.
+ *
+ *		If the *map* has eBPF programs (parser and verdict), those will
+ *		be inherited by the socket being added. If the socket is
+ *		already attached to eBPF programs, this results in an error.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		This helper is used in programs implementing policies at the
+ *		socket level. If the message *msg* is allowed to pass (i.e. if
+ *		the verdict eBPF program returns **SK_PASS**), redirect it to
+ *		the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress path otherwise). This is the only flag supported for now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		This helper is used in programs implementing policies at the
+ *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ *		if the verdeict eBPF program returns **SK_PASS**), redirect it
+ *		to the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress otherwise). This is the only flag supported for now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ *	Description
+ *		Encapsulate the packet associated to *skb* within a Layer 3
+ *		protocol header. This header is provided in the buffer at
+ *		address *hdr*, with *len* its size in bytes. *type* indicates
+ *		the protocol of the header and can be one of:
+ *
+ *		**BPF_LWT_ENCAP_SEG6**
+ *			IPv6 encapsulation with Segment Routing Header
+ *			(**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ *			the IPv6 header is computed by the kernel.
+ *		**BPF_LWT_ENCAP_SEG6_INLINE**
+ *			Only works if *skb* contains an IPv6 packet. Insert a
+ *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ *			the IPv6 header.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ *	Description
+ *		Store *len* bytes from address *from* into the packet
+ *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ *		inside the outermost IPv6 Segment Routing Header can be
+ *		modified through this helper.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ *	Description
+ *		Adjust the size allocated to TLVs in the outermost IPv6
+ *		Segment Routing Header contained in the packet associated to
+ *		*skb*, at position *offset* by *delta* bytes. Only offsets
+ *		after the segments are accepted. *delta* can be as well
+ *		positive (growing) as negative (shrinking).
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ *	Description
+ *		Apply an IPv6 Segment Routing action of type *action* to the
+ *		packet associated to *skb*. Each action takes a parameter
+ *		contained at address *param*, and of length *param_len* bytes.
+ *		*action* can be one of:
+ *
+ *		**SEG6_LOCAL_ACTION_END_X**
+ *			End.X action: Endpoint with Layer-3 cross-connect.
+ *			Type of *param*: **struct in6_addr**.
+ *		**SEG6_LOCAL_ACTION_END_T**
+ *			End.T action: Endpoint with specific IPv6 table lookup.
+ *			Type of *param*: **int**.
+ *		**SEG6_LOCAL_ACTION_END_B6**
+ *			End.B6 action: Endpoint bound to an SRv6 policy.
+ *			Type of param: **struct ipv6_sr_hdr**.
+ *		**SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ *			End.B6.Encap action: Endpoint bound to an SRv6
+ *			encapsulation policy.
+ *			Type of param: **struct ipv6_sr_hdr**.
+ *
+ * 		A call to this helper is susceptible to change the underlaying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ *	Description
+ *		This helper is used in programs implementing IR decoding, to
+ *		report a successfully decoded key press with *scancode*,
+ *		*toggle* value in the given *protocol*. The scancode will be
+ *		translated to a keycode using the rc keymap, and reported as
+ *		an input key down event. After a period a key up event is
+ *		generated. This period can be extended by calling either
+ *		**bpf_rc_keydown** () again with the same values, or calling
+ *		**bpf_rc_repeat** ().
+ *
+ *		Some protocols include a toggle bit, in case the button	was
+ *		released and pressed again between consecutive scancodes.
+ *
+ *		The *ctx* should point to the lirc sample as passed into
+ *		the program.
+ *
+ *		The *protocol* is the decoded protocol number (see
+ *		**enum rc_proto** for some predefined values).
+ *
+ *		This helper is only available is the kernel was compiled with
+ *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ *		"**y**".
+ *	Return
+ *		0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ *	Description
+ *		This helper is used in programs implementing IR decoding, to
+ *		report a successfully decoded repeat key message. This delays
+ *		the generation of a key up event for previously generated
+ *		key down event.
+ *
+ *		Some IR protocols like NEC have a special IR message for
+ *		repeating last button, for when a button is held down.
+ *
+ *		The *ctx* should point to the lirc sample as passed into
+ *		the program.
+ *
+ *		This helper is only available is the kernel was compiled with
+ *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ *		"**y**".
+ *	Return
+ *		0
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * 	Description
+ * 		Return the cgroup v2 id of the socket associated with the *skb*.
+ * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * 		helper for cgroup v1 by providing a tag resp. identifier that
+ * 		can be matched on or used for map lookups e.g. to implement
+ * 		policy. The cgroup v2 id of a given path in the hierarchy is
+ * 		exposed in user space through the f_handle API in order to get
+ * 		to the same 64-bit id.
+ *
+ * 		This helper can be used on TC egress path, but not on ingress,
+ * 		and is available only if the kernel was compiled with the
+ * 		**CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * 	Return
+ * 		A 64-bit integer containing the current cgroup id based
+ * 		on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ *	Description
+ *		Get the pointer to the local storage area.
+ *		The type and the size of the local storage is defined
+ *		by the *map* argument.
+ *		The *flags* meaning is specific for each map type,
+ *		and has to be 0 for cgroup local storage.
+ *
+ *		Depending on the bpf program type, a local storage area
+ *		can be shared between multiple instances of the bpf program,
+ *		running simultaneously.
+ *
+ *		A user should care about the synchronization by himself.
+ *		For example, by using the BPF_STX_XADD instruction to alter
+ *		the shared data.
+ *	Return
+ *		Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Select a SO_REUSEPORT sk from a	BPF_MAP_TYPE_REUSEPORT_ARRAY map
+ *		It checks the selected sk is matching the incoming
+ *		request in the skb.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *	Description
+ *		Look for TCP socket matching *tuple*, optionally in a child
+ *		network namespace *netns*. The return value must be checked,
+ *		and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ *		The *ctx* should point to the context of the program, such as
+ *		the skb or socket (depending on the hook in use). This is used
+ *		to determine the base network namespace for the lookup.
+ *
+ *		*tuple_size* must be one of:
+ *
+ *		**sizeof**\ (*tuple*\ **->ipv4**)
+ *			Look for an IPv4 socket.
+ *		**sizeof**\ (*tuple*\ **->ipv6**)
+ *			Look for an IPv6 socket.
+ *
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		will be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		**CONFIG_NET** configuration option.
+ *	Return
+ *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ *	Description
+ *		Look for UDP socket matching *tuple*, optionally in a child
+ *		network namespace *netns*. The return value must be checked,
+ *		and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ *		The *ctx* should point to the context of the program, such as
+ *		the skb or socket (depending on the hook in use). This is used
+ *		to determine the base network namespace for the lookup.
+ *
+ *		*tuple_size* must be one of:
+ *
+ *		**sizeof**\ (*tuple*\ **->ipv4**)
+ *			Look for an IPv4 socket.
+ *		**sizeof**\ (*tuple*\ **->ipv6**)
+ *			Look for an IPv6 socket.
+ *
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		will be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		**CONFIG_NET** configuration option.
+ *	Return
+ *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
+ *
+ * int bpf_sk_release(struct bpf_sock *sk)
+ *	Description
+ *		Release the reference held by *sock*. *sock* must be a non-NULL
+ *		pointer that was returned from bpf_sk_lookup_xxx\ ().
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ *	Description
+ *		For socket policies, insert *len* bytes into msg at offset
+ *		*start*.
+ *
+ *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ *		*msg* it may want to insert metadata or options into the msg.
+ *		This can later be read and used by any of the lower layer BPF
+ *		hooks.
+ *
+ *		This helper may fail if under memory pressure (a malloc
+ *		fails) in these cases BPF programs will get an appropriate
+ *		error and BPF programs will need to handle them.
+ *
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -821,7 +2331,33 @@ union bpf_attr {
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
-	FN(bind),
+	FN(bind),			\
+	FN(xdp_adjust_tail),		\
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),			\
+	FN(skb_load_bytes_relative),	\
+	FN(fib_lookup),			\
+	FN(sock_hash_update),		\
+	FN(msg_redirect_hash),		\
+	FN(sk_redirect_hash),		\
+	FN(lwt_push_encap),		\
+	FN(lwt_seg6_store_bytes),	\
+	FN(lwt_seg6_adjust_srh),	\
+	FN(lwt_seg6_action),		\
+	FN(rc_repeat),			\
+	FN(rc_keydown),			\
+	FN(skb_cgroup_id),		\
+	FN(get_current_cgroup_id),	\
+	FN(get_local_storage),		\
+	FN(sk_select_reuseport),	\
+	FN(skb_ancestor_cgroup_id),	\
+	FN(sk_lookup_tcp),		\
+	FN(sk_lookup_udp),		\
+	FN(sk_release),			\
+	FN(map_push_elem),		\
+	FN(map_pop_elem),		\
+	FN(map_peek_elem),		\
+	FN(msg_push_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -855,11 +2391,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
@@ -874,11 +2413,32 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS		(-1L)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
 };
 
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+	BPF_HDR_START_MAC,
+	BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+	BPF_LWT_ENCAP_SEG6,
+	BPF_LWT_ENCAP_SEG6_INLINE
+};
+
+#define __bpf_md_ptr(type, name)	\
+union {					\
+	type name;			\
+	__u64 :64;			\
+} __attribute__((aligned(8)))
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -913,6 +2473,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
+	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 };
 
 struct bpf_tunnel_key {
@@ -923,10 +2484,24 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;
+	__u16 tunnel_ext;	/* Padding, future use. */
 	__u32 tunnel_label;
 };
 
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+	__u32 reqid;
+	__u32 spi;	/* Stored in network byte order */
+	__u16 family;
+	__u16 ext;	/* Padding, future use. */
+	union {
+		__u32 remote_ipv4;	/* Stored in network byte order */
+		__u32 remote_ipv6[4];	/* Stored in network byte order */
+	};
+};
+
 /* Generic BPF return codes which all BPF program types may support.
  * The values are binary compatible with their TC_ACT_* counter-part to
  * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -961,6 +2536,23 @@ struct bpf_sock {
 				 */
 };
 
+struct bpf_sock_tuple {
+	union {
+		struct {
+			__be32 saddr;
+			__be32 daddr;
+			__be16 sport;
+			__be16 dport;
+		} ipv4;
+		struct {
+			__be32 saddr[4];
+			__be32 daddr[4];
+			__be16 sport;
+			__be16 dport;
+		} ipv6;
+	};
+};
+
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
@@ -997,8 +2589,41 @@ enum sk_action {
  * be added to the end of this structure
  */
 struct sk_msg_md {
-	void *data;
-	void *data_end;
+	__bpf_md_ptr(void *, data);
+	__bpf_md_ptr(void *, data_end);
+
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
+};
+
+struct sk_reuseport_md {
+	/*
+	 * Start of directly accessible data. It begins from
+	 * the tcp/udp header.
+	 */
+	__bpf_md_ptr(void *, data);
+	/* End of directly accessible data */
+	__bpf_md_ptr(void *, data_end);
+	/*
+	 * Total length of packet (starting from the tcp/udp header).
+	 * Note that the directly accessible bytes (data_end - data)
+	 * could be less than this "len".  Those bytes could be
+	 * indirectly read by a helper "bpf_skb_load_bytes()".
+	 */
+	__u32 len;
+	/*
+	 * Eth protocol in the mac header (network byte order). e.g.
+	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+	 */
+	__u32 eth_protocol;
+	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+	__u32 bind_inany;	/* Is sock bound to an INANY address? */
+	__u32 hash;		/* A hash of the packet 4 tuples */
 };
 
 #define BPF_TAG_SIZE	8
@@ -1017,9 +2642,13 @@ struct bpf_prog_info {
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
-	__u32 :32;
+	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__u32 nr_jited_func_lens;
+	__aligned_u64 jited_ksyms;
+	__aligned_u64 jited_func_lens;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -1034,6 +2663,15 @@ struct bpf_map_info {
 	__u32 :32;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 btf_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+	__aligned_u64 btf;
+	__u32 btf_size;
+	__u32 id;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -1054,6 +2692,12 @@ struct bpf_sock_addr {
 	__u32 family;		/* Allows 4-byte read, but no write */
 	__u32 type;		/* Allows 4-byte read, but no write */
 	__u32 protocol;		/* Allows 4-byte read, but no write */
+	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read an 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+				 * Stored in network byte order.
+				 */
 };
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -1163,6 +2807,9 @@ enum {
 					 * Arg1: old_state
 					 * Arg2: new_state
 					 */
+	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
+					 * socket transition to LISTEN state.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -1214,4 +2861,103 @@ struct bpf_raw_tracepoint_args {
 	__u64 args[0];
 };
 
+/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:  Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+
+enum {
+	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
+	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
+	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
+	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
+	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+};
+
+struct bpf_fib_lookup {
+	/* input:  network family for lookup (AF_INET, AF_INET6)
+	 * output: network family of egress nexthop
+	 */
+	__u8	family;
+
+	/* set if lookup is to consider L4 data - e.g., FIB rules */
+	__u8	l4_protocol;
+	__be16	sport;
+	__be16	dport;
+
+	/* total length of packet from network header - used for MTU check */
+	__u16	tot_len;
+
+	/* input: L3 device index for lookup
+	 * output: device index from FIB lookup
+	 */
+	__u32	ifindex;
+
+	union {
+		/* inputs to lookup */
+		__u8	tos;		/* AF_INET  */
+		__be32	flowinfo;	/* AF_INET6, flow_label + priority */
+
+		/* output: metric of fib result (IPv4/IPv6 only) */
+		__u32	rt_metric;
+	};
+
+	union {
+		__be32		ipv4_src;
+		__u32		ipv6_src[4];  /* in6_addr; network order */
+	};
+
+	/* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+	 * network header. output: bpf_fib_lookup sets to gateway address
+	 * if FIB lookup returns gateway route
+	 */
+	union {
+		__be32		ipv4_dst;
+		__u32		ipv6_dst[4];  /* in6_addr; network order */
+	};
+
+	/* output */
+	__be16	h_vlan_proto;
+	__be16	h_vlan_TCI;
+	__u8	smac[6];     /* ETH_ALEN */
+	__u8	dmac[6];     /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
+	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
+	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
+	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
+	BPF_FD_TYPE_UPROBE,		/* filename + offset */
+	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
+};
+
+struct bpf_flow_keys {
+	__u16	nhoff;
+	__u16	thoff;
+	__u16	addr_proto;			/* ETH_P_* of valid addrs */
+	__u8	is_frag;
+	__u8	is_first_frag;
+	__u8	is_encap;
+	__u8	ip_proto;
+	__be16	n_proto;
+	__be16	sport;
+	__be16	dport;
+	union {
+		struct {
+			__be32	ipv4_src;
+			__be32	ipv4_dst;
+		};
+		struct {
+			__u32	ipv6_src[4];	/* in6_addr; network order */
+			__u32	ipv6_dst[4];	/* in6_addr; network order */
+		};
+	};
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..972265f32871
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC	0xeB9F
+#define BTF_VERSION	1
+
+struct btf_header {
+	__u16	magic;
+	__u8	version;
+	__u8	flags;
+	__u32	hdr_len;
+
+	/* All offsets are in bytes relative to the end of this header */
+	__u32	type_off;	/* offset of type section	*/
+	__u32	type_len;	/* length of type section	*/
+	__u32	str_off;	/* offset of string section	*/
+	__u32	str_len;	/* length of string section	*/
+};
+
+/* Max # of type identifier */
+#define BTF_MAX_TYPE	0x0000ffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET	0x0000ffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN	0xffff
+
+struct btf_type {
+	__u32 name_off;
+	/* "info" bits arrangement
+	 * bits  0-15: vlen (e.g. # of struct's members)
+	 * bits 16-23: unused
+	 * bits 24-27: kind (e.g. int, ptr, array...etc)
+	 * bits 28-31: unused
+	 */
+	__u32 info;
+	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	 * "size" tells the size of the type it is describing.
+	 *
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is a type_id referring to another type.
+	 */
+	union {
+		__u32 size;
+		__u32 type;
+	};
+};
+
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_VLEN(info)	((info) & 0xffff)
+
+#define BTF_KIND_UNKN		0	/* Unknown	*/
+#define BTF_KIND_INT		1	/* Integer	*/
+#define BTF_KIND_PTR		2	/* Pointer	*/
+#define BTF_KIND_ARRAY		3	/* Array	*/
+#define BTF_KIND_STRUCT		4	/* Struct	*/
+#define BTF_KIND_UNION		5	/* Union	*/
+#define BTF_KIND_ENUM		6	/* Enumeration	*/
+#define BTF_KIND_FWD		7	/* Forward	*/
+#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
+#define BTF_KIND_VOLATILE	9	/* Volatile	*/
+#define BTF_KIND_CONST		10	/* Const	*/
+#define BTF_KIND_RESTRICT	11	/* Restrict	*/
+#define BTF_KIND_MAX		11
+#define NR_BTF_KINDS		12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
+#define BTF_INT_OFFSET(VAL)	(((VAL  & 0x00ff0000)) >> 16)
+#define BTF_INT_BITS(VAL)	((VAL)  & 0x000000ff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED	(1 << 0)
+#define BTF_INT_CHAR	(1 << 1)
+#define BTF_INT_BOOL	(1 << 2)
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+	__u32	name_off;
+	__s32	val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+	__u32	type;
+	__u32	index_type;
+	__u32	nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member".  The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+	__u32	name_off;
+	__u32	type;
+	__u32	offset;	/* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h
new file mode 100644
index 000000000000..841573019ae1
--- /dev/null
+++ b/tools/include/uapi/linux/erspan.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ERSPAN Tunnel Metadata
+ *
+ * Copyright (c) 2018 VMware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * Userspace API for metadata mode ERSPAN tunnel
+ */
+#ifndef _UAPI_ERSPAN_H
+#define _UAPI_ERSPAN_H
+
+#include <linux/types.h>	/* For __beXX in userspace */
+#include <asm/byteorder.h>
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+	__be32 timestamp;
+	__be16 sgt;	/* security group tag */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8	hwid_upper:2,
+		ft:5,
+		p:1;
+	__u8	o:1,
+		gra:2,
+		dir:1,
+		hwid:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8	p:1,
+		ft:5,
+		hwid_upper:2;
+	__u8	hwid:4,
+		dir:1,
+		gra:2,
+		o:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+struct erspan_metadata {
+	int version;
+	union {
+		__be32 index;		/* Version 1 (type II)*/
+		struct erspan_md2 md2;	/* Version 2 (type III) */
+	} u;
+};
+
+#endif /* _UAPI_ERSPAN_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
new file mode 100644
index 000000000000..a441ea1bfe6d
--- /dev/null
+++ b/tools/include/uapi/linux/fs.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FS_H
+#define _UAPI_LINUX_FS_H
+
+/*
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
+ */
+
+#include <linux/limits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
+ * the file limit at runtime and only root can increase the per-process
+ * nr_file rlimit, so it's safe to set up a ridiculously high absolute
+ * upper limit on files-per-process.
+ *
+ * Some programs (notably those using select()) may have to be 
+ * recompiled to take full advantage of the new limits..  
+ */
+
+/* Fixed constants first: */
+#undef NR_OPEN
+#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
+
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+#define SEEK_SET	0	/* seek relative to beginning of file */
+#define SEEK_CUR	1	/* seek relative to current file position */
+#define SEEK_END	2	/* seek relative to end of file */
+#define SEEK_DATA	3	/* seek to the next data */
+#define SEEK_HOLE	4	/* seek to the next hole */
+#define SEEK_MAX	SEEK_HOLE
+
+#define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
+#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
+
+struct file_clone_range {
+	__s64 src_fd;
+	__u64 src_offset;
+	__u64 src_length;
+	__u64 dest_offset;
+};
+
+struct fstrim_range {
+	__u64 start;
+	__u64 len;
+	__u64 minlen;
+};
+
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME		0
+#define FILE_DEDUPE_RANGE_DIFFERS	1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+	__s64 dest_fd;		/* in - destination file */
+	__u64 dest_offset;	/* in - start of extent in destination */
+	__u64 bytes_deduped;	/* out - total # of bytes we were able
+				 * to dedupe from this file. */
+	/* status of this dedupe operation:
+	 * < 0 for error
+	 * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+	 * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+	 */
+	__s32 status;		/* out - see above description */
+	__u32 reserved;		/* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+	__u64 src_offset;	/* in - start of extent in source */
+	__u64 src_length;	/* in - length of extent */
+	__u16 dest_count;	/* in - total elements in info array */
+	__u16 reserved1;	/* must be zero */
+	__u32 reserved2;	/* must be zero */
+	struct file_dedupe_range_info info[0];
+};
+
+/* And dynamically-tunable limits and defaults: */
+struct files_stat_struct {
+	unsigned long nr_files;		/* read only */
+	unsigned long nr_free_files;	/* read only */
+	unsigned long max_files;		/* tunable */
+};
+
+struct inodes_stat_t {
+	long nr_inodes;
+	long nr_unused;
+	long dummy[5];		/* padding for sysctl ABI compatibility */
+};
+
+
+#define NR_FILE  8192	/* this can well be larger on a larger system */
+
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ */
+#define MS_RDONLY	 1	/* Mount read-only */
+#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
+#define MS_NODEV	 4	/* Disallow access to device special files */
+#define MS_NOEXEC	 8	/* Disallow program execution */
+#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
+#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
+#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
+#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_NOATIME	1024	/* Do not update access times. */
+#define MS_NODIRATIME	2048	/* Do not update directory access times */
+#define MS_BIND		4096
+#define MS_MOVE		8192
+#define MS_REC		16384
+#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
+				   MS_VERBOSE is deprecated. */
+#define MS_SILENT	32768
+#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
+#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
+#define MS_PRIVATE	(1<<18)	/* change to private */
+#define MS_SLAVE	(1<<19)	/* change to slave */
+#define MS_SHARED	(1<<20)	/* change to shared */
+#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION	(1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT     (1<<26)
+#define MS_NOREMOTELOCK	(1<<27)
+#define MS_NOSEC	(1<<28)
+#define MS_BORN		(1<<29)
+#define MS_ACTIVE	(1<<30)
+#define MS_NOUSER	(1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+			 MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+	__u32		fsx_xflags;	/* xflags field value (get/set) */
+	__u32		fsx_extsize;	/* extsize field value (get/set)*/
+	__u32		fsx_nextents;	/* nextents field value (get)	*/
+	__u32		fsx_projid;	/* project identifier (get/set) */
+	__u32		fsx_cowextsize;	/* CoW extsize field value (get/set)*/
+	unsigned char	fsx_pad[8];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME	0x00000001	/* data in realtime volume */
+#define FS_XFLAG_PREALLOC	0x00000002	/* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE	0x00000008	/* file cannot be modified */
+#define FS_XFLAG_APPEND		0x00000010	/* all writes append */
+#define FS_XFLAG_SYNC		0x00000020	/* all writes synchronous */
+#define FS_XFLAG_NOATIME	0x00000040	/* do not update access time */
+#define FS_XFLAG_NODUMP		0x00000080	/* do not include in backups */
+#define FS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
+#define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
+#define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
+#define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
+
+/* the read-only stuff doesn't really belong here, but any other place is
+   probably as bad and I don't want to create yet another include file. */
+
+#define BLKROSET   _IO(0x12,93)	/* set device read-only (0 = read-write) */
+#define BLKROGET   _IO(0x12,94)	/* get read-only status (0 = read_write) */
+#define BLKRRPART  _IO(0x12,95)	/* re-read partition table */
+#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
+#define BLKFLSBUF  _IO(0x12,97)	/* flush buffer cache */
+#define BLKRASET   _IO(0x12,98)	/* set read ahead for block device */
+#define BLKRAGET   _IO(0x12,99)	/* get current read ahead setting */
+#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
+#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
+#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
+#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
+#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
+#if 0
+#define BLKPG      _IO(0x12,105)/* See blkpg.h */
+
+/* Some people are morons.  Do not use sizeof! */
+
+#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
+#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
+/* This was here just to show that the number is taken -
+   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
+#endif
+/* A jump here: 108-111 have been used for various private purposes. */
+#define BLKBSZGET  _IOR(0x12,112,size_t)
+#define BLKBSZSET  _IOW(0x12,113,size_t)
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
+
+#define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
+#define FIBMAP	   _IO(0x00,1)	/* bmap access */
+#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
+#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
+#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FICLONE		_IOW(0x94, 9, int)
+#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE	_IOWR(0x94, 54, struct file_dedupe_range)
+
+#define FSLABEL_MAX 256	/* Max chars for the interface; each fs may differ */
+
+#define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
+#define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
+#define	FS_IOC_GETVERSION		_IOR('v', 1, long)
+#define	FS_IOC_SETVERSION		_IOW('v', 2, long)
+#define FS_IOC_FIEMAP			_IOWR('f', 11, struct fiemap)
+#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
+#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
+#define FS_IOC32_SETVERSION		_IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR		_IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL		_IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL		_IOW(0x94, 50, char[FSLABEL_MAX])
+
+/*
+ * File system encryption support
+ */
+/* Policy provided via an ioctl on the topmost directory */
+#define FS_KEY_DESCRIPTOR_SIZE	8
+
+#define FS_POLICY_FLAGS_PAD_4		0x00
+#define FS_POLICY_FLAGS_PAD_8		0x01
+#define FS_POLICY_FLAGS_PAD_16		0x02
+#define FS_POLICY_FLAGS_PAD_32		0x03
+#define FS_POLICY_FLAGS_PAD_MASK	0x03
+#define FS_POLICY_FLAGS_VALID		0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID		0
+#define FS_ENCRYPTION_MODE_AES_256_XTS		1
+#define FS_ENCRYPTION_MODE_AES_256_GCM		2
+#define FS_ENCRYPTION_MODE_AES_256_CBC		3
+#define FS_ENCRYPTION_MODE_AES_256_CTS		4
+#define FS_ENCRYPTION_MODE_AES_128_CBC		5
+#define FS_ENCRYPTION_MODE_AES_128_CTS		6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
+
+struct fscrypt_policy {
+	__u8 version;
+	__u8 contents_encryption_mode;
+	__u8 filenames_encryption_mode;
+	__u8 flags;
+	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
+
+/* Parameters for passing an encryption key into the kernel keyring */
+#define FS_KEY_DESC_PREFIX		"fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE		8
+
+/* Structure that userspace passes to the kernel keyring */
+#define FS_MAX_KEY_SIZE			64
+
+struct fscrypt_key {
+	__u32 mode;
+	__u8 raw[FS_MAX_KEY_SIZE];
+	__u32 size;
+};
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
+ */
+#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
+#define	FS_UNRM_FL			0x00000002 /* Undelete */
+#define	FS_COMPR_FL			0x00000004 /* Compress file */
+#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
+#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL			0x00000040 /* do not dump file */
+#define FS_NOATIME_FL			0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL			0x00000100
+#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
+/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL			0x00000800 /* Encrypted file */
+#define FS_BTREE_FL			0x00001000 /* btree format dir */
+#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL			0x00040000 /* Reserved for ext4 */
+#define FS_EXTENT_FL			0x00080000 /* Extents */
+#define FS_EA_INODE_FL			0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL			0x00400000 /* Reserved for ext4 */
+#define FS_NOCOW_FL			0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL		0x10000000 /* Reserved for ext4 */
+#define FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
+#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
+
+
+#define SYNC_FILE_RANGE_WAIT_BEFORE	1
+#define SYNC_FILE_RANGE_WRITE		2
+#define SYNC_FILE_RANGE_WAIT_AFTER	4
+
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI	((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC	((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC	((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT	((__force __kernel_rwf_t)0x00000008)
+
+/* per-IO O_APPEND */
+#define RWF_APPEND	((__force __kernel_rwf_t)0x00000010)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+			 RWF_APPEND)
+
+#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 68699f654118..1debfa42cba1 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -161,9 +161,12 @@ enum {
 	IFLA_EVENT,
 	IFLA_NEW_NETNSID,
 	IFLA_IF_NETNSID,
+	IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
 	IFLA_NEW_IFINDEX,
+	IFLA_MIN_MTU,
+	IFLA_MAX_MTU,
 	__IFLA_MAX
 };
 
@@ -284,6 +287,7 @@ enum {
 	IFLA_BR_MCAST_STATS_ENABLED,
 	IFLA_BR_MCAST_IGMP_VERSION,
 	IFLA_BR_MCAST_MLD_VERSION,
+	IFLA_BR_VLAN_STATS_PER_PORT,
 	__IFLA_BR_MAX,
 };
 
@@ -333,6 +337,8 @@ enum {
 	IFLA_BRPORT_BCAST_FLOOD,
 	IFLA_BRPORT_GROUP_FWD_MASK,
 	IFLA_BRPORT_NEIGH_SUPPRESS,
+	IFLA_BRPORT_ISOLATED,
+	IFLA_BRPORT_BACKUP_PORT,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -458,6 +464,16 @@ enum {
 
 #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 
+/* XFRM section */
+enum {
+	IFLA_XFRM_UNSPEC,
+	IFLA_XFRM_LINK,
+	IFLA_XFRM_IF_ID,
+	__IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
 enum macsec_validation_type {
 	MACSEC_VALIDATE_DISABLED = 0,
 	MACSEC_VALIDATE_CHECK = 1,
@@ -516,6 +532,7 @@ enum {
 	IFLA_VXLAN_COLLECT_METADATA,
 	IFLA_VXLAN_LABEL,
 	IFLA_VXLAN_GPE,
+	IFLA_VXLAN_TTL_INHERIT,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
@@ -539,6 +556,7 @@ enum {
 	IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
 	IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 	IFLA_GENEVE_LABEL,
+	IFLA_GENEVE_TTL_INHERIT,
 	__IFLA_GENEVE_MAX
 };
 #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
@@ -918,6 +936,7 @@ enum {
 	XDP_ATTACHED_DRV,
 	XDP_ATTACHED_SKB,
 	XDP_ATTACHED_HW,
+	XDP_ATTACHED_MULTI,
 };
 
 enum {
@@ -926,6 +945,9 @@ enum {
 	IFLA_XDP_ATTACHED,
 	IFLA_XDP_FLAGS,
 	IFLA_XDP_PROG_ID,
+	IFLA_XDP_DRV_PROG_ID,
+	IFLA_XDP_SKB_PROG_ID,
+	IFLA_XDP_HW_PROG_ID,
 	__IFLA_XDP_MAX,
 };
 
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
new file mode 100644
index 000000000000..48e8a225b985
--- /dev/null
+++ b/tools/include/uapi/linux/in.h
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Definitions of the Internet Protocol.
+ *
+ * Version:	@(#)in.h	1.0.1	04/21/93
+ *
+ * Authors:	Original taken from the GNU Project <netinet/in.h> file.
+ *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_LINUX_IN_H
+#define _UAPI_LINUX_IN_H
+
+#include <linux/types.h>
+#include <linux/libc-compat.h>
+#include <linux/socket.h>
+
+#if __UAPI_DEF_IN_IPPROTO
+/* Standard well-defined IP protocols.  */
+enum {
+  IPPROTO_IP = 0,		/* Dummy protocol for TCP		*/
+#define IPPROTO_IP		IPPROTO_IP
+  IPPROTO_ICMP = 1,		/* Internet Control Message Protocol	*/
+#define IPPROTO_ICMP		IPPROTO_ICMP
+  IPPROTO_IGMP = 2,		/* Internet Group Management Protocol	*/
+#define IPPROTO_IGMP		IPPROTO_IGMP
+  IPPROTO_IPIP = 4,		/* IPIP tunnels (older KA9Q tunnels use 94) */
+#define IPPROTO_IPIP		IPPROTO_IPIP
+  IPPROTO_TCP = 6,		/* Transmission Control Protocol	*/
+#define IPPROTO_TCP		IPPROTO_TCP
+  IPPROTO_EGP = 8,		/* Exterior Gateway Protocol		*/
+#define IPPROTO_EGP		IPPROTO_EGP
+  IPPROTO_PUP = 12,		/* PUP protocol				*/
+#define IPPROTO_PUP		IPPROTO_PUP
+  IPPROTO_UDP = 17,		/* User Datagram Protocol		*/
+#define IPPROTO_UDP		IPPROTO_UDP
+  IPPROTO_IDP = 22,		/* XNS IDP protocol			*/
+#define IPPROTO_IDP		IPPROTO_IDP
+  IPPROTO_TP = 29,		/* SO Transport Protocol Class 4	*/
+#define IPPROTO_TP		IPPROTO_TP
+  IPPROTO_DCCP = 33,		/* Datagram Congestion Control Protocol */
+#define IPPROTO_DCCP		IPPROTO_DCCP
+  IPPROTO_IPV6 = 41,		/* IPv6-in-IPv4 tunnelling		*/
+#define IPPROTO_IPV6		IPPROTO_IPV6
+  IPPROTO_RSVP = 46,		/* RSVP Protocol			*/
+#define IPPROTO_RSVP		IPPROTO_RSVP
+  IPPROTO_GRE = 47,		/* Cisco GRE tunnels (rfc 1701,1702)	*/
+#define IPPROTO_GRE		IPPROTO_GRE
+  IPPROTO_ESP = 50,		/* Encapsulation Security Payload protocol */
+#define IPPROTO_ESP		IPPROTO_ESP
+  IPPROTO_AH = 51,		/* Authentication Header protocol	*/
+#define IPPROTO_AH		IPPROTO_AH
+  IPPROTO_MTP = 92,		/* Multicast Transport Protocol		*/
+#define IPPROTO_MTP		IPPROTO_MTP
+  IPPROTO_BEETPH = 94,		/* IP option pseudo header for BEET	*/
+#define IPPROTO_BEETPH		IPPROTO_BEETPH
+  IPPROTO_ENCAP = 98,		/* Encapsulation Header			*/
+#define IPPROTO_ENCAP		IPPROTO_ENCAP
+  IPPROTO_PIM = 103,		/* Protocol Independent Multicast	*/
+#define IPPROTO_PIM		IPPROTO_PIM
+  IPPROTO_COMP = 108,		/* Compression Header Protocol		*/
+#define IPPROTO_COMP		IPPROTO_COMP
+  IPPROTO_SCTP = 132,		/* Stream Control Transport Protocol	*/
+#define IPPROTO_SCTP		IPPROTO_SCTP
+  IPPROTO_UDPLITE = 136,	/* UDP-Lite (RFC 3828)			*/
+#define IPPROTO_UDPLITE		IPPROTO_UDPLITE
+  IPPROTO_MPLS = 137,		/* MPLS in IP (RFC 4023)		*/
+#define IPPROTO_MPLS		IPPROTO_MPLS
+  IPPROTO_RAW = 255,		/* Raw IP packets			*/
+#define IPPROTO_RAW		IPPROTO_RAW
+  IPPROTO_MAX
+};
+#endif
+
+#if __UAPI_DEF_IN_ADDR
+/* Internet address. */
+struct in_addr {
+	__be32	s_addr;
+};
+#endif
+
+#define IP_TOS		1
+#define IP_TTL		2
+#define IP_HDRINCL	3
+#define IP_OPTIONS	4
+#define IP_ROUTER_ALERT	5
+#define IP_RECVOPTS	6
+#define IP_RETOPTS	7
+#define IP_PKTINFO	8
+#define IP_PKTOPTIONS	9
+#define IP_MTU_DISCOVER	10
+#define IP_RECVERR	11
+#define IP_RECVTTL	12
+#define	IP_RECVTOS	13
+#define IP_MTU		14
+#define IP_FREEBIND	15
+#define IP_IPSEC_POLICY	16
+#define IP_XFRM_POLICY	17
+#define IP_PASSSEC	18
+#define IP_TRANSPARENT	19
+
+/* BSD compatibility */
+#define IP_RECVRETOPTS	IP_RETOPTS
+
+/* TProxy original addresses */
+#define IP_ORIGDSTADDR       20
+#define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
+
+#define IP_MINTTL       21
+#define IP_NODEFRAG     22
+#define IP_CHECKSUM	23
+#define IP_BIND_ADDRESS_NO_PORT	24
+#define IP_RECVFRAGSIZE	25
+
+/* IP_MTU_DISCOVER values */
+#define IP_PMTUDISC_DONT		0	/* Never send DF frames */
+#define IP_PMTUDISC_WANT		1	/* Use per route hints	*/
+#define IP_PMTUDISC_DO			2	/* Always DF		*/
+#define IP_PMTUDISC_PROBE		3       /* Ignore dst pmtu      */
+/* Always use interface mtu (ignores dst pmtu) but don't set DF flag.
+ * Also incoming ICMP frag_needed notifications will be ignored on
+ * this socket to prevent accepting spoofed ones.
+ */
+#define IP_PMTUDISC_INTERFACE		4
+/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get
+ * fragmented if they exeed the interface mtu
+ */
+#define IP_PMTUDISC_OMIT		5
+
+#define IP_MULTICAST_IF			32
+#define IP_MULTICAST_TTL 		33
+#define IP_MULTICAST_LOOP 		34
+#define IP_ADD_MEMBERSHIP		35
+#define IP_DROP_MEMBERSHIP		36
+#define IP_UNBLOCK_SOURCE		37
+#define IP_BLOCK_SOURCE			38
+#define IP_ADD_SOURCE_MEMBERSHIP	39
+#define IP_DROP_SOURCE_MEMBERSHIP	40
+#define IP_MSFILTER			41
+#define MCAST_JOIN_GROUP		42
+#define MCAST_BLOCK_SOURCE		43
+#define MCAST_UNBLOCK_SOURCE		44
+#define MCAST_LEAVE_GROUP		45
+#define MCAST_JOIN_SOURCE_GROUP		46
+#define MCAST_LEAVE_SOURCE_GROUP	47
+#define MCAST_MSFILTER			48
+#define IP_MULTICAST_ALL		49
+#define IP_UNICAST_IF			50
+
+#define MCAST_EXCLUDE	0
+#define MCAST_INCLUDE	1
+
+/* These need to appear somewhere around here */
+#define IP_DEFAULT_MULTICAST_TTL        1
+#define IP_DEFAULT_MULTICAST_LOOP       1
+
+/* Request struct for multicast socket ops */
+
+#if __UAPI_DEF_IP_MREQ
+struct ip_mreq  {
+	struct in_addr imr_multiaddr;	/* IP multicast address of group */
+	struct in_addr imr_interface;	/* local IP address of interface */
+};
+
+struct ip_mreqn {
+	struct in_addr	imr_multiaddr;		/* IP multicast address of group */
+	struct in_addr	imr_address;		/* local IP address of interface */
+	int		imr_ifindex;		/* Interface index */
+};
+
+struct ip_mreq_source {
+	__be32		imr_multiaddr;
+	__be32		imr_interface;
+	__be32		imr_sourceaddr;
+};
+
+struct ip_msfilter {
+	__be32		imsf_multiaddr;
+	__be32		imsf_interface;
+	__u32		imsf_fmode;
+	__u32		imsf_numsrc;
+	__be32		imsf_slist[1];
+};
+
+#define IP_MSFILTER_SIZE(numsrc) \
+	(sizeof(struct ip_msfilter) - sizeof(__u32) \
+	+ (numsrc) * sizeof(__u32))
+
+struct group_req {
+	__u32				 gr_interface;	/* interface index */
+	struct __kernel_sockaddr_storage gr_group;	/* group address */
+};
+
+struct group_source_req {
+	__u32				 gsr_interface;	/* interface index */
+	struct __kernel_sockaddr_storage gsr_group;	/* group address */
+	struct __kernel_sockaddr_storage gsr_source;	/* source address */
+};
+
+struct group_filter {
+	__u32				 gf_interface;	/* interface index */
+	struct __kernel_sockaddr_storage gf_group;	/* multicast address */
+	__u32				 gf_fmode;	/* filter mode */
+	__u32				 gf_numsrc;	/* number of sources */
+	struct __kernel_sockaddr_storage gf_slist[1];	/* interface index */
+};
+
+#define GROUP_FILTER_SIZE(numsrc) \
+	(sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
+	+ (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
+
+#if __UAPI_DEF_IN_PKTINFO
+struct in_pktinfo {
+	int		ipi_ifindex;
+	struct in_addr	ipi_spec_dst;
+	struct in_addr	ipi_addr;
+};
+#endif
+
+/* Structure describing an Internet (IP) socket address. */
+#if  __UAPI_DEF_SOCKADDR_IN
+#define __SOCK_SIZE__	16		/* sizeof(struct sockaddr)	*/
+struct sockaddr_in {
+  __kernel_sa_family_t	sin_family;	/* Address family		*/
+  __be16		sin_port;	/* Port number			*/
+  struct in_addr	sin_addr;	/* Internet address		*/
+
+  /* Pad to size of `struct sockaddr'. */
+  unsigned char		__pad[__SOCK_SIZE__ - sizeof(short int) -
+			sizeof(unsigned short int) - sizeof(struct in_addr)];
+};
+#define sin_zero	__pad		/* for BSD UNIX comp. -FvK	*/
+#endif
+
+#if __UAPI_DEF_IN_CLASS
+/*
+ * Definitions of the bits in an Internet address integer.
+ * On subnets, host and network parts are found according
+ * to the subnet mask, not these masks.
+ */
+#define	IN_CLASSA(a)		((((long int) (a)) & 0x80000000) == 0)
+#define	IN_CLASSA_NET		0xff000000
+#define	IN_CLASSA_NSHIFT	24
+#define	IN_CLASSA_HOST		(0xffffffff & ~IN_CLASSA_NET)
+#define	IN_CLASSA_MAX		128
+
+#define	IN_CLASSB(a)		((((long int) (a)) & 0xc0000000) == 0x80000000)
+#define	IN_CLASSB_NET		0xffff0000
+#define	IN_CLASSB_NSHIFT	16
+#define	IN_CLASSB_HOST		(0xffffffff & ~IN_CLASSB_NET)
+#define	IN_CLASSB_MAX		65536
+
+#define	IN_CLASSC(a)		((((long int) (a)) & 0xe0000000) == 0xc0000000)
+#define	IN_CLASSC_NET		0xffffff00
+#define	IN_CLASSC_NSHIFT	8
+#define	IN_CLASSC_HOST		(0xffffffff & ~IN_CLASSC_NET)
+
+#define	IN_CLASSD(a)		((((long int) (a)) & 0xf0000000) == 0xe0000000)
+#define	IN_MULTICAST(a)		IN_CLASSD(a)
+#define IN_MULTICAST_NET	0xF0000000
+
+#define	IN_EXPERIMENTAL(a)	((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define	IN_BADCLASS(a)		IN_EXPERIMENTAL((a))
+
+/* Address to accept any incoming messages. */
+#define	INADDR_ANY		((unsigned long int) 0x00000000)
+
+/* Address to send to all hosts. */
+#define	INADDR_BROADCAST	((unsigned long int) 0xffffffff)
+
+/* Address indicating an error return. */
+#define	INADDR_NONE		((unsigned long int) 0xffffffff)
+
+/* Network number for local host loopback. */
+#define	IN_LOOPBACKNET		127
+
+/* Address to loopback in software to local host.  */
+#define	INADDR_LOOPBACK		0x7f000001	/* 127.0.0.1   */
+#define	IN_LOOPBACK(a)		((((long int) (a)) & 0xff000000) == 0x7f000000)
+
+/* Defines for Multicast INADDR */
+#define INADDR_UNSPEC_GROUP   	0xe0000000U	/* 224.0.0.0   */
+#define INADDR_ALLHOSTS_GROUP 	0xe0000001U	/* 224.0.0.1   */
+#define INADDR_ALLRTRS_GROUP    0xe0000002U	/* 224.0.0.2 */
+#define INADDR_MAX_LOCAL_GROUP  0xe00000ffU	/* 224.0.0.255 */
+#endif
+
+/* <asm/byteorder.h> contains the htonl type stuff.. */
+#include <asm/byteorder.h> 
+
+
+#endif /* _UAPI_LINUX_IN_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index b02c41e53d56..2b7a652c9fa4 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -420,13 +420,19 @@ struct kvm_run {
 struct kvm_coalesced_mmio_zone {
 	__u64 addr;
 	__u32 size;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 };
 
 struct kvm_coalesced_mmio {
 	__u64 phys_addr;
 	__u32 len;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 	__u8  data[8];
 };
 
@@ -677,10 +683,10 @@ struct kvm_ioeventfd {
 };
 
 #define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
-#define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
+#define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
-                                              KVM_X86_DISABLE_EXITS_HTL | \
+                                              KVM_X86_DISABLE_EXITS_HLT | \
                                               KVM_X86_DISABLE_EXITS_PAUSE)
 
 /* for KVM_ENABLE_CAP */
@@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size {
 
 #define KVM_PPC_PAGE_SIZES_REAL		0x00000001
 #define KVM_PPC_1T_SEGMENTS		0x00000002
+#define KVM_PPC_NO_HASH			0x00000004
 
 struct kvm_ppc_smmu_info {
 	__u64 flags;
@@ -751,6 +758,15 @@ struct kvm_ppc_resize_hpt {
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] are reserved for the guest
+ * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
+ * value 0 implies the default IPA size, 40bits.
+ */
+#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK	0xffULL
+#define KVM_VM_TYPE_ARM_IPA_SIZE(x)		\
+	((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+/*
  * ioctls for /dev/kvm fds:
  */
 #define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
@@ -948,6 +964,17 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_BPB 152
 #define KVM_CAP_GET_MSR_FEATURES 153
 #define KVM_CAP_HYPERV_EVENTFD 154
+#define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_S390_HPAGE_1M 156
+#define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
+#define KVM_CAP_MSR_PLATFORM_INFO 159
+#define KVM_CAP_PPC_NESTED_HV 160
+#define KVM_CAP_HYPERV_SEND_IPI 161
+#define KVM_CAP_COALESCED_PIO 162
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
+#define KVM_CAP_ARM_VM_IPA_SIZE 165
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1390,6 +1417,9 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_HYPERV_EVENTFD */
 #define KVM_HYPERV_EVENTFD        _IOW(KVMIO,  0xbd, struct kvm_hyperv_eventfd)
 
+/* Available with KVM_CAP_NESTED_STATE */
+#define KVM_GET_NESTED_STATE         _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
+#define KVM_SET_NESTED_STATE         _IOW(KVMIO,  0xbf, struct kvm_nested_state)
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
new file mode 100644
index 000000000000..f189931042a7
--- /dev/null
+++ b/tools/include/uapi/linux/lirc.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * lirc.h - linux infrared remote control header file
+ * last modified 2010/07/13 by Jarod Wilson
+ */
+
+#ifndef _LINUX_LIRC_H
+#define _LINUX_LIRC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define PULSE_BIT       0x01000000
+#define PULSE_MASK      0x00FFFFFF
+
+#define LIRC_MODE2_SPACE     0x00000000
+#define LIRC_MODE2_PULSE     0x01000000
+#define LIRC_MODE2_FREQUENCY 0x02000000
+#define LIRC_MODE2_TIMEOUT   0x03000000
+
+#define LIRC_VALUE_MASK      0x00FFFFFF
+#define LIRC_MODE2_MASK      0xFF000000
+
+#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
+#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
+#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
+#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
+
+#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
+#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
+
+#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
+#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
+#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
+#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
+
+/* used heavily by lirc userspace */
+#define lirc_t int
+
+/*** lirc compatible hardware features ***/
+
+#define LIRC_MODE2SEND(x) (x)
+#define LIRC_SEND2MODE(x) (x)
+#define LIRC_MODE2REC(x) ((x) << 16)
+#define LIRC_REC2MODE(x) ((x) >> 16)
+
+#define LIRC_MODE_RAW                  0x00000001
+#define LIRC_MODE_PULSE                0x00000002
+#define LIRC_MODE_MODE2                0x00000004
+#define LIRC_MODE_SCANCODE             0x00000008
+#define LIRC_MODE_LIRCCODE             0x00000010
+
+
+#define LIRC_CAN_SEND_RAW              LIRC_MODE2SEND(LIRC_MODE_RAW)
+#define LIRC_CAN_SEND_PULSE            LIRC_MODE2SEND(LIRC_MODE_PULSE)
+#define LIRC_CAN_SEND_MODE2            LIRC_MODE2SEND(LIRC_MODE_MODE2)
+#define LIRC_CAN_SEND_LIRCCODE         LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_SEND_MASK             0x0000003f
+
+#define LIRC_CAN_SET_SEND_CARRIER      0x00000100
+#define LIRC_CAN_SET_SEND_DUTY_CYCLE   0x00000200
+#define LIRC_CAN_SET_TRANSMITTER_MASK  0x00000400
+
+#define LIRC_CAN_REC_RAW               LIRC_MODE2REC(LIRC_MODE_RAW)
+#define LIRC_CAN_REC_PULSE             LIRC_MODE2REC(LIRC_MODE_PULSE)
+#define LIRC_CAN_REC_MODE2             LIRC_MODE2REC(LIRC_MODE_MODE2)
+#define LIRC_CAN_REC_SCANCODE          LIRC_MODE2REC(LIRC_MODE_SCANCODE)
+#define LIRC_CAN_REC_LIRCCODE          LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_REC_MASK              LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
+
+#define LIRC_CAN_SET_REC_CARRIER       (LIRC_CAN_SET_SEND_CARRIER << 16)
+#define LIRC_CAN_SET_REC_DUTY_CYCLE    (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
+
+#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
+#define LIRC_CAN_SET_REC_CARRIER_RANGE    0x80000000
+#define LIRC_CAN_GET_REC_RESOLUTION       0x20000000
+#define LIRC_CAN_SET_REC_TIMEOUT          0x10000000
+#define LIRC_CAN_SET_REC_FILTER           0x08000000
+
+#define LIRC_CAN_MEASURE_CARRIER          0x02000000
+#define LIRC_CAN_USE_WIDEBAND_RECEIVER    0x04000000
+
+#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
+#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
+
+#define LIRC_CAN_NOTIFY_DECODE            0x01000000
+
+/*** IOCTL commands for lirc driver ***/
+
+#define LIRC_GET_FEATURES              _IOR('i', 0x00000000, __u32)
+
+#define LIRC_GET_SEND_MODE             _IOR('i', 0x00000001, __u32)
+#define LIRC_GET_REC_MODE              _IOR('i', 0x00000002, __u32)
+#define LIRC_GET_REC_RESOLUTION        _IOR('i', 0x00000007, __u32)
+
+#define LIRC_GET_MIN_TIMEOUT           _IOR('i', 0x00000008, __u32)
+#define LIRC_GET_MAX_TIMEOUT           _IOR('i', 0x00000009, __u32)
+
+/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
+#define LIRC_GET_LENGTH                _IOR('i', 0x0000000f, __u32)
+
+#define LIRC_SET_SEND_MODE             _IOW('i', 0x00000011, __u32)
+#define LIRC_SET_REC_MODE              _IOW('i', 0x00000012, __u32)
+/* Note: these can reset the according pulse_width */
+#define LIRC_SET_SEND_CARRIER          _IOW('i', 0x00000013, __u32)
+#define LIRC_SET_REC_CARRIER           _IOW('i', 0x00000014, __u32)
+#define LIRC_SET_SEND_DUTY_CYCLE       _IOW('i', 0x00000015, __u32)
+#define LIRC_SET_TRANSMITTER_MASK      _IOW('i', 0x00000017, __u32)
+
+/*
+ * when a timeout != 0 is set the driver will send a
+ * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is
+ * never sent, timeout is disabled by default
+ */
+#define LIRC_SET_REC_TIMEOUT           _IOW('i', 0x00000018, __u32)
+
+/* 1 enables, 0 disables timeout reports in MODE2 */
+#define LIRC_SET_REC_TIMEOUT_REPORTS   _IOW('i', 0x00000019, __u32)
+
+/*
+ * if enabled from the next key press on the driver will send
+ * LIRC_MODE2_FREQUENCY packets
+ */
+#define LIRC_SET_MEASURE_CARRIER_MODE	_IOW('i', 0x0000001d, __u32)
+
+/*
+ * to set a range use LIRC_SET_REC_CARRIER_RANGE with the
+ * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound
+ */
+#define LIRC_SET_REC_CARRIER_RANGE     _IOW('i', 0x0000001f, __u32)
+
+#define LIRC_SET_WIDEBAND_RECEIVER     _IOW('i', 0x00000023, __u32)
+
+/*
+ * struct lirc_scancode - decoded scancode with protocol for use with
+ *	LIRC_MODE_SCANCODE
+ *
+ * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
+ *	was decoded.
+ * @flags: should be 0 for transmit. When receiving scancodes,
+ *	LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
+ *	depending on the protocol
+ * @rc_proto: see enum rc_proto
+ * @keycode: the translated keycode. Set to 0 for transmit.
+ * @scancode: the scancode received or to be sent
+ */
+struct lirc_scancode {
+	__u64	timestamp;
+	__u16	flags;
+	__u16	rc_proto;
+	__u32	keycode;
+	__u64	scancode;
+};
+
+/* Set if the toggle bit of rc-5 or rc-6 is enabled */
+#define LIRC_SCANCODE_FLAG_TOGGLE	1
+/* Set if this is a nec or sanyo repeat */
+#define LIRC_SCANCODE_FLAG_REPEAT	2
+
+/**
+ * enum rc_proto - the Remote Controller protocol
+ *
+ * @RC_PROTO_UNKNOWN: Protocol not known
+ * @RC_PROTO_OTHER: Protocol known but proprietary
+ * @RC_PROTO_RC5: Philips RC5 protocol
+ * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
+ * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
+ * @RC_PROTO_JVC: JVC protocol
+ * @RC_PROTO_SONY12: Sony 12 bit protocol
+ * @RC_PROTO_SONY15: Sony 15 bit protocol
+ * @RC_PROTO_SONY20: Sony 20 bit protocol
+ * @RC_PROTO_NEC: NEC protocol
+ * @RC_PROTO_NECX: Extended NEC protocol
+ * @RC_PROTO_NEC32: NEC 32 bit protocol
+ * @RC_PROTO_SANYO: Sanyo protocol
+ * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
+ * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
+ * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
+ * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
+ * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
+ * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
+ * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
+ * @RC_PROTO_SHARP: Sharp protocol
+ * @RC_PROTO_XMP: XMP protocol
+ * @RC_PROTO_CEC: CEC protocol
+ * @RC_PROTO_IMON: iMon Pad protocol
+ */
+enum rc_proto {
+	RC_PROTO_UNKNOWN	= 0,
+	RC_PROTO_OTHER		= 1,
+	RC_PROTO_RC5		= 2,
+	RC_PROTO_RC5X_20	= 3,
+	RC_PROTO_RC5_SZ		= 4,
+	RC_PROTO_JVC		= 5,
+	RC_PROTO_SONY12		= 6,
+	RC_PROTO_SONY15		= 7,
+	RC_PROTO_SONY20		= 8,
+	RC_PROTO_NEC		= 9,
+	RC_PROTO_NECX		= 10,
+	RC_PROTO_NEC32		= 11,
+	RC_PROTO_SANYO		= 12,
+	RC_PROTO_MCIR2_KBD	= 13,
+	RC_PROTO_MCIR2_MSE	= 14,
+	RC_PROTO_RC6_0		= 15,
+	RC_PROTO_RC6_6A_20	= 16,
+	RC_PROTO_RC6_6A_24	= 17,
+	RC_PROTO_RC6_6A_32	= 18,
+	RC_PROTO_RC6_MCE	= 19,
+	RC_PROTO_SHARP		= 20,
+	RC_PROTO_XMP		= 21,
+	RC_PROTO_CEC		= 22,
+	RC_PROTO_IMON		= 23,
+};
+
+#endif
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index bfd5938fede6..d0f515d53299 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -28,7 +28,9 @@
 #define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
 #define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
 #define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB	HUGETLB_FLAG_ENCODE_32MB
 #define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB	HUGETLB_FLAG_ENCODE_512MB
 #define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
 #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
index 776bc92e9118..0a4d73317759 100644
--- a/tools/include/uapi/linux/netlink.h
+++ b/tools/include/uapi/linux/netlink.h
@@ -155,6 +155,7 @@ enum nlmsgerr_attrs {
 #define NETLINK_LIST_MEMBERSHIPS	9
 #define NETLINK_CAP_ACK			10
 #define NETLINK_EXT_ACK			11
+#define NETLINK_GET_STRICT_CHK		12
 
 struct nl_pktinfo {
 	__u32	group;
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b8e288a1f740..9de8780ac8d9 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -143,6 +143,8 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
 	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
+
+	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
 
 /*
@@ -644,10 +646,12 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
  *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
  *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC		(1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT		(1 << 13)
 /*
  * These PERF_RECORD_MISC_* flags below are safely reused
diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
new file mode 100644
index 000000000000..401d0c1e612d
--- /dev/null
+++ b/tools/include/uapi/linux/pkt_cls.h
@@ -0,0 +1,612 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_CLS_H
+#define __LINUX_PKT_CLS_H
+
+#include <linux/types.h>
+#include <linux/pkt_sched.h>
+
+#define TC_COOKIE_MAX_SIZE 16
+
+/* Action attributes */
+enum {
+	TCA_ACT_UNSPEC,
+	TCA_ACT_KIND,
+	TCA_ACT_OPTIONS,
+	TCA_ACT_INDEX,
+	TCA_ACT_STATS,
+	TCA_ACT_PAD,
+	TCA_ACT_COOKIE,
+	__TCA_ACT_MAX
+};
+
+#define TCA_ACT_MAX __TCA_ACT_MAX
+#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
+#define TCA_ACT_MAX_PRIO 32
+#define TCA_ACT_BIND	1
+#define TCA_ACT_NOBIND	0
+#define TCA_ACT_UNBIND	1
+#define TCA_ACT_NOUNBIND	0
+#define TCA_ACT_REPLACE		1
+#define TCA_ACT_NOREPLACE	0
+
+#define TC_ACT_UNSPEC	(-1)
+#define TC_ACT_OK		0
+#define TC_ACT_RECLASSIFY	1
+#define TC_ACT_SHOT		2
+#define TC_ACT_PIPE		3
+#define TC_ACT_STOLEN		4
+#define TC_ACT_QUEUED		5
+#define TC_ACT_REPEAT		6
+#define TC_ACT_REDIRECT		7
+#define TC_ACT_TRAP		8 /* For hw path, this means "trap to cpu"
+				   * and don't further process the frame
+				   * in hardware. For sw path, this is
+				   * equivalent of TC_ACT_STOLEN - drop
+				   * the skb and act like everything
+				   * is alright.
+				   */
+#define TC_ACT_VALUE_MAX	TC_ACT_TRAP
+
+/* There is a special kind of actions called "extended actions",
+ * which need a value parameter. These have a local opcode located in
+ * the highest nibble, starting from 1. The rest of the bits
+ * are used to carry the value. These two parts together make
+ * a combined opcode.
+ */
+#define __TC_ACT_EXT_SHIFT 28
+#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
+#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
+#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
+#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
+
+#define TC_ACT_JUMP __TC_ACT_EXT(1)
+#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
+#define TC_ACT_EXT_OPCODE_MAX	TC_ACT_GOTO_CHAIN
+
+/* Action type identifiers*/
+enum {
+	TCA_ID_UNSPEC=0,
+	TCA_ID_POLICE=1,
+	/* other actions go here */
+	__TCA_ID_MAX=255
+};
+
+#define TCA_ID_MAX __TCA_ID_MAX
+
+struct tc_police {
+	__u32			index;
+	int			action;
+#define TC_POLICE_UNSPEC	TC_ACT_UNSPEC
+#define TC_POLICE_OK		TC_ACT_OK
+#define TC_POLICE_RECLASSIFY	TC_ACT_RECLASSIFY
+#define TC_POLICE_SHOT		TC_ACT_SHOT
+#define TC_POLICE_PIPE		TC_ACT_PIPE
+
+	__u32			limit;
+	__u32			burst;
+	__u32			mtu;
+	struct tc_ratespec	rate;
+	struct tc_ratespec	peakrate;
+	int			refcnt;
+	int			bindcnt;
+	__u32			capab;
+};
+
+struct tcf_t {
+	__u64   install;
+	__u64   lastuse;
+	__u64   expires;
+	__u64   firstuse;
+};
+
+struct tc_cnt {
+	int                   refcnt;
+	int                   bindcnt;
+};
+
+#define tc_gen \
+	__u32                 index; \
+	__u32                 capab; \
+	int                   action; \
+	int                   refcnt; \
+	int                   bindcnt
+
+enum {
+	TCA_POLICE_UNSPEC,
+	TCA_POLICE_TBF,
+	TCA_POLICE_RATE,
+	TCA_POLICE_PEAKRATE,
+	TCA_POLICE_AVRATE,
+	TCA_POLICE_RESULT,
+	TCA_POLICE_TM,
+	TCA_POLICE_PAD,
+	__TCA_POLICE_MAX
+#define TCA_POLICE_RESULT TCA_POLICE_RESULT
+};
+
+#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
+
+/* tca flags definitions */
+#define TCA_CLS_FLAGS_SKIP_HW	(1 << 0) /* don't offload filter to HW */
+#define TCA_CLS_FLAGS_SKIP_SW	(1 << 1) /* don't use filter in SW */
+#define TCA_CLS_FLAGS_IN_HW	(1 << 2) /* filter is offloaded to HW */
+#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
+#define TCA_CLS_FLAGS_VERBOSE	(1 << 4) /* verbose logging */
+
+/* U32 filters */
+
+#define TC_U32_HTID(h) ((h)&0xFFF00000)
+#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20)
+#define TC_U32_HASH(h) (((h)>>12)&0xFF)
+#define TC_U32_NODE(h) ((h)&0xFFF)
+#define TC_U32_KEY(h) ((h)&0xFFFFF)
+#define TC_U32_UNSPEC	0
+#define TC_U32_ROOT	(0xFFF00000)
+
+enum {
+	TCA_U32_UNSPEC,
+	TCA_U32_CLASSID,
+	TCA_U32_HASH,
+	TCA_U32_LINK,
+	TCA_U32_DIVISOR,
+	TCA_U32_SEL,
+	TCA_U32_POLICE,
+	TCA_U32_ACT,
+	TCA_U32_INDEV,
+	TCA_U32_PCNT,
+	TCA_U32_MARK,
+	TCA_U32_FLAGS,
+	TCA_U32_PAD,
+	__TCA_U32_MAX
+};
+
+#define TCA_U32_MAX (__TCA_U32_MAX - 1)
+
+struct tc_u32_key {
+	__be32		mask;
+	__be32		val;
+	int		off;
+	int		offmask;
+};
+
+struct tc_u32_sel {
+	unsigned char		flags;
+	unsigned char		offshift;
+	unsigned char		nkeys;
+
+	__be16			offmask;
+	__u16			off;
+	short			offoff;
+
+	short			hoff;
+	__be32			hmask;
+	struct tc_u32_key	keys[0];
+};
+
+struct tc_u32_mark {
+	__u32		val;
+	__u32		mask;
+	__u32		success;
+};
+
+struct tc_u32_pcnt {
+	__u64 rcnt;
+	__u64 rhit;
+	__u64 kcnts[0];
+};
+
+/* Flags */
+
+#define TC_U32_TERMINAL		1
+#define TC_U32_OFFSET		2
+#define TC_U32_VAROFFSET	4
+#define TC_U32_EAT		8
+
+#define TC_U32_MAXDEPTH 8
+
+
+/* RSVP filter */
+
+enum {
+	TCA_RSVP_UNSPEC,
+	TCA_RSVP_CLASSID,
+	TCA_RSVP_DST,
+	TCA_RSVP_SRC,
+	TCA_RSVP_PINFO,
+	TCA_RSVP_POLICE,
+	TCA_RSVP_ACT,
+	__TCA_RSVP_MAX
+};
+
+#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
+
+struct tc_rsvp_gpi {
+	__u32	key;
+	__u32	mask;
+	int	offset;
+};
+
+struct tc_rsvp_pinfo {
+	struct tc_rsvp_gpi dpi;
+	struct tc_rsvp_gpi spi;
+	__u8	protocol;
+	__u8	tunnelid;
+	__u8	tunnelhdr;
+	__u8	pad;
+};
+
+/* ROUTE filter */
+
+enum {
+	TCA_ROUTE4_UNSPEC,
+	TCA_ROUTE4_CLASSID,
+	TCA_ROUTE4_TO,
+	TCA_ROUTE4_FROM,
+	TCA_ROUTE4_IIF,
+	TCA_ROUTE4_POLICE,
+	TCA_ROUTE4_ACT,
+	__TCA_ROUTE4_MAX
+};
+
+#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1)
+
+
+/* FW filter */
+
+enum {
+	TCA_FW_UNSPEC,
+	TCA_FW_CLASSID,
+	TCA_FW_POLICE,
+	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
+	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+	TCA_FW_MASK,
+	__TCA_FW_MAX
+};
+
+#define TCA_FW_MAX (__TCA_FW_MAX - 1)
+
+/* TC index filter */
+
+enum {
+	TCA_TCINDEX_UNSPEC,
+	TCA_TCINDEX_HASH,
+	TCA_TCINDEX_MASK,
+	TCA_TCINDEX_SHIFT,
+	TCA_TCINDEX_FALL_THROUGH,
+	TCA_TCINDEX_CLASSID,
+	TCA_TCINDEX_POLICE,
+	TCA_TCINDEX_ACT,
+	__TCA_TCINDEX_MAX
+};
+
+#define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
+
+/* Flow filter */
+
+enum {
+	FLOW_KEY_SRC,
+	FLOW_KEY_DST,
+	FLOW_KEY_PROTO,
+	FLOW_KEY_PROTO_SRC,
+	FLOW_KEY_PROTO_DST,
+	FLOW_KEY_IIF,
+	FLOW_KEY_PRIORITY,
+	FLOW_KEY_MARK,
+	FLOW_KEY_NFCT,
+	FLOW_KEY_NFCT_SRC,
+	FLOW_KEY_NFCT_DST,
+	FLOW_KEY_NFCT_PROTO_SRC,
+	FLOW_KEY_NFCT_PROTO_DST,
+	FLOW_KEY_RTCLASSID,
+	FLOW_KEY_SKUID,
+	FLOW_KEY_SKGID,
+	FLOW_KEY_VLAN_TAG,
+	FLOW_KEY_RXHASH,
+	__FLOW_KEY_MAX,
+};
+
+#define FLOW_KEY_MAX	(__FLOW_KEY_MAX - 1)
+
+enum {
+	FLOW_MODE_MAP,
+	FLOW_MODE_HASH,
+};
+
+enum {
+	TCA_FLOW_UNSPEC,
+	TCA_FLOW_KEYS,
+	TCA_FLOW_MODE,
+	TCA_FLOW_BASECLASS,
+	TCA_FLOW_RSHIFT,
+	TCA_FLOW_ADDEND,
+	TCA_FLOW_MASK,
+	TCA_FLOW_XOR,
+	TCA_FLOW_DIVISOR,
+	TCA_FLOW_ACT,
+	TCA_FLOW_POLICE,
+	TCA_FLOW_EMATCHES,
+	TCA_FLOW_PERTURB,
+	__TCA_FLOW_MAX
+};
+
+#define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)
+
+/* Basic filter */
+
+enum {
+	TCA_BASIC_UNSPEC,
+	TCA_BASIC_CLASSID,
+	TCA_BASIC_EMATCHES,
+	TCA_BASIC_ACT,
+	TCA_BASIC_POLICE,
+	__TCA_BASIC_MAX
+};
+
+#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
+
+
+/* Cgroup classifier */
+
+enum {
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_ACT,
+	TCA_CGROUP_POLICE,
+	TCA_CGROUP_EMATCHES,
+	__TCA_CGROUP_MAX,
+};
+
+#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
+
+/* BPF classifier */
+
+#define TCA_BPF_FLAG_ACT_DIRECT		(1 << 0)
+
+enum {
+	TCA_BPF_UNSPEC,
+	TCA_BPF_ACT,
+	TCA_BPF_POLICE,
+	TCA_BPF_CLASSID,
+	TCA_BPF_OPS_LEN,
+	TCA_BPF_OPS,
+	TCA_BPF_FD,
+	TCA_BPF_NAME,
+	TCA_BPF_FLAGS,
+	TCA_BPF_FLAGS_GEN,
+	TCA_BPF_TAG,
+	TCA_BPF_ID,
+	__TCA_BPF_MAX,
+};
+
+#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
+
+/* Flower classifier */
+
+enum {
+	TCA_FLOWER_UNSPEC,
+	TCA_FLOWER_CLASSID,
+	TCA_FLOWER_INDEV,
+	TCA_FLOWER_ACT,
+	TCA_FLOWER_KEY_ETH_DST,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_DST_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_TYPE,	/* be16 */
+	TCA_FLOWER_KEY_IP_PROTO,	/* u8 */
+	TCA_FLOWER_KEY_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_SRC_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_SRC_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_TCP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_TCP_DST,		/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
+
+	TCA_FLOWER_FLAGS,
+	TCA_FLOWER_KEY_VLAN_ID,		/* be16 */
+	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_KEY_ID,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
+
+	TCA_FLOWER_KEY_TCP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_TCP_DST_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_DST_MASK,	/* be16 */
+	TCA_FLOWER_KEY_SCTP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_SCTP_DST_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_SCTP_SRC,	/* be16 */
+	TCA_FLOWER_KEY_SCTP_DST,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,	/* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,	/* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT,	/* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_FLAGS,		/* be32 */
+	TCA_FLOWER_KEY_FLAGS_MASK,	/* be32 */
+
+	TCA_FLOWER_KEY_ICMPV4_CODE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
+
+	TCA_FLOWER_KEY_ARP_SIP,		/* be32 */
+	TCA_FLOWER_KEY_ARP_SIP_MASK,	/* be32 */
+	TCA_FLOWER_KEY_ARP_TIP,		/* be32 */
+	TCA_FLOWER_KEY_ARP_TIP_MASK,	/* be32 */
+	TCA_FLOWER_KEY_ARP_OP,		/* u8 */
+	TCA_FLOWER_KEY_ARP_OP_MASK,	/* u8 */
+	TCA_FLOWER_KEY_ARP_SHA,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_SHA_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA_MASK,	/* ETH_ALEN */
+
+	TCA_FLOWER_KEY_MPLS_TTL,	/* u8 - 8 bits */
+	TCA_FLOWER_KEY_MPLS_BOS,	/* u8 - 1 bit */
+	TCA_FLOWER_KEY_MPLS_TC,		/* u8 - 3 bits */
+	TCA_FLOWER_KEY_MPLS_LABEL,	/* be32 - 20 bits */
+
+	TCA_FLOWER_KEY_TCP_FLAGS,	/* be16 */
+	TCA_FLOWER_KEY_TCP_FLAGS_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_IP_TOS,		/* u8 */
+	TCA_FLOWER_KEY_IP_TOS_MASK,	/* u8 */
+	TCA_FLOWER_KEY_IP_TTL,		/* u8 */
+	TCA_FLOWER_KEY_IP_TTL_MASK,	/* u8 */
+
+	TCA_FLOWER_KEY_CVLAN_ID,	/* be16 */
+	TCA_FLOWER_KEY_CVLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_CVLAN_ETH_TYPE,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_IP_TOS,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TOS_MASK,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL_MASK,	/* u8 */
+
+	TCA_FLOWER_KEY_ENC_OPTS,
+	TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
+	TCA_FLOWER_IN_HW_COUNT,
+
+	__TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+					 * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+					 * attributes
+					 */
+	__TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,            /* u16 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */
+
+	__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+		(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
+};
+
+/* Match-all classifier */
+
+enum {
+	TCA_MATCHALL_UNSPEC,
+	TCA_MATCHALL_CLASSID,
+	TCA_MATCHALL_ACT,
+	TCA_MATCHALL_FLAGS,
+	__TCA_MATCHALL_MAX,
+};
+
+#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1)
+
+/* Extended Matches */
+
+struct tcf_ematch_tree_hdr {
+	__u16		nmatches;
+	__u16		progid;
+};
+
+enum {
+	TCA_EMATCH_TREE_UNSPEC,
+	TCA_EMATCH_TREE_HDR,
+	TCA_EMATCH_TREE_LIST,
+	__TCA_EMATCH_TREE_MAX
+};
+#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
+
+struct tcf_ematch_hdr {
+	__u16		matchid;
+	__u16		kind;
+	__u16		flags;
+	__u16		pad; /* currently unused */
+};
+
+/*  0                   1
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
+ * +-----------------------+-+-+---+
+ * |         Unused        |S|I| R |
+ * +-----------------------+-+-+---+
+ *
+ * R(2) ::= relation to next ematch
+ *          where: 0 0 END (last ematch)
+ *                 0 1 AND
+ *                 1 0 OR
+ *                 1 1 Unused (invalid)
+ * I(1) ::= invert result
+ * S(1) ::= simple payload
+ */
+#define TCF_EM_REL_END	0
+#define TCF_EM_REL_AND	(1<<0)
+#define TCF_EM_REL_OR	(1<<1)
+#define TCF_EM_INVERT	(1<<2)
+#define TCF_EM_SIMPLE	(1<<3)
+
+#define TCF_EM_REL_MASK	3
+#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
+
+enum {
+	TCF_LAYER_LINK,
+	TCF_LAYER_NETWORK,
+	TCF_LAYER_TRANSPORT,
+	__TCF_LAYER_MAX
+};
+#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
+
+/* Ematch type assignments
+ *   1..32767		Reserved for ematches inside kernel tree
+ *   32768..65535	Free to use, not reliable
+ */
+#define	TCF_EM_CONTAINER	0
+#define	TCF_EM_CMP		1
+#define	TCF_EM_NBYTE		2
+#define	TCF_EM_U32		3
+#define	TCF_EM_META		4
+#define	TCF_EM_TEXT		5
+#define	TCF_EM_VLAN		6
+#define	TCF_EM_CANID		7
+#define	TCF_EM_IPSET		8
+#define	TCF_EM_IPT		9
+#define	TCF_EM_MAX		9
+
+enum {
+	TCF_EM_PROG_TC
+};
+
+enum {
+	TCF_EM_OPND_EQ,
+	TCF_EM_OPND_GT,
+	TCF_EM_OPND_LT
+};
+
+#endif
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index af5f8c2df87a..b17201edfa09 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -170,7 +170,7 @@ struct prctl_mm_map {
  * asking selinux for a specific new context (e.g. with runcon) will result
  * in execve returning -EPERM.
  *
- * See Documentation/prctl/no_new_privs.txt for more details.
+ * See Documentation/userspace-api/no_new_privs.rst for more details.
  */
 #define PR_SET_NO_NEW_PRIVS	38
 #define PR_GET_NO_NEW_PRIVS	39
@@ -207,4 +207,17 @@ struct prctl_mm_map {
 # define PR_SVE_VL_LEN_MASK		0xffff
 # define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
 
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL		52
+#define PR_SET_SPECULATION_CTRL		53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS		0
+# define PR_SPEC_INDIRECT_BRANCH	1
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED		0
+# define PR_SPEC_PRCTL			(1UL << 0)
+# define PR_SPEC_ENABLE			(1UL << 1)
+# define PR_SPEC_DISABLE		(1UL << 2)
+# define PR_SPEC_FORCE_DISABLE		(1UL << 3)
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h
new file mode 100644
index 000000000000..286e8d6a8e98
--- /dev/null
+++ b/tools/include/uapi/linux/seg6.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+#include <linux/types.h>
+#include <linux/in6.h>		/* For struct in6_addr. */
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+	__u8	nexthdr;
+	__u8	hdrlen;
+	__u8	type;
+	__u8	segments_left;
+	__u8	first_segment; /* Represents the last_entry field of SRH */
+	__u8	flags;
+	__u16	tag;
+
+	struct in6_addr segments[0];
+};
+
+#define SR6_FLAG1_PROTECTED	(1 << 6)
+#define SR6_FLAG1_OAM		(1 << 5)
+#define SR6_FLAG1_ALERT		(1 << 4)
+#define SR6_FLAG1_HMAC		(1 << 3)
+
+#define SR6_TLV_INGRESS		1
+#define SR6_TLV_EGRESS		2
+#define SR6_TLV_OPAQUE		3
+#define SR6_TLV_PADDING		4
+#define SR6_TLV_HMAC		5
+
+#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC)
+
+struct sr6_tlv {
+	__u8 type;
+	__u8 len;
+	__u8 data[0];
+};
+
+#endif
diff --git a/tools/include/uapi/linux/seg6_local.h b/tools/include/uapi/linux/seg6_local.h
new file mode 100644
index 000000000000..edc138bdc56d
--- /dev/null
+++ b/tools/include/uapi/linux/seg6_local.h
@@ -0,0 +1,80 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include <linux/seg6.h>
+
+enum {
+	SEG6_LOCAL_UNSPEC,
+	SEG6_LOCAL_ACTION,
+	SEG6_LOCAL_SRH,
+	SEG6_LOCAL_TABLE,
+	SEG6_LOCAL_NH4,
+	SEG6_LOCAL_NH6,
+	SEG6_LOCAL_IIF,
+	SEG6_LOCAL_OIF,
+	SEG6_LOCAL_BPF,
+	__SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+	SEG6_LOCAL_ACTION_UNSPEC	= 0,
+	/* node segment */
+	SEG6_LOCAL_ACTION_END		= 1,
+	/* adjacency segment (IPv6 cross-connect) */
+	SEG6_LOCAL_ACTION_END_X		= 2,
+	/* lookup of next seg NH in table */
+	SEG6_LOCAL_ACTION_END_T		= 3,
+	/* decap and L2 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX2	= 4,
+	/* decap and IPv6 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX6	= 5,
+	/* decap and IPv4 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX4	= 6,
+	/* decap and lookup of DA in v6 table */
+	SEG6_LOCAL_ACTION_END_DT6	= 7,
+	/* decap and lookup of DA in v4 table */
+	SEG6_LOCAL_ACTION_END_DT4	= 8,
+	/* binding segment with insertion */
+	SEG6_LOCAL_ACTION_END_B6	= 9,
+	/* binding segment with encapsulation */
+	SEG6_LOCAL_ACTION_END_B6_ENCAP	= 10,
+	/* binding segment with MPLS encap */
+	SEG6_LOCAL_ACTION_END_BM	= 11,
+	/* lookup last seg in table */
+	SEG6_LOCAL_ACTION_END_S		= 12,
+	/* forward to SR-unaware VNF with static proxy */
+	SEG6_LOCAL_ACTION_END_AS	= 13,
+	/* forward to SR-unaware VNF with masquerading */
+	SEG6_LOCAL_ACTION_END_AM	= 14,
+	/* custom BPF action */
+	SEG6_LOCAL_ACTION_END_BPF	= 15,
+
+	__SEG6_LOCAL_ACTION_MAX,
+};
+
+#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+
+enum {
+	SEG6_LOCAL_BPF_PROG_UNSPEC,
+	SEG6_LOCAL_BPF_PROG,
+	SEG6_LOCAL_BPF_PROG_NAME,
+	__SEG6_LOCAL_BPF_PROG_MAX,
+};
+
+#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h
new file mode 100644
index 000000000000..6e89a5df49a4
--- /dev/null
+++ b/tools/include/uapi/linux/tc_act/tc_bpf.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_BPF_H
+#define __LINUX_TC_BPF_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_BPF 13
+
+struct tc_act_bpf {
+	tc_gen;
+};
+
+enum {
+	TCA_ACT_BPF_UNSPEC,
+	TCA_ACT_BPF_TM,
+	TCA_ACT_BPF_PARMS,
+	TCA_ACT_BPF_OPS_LEN,
+	TCA_ACT_BPF_OPS,
+	TCA_ACT_BPF_FD,
+	TCA_ACT_BPF_NAME,
+	TCA_ACT_BPF_PAD,
+	TCA_ACT_BPF_TAG,
+	TCA_ACT_BPF_ID,
+	__TCA_ACT_BPF_MAX,
+};
+#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/tls.h b/tools/include/uapi/linux/tls.h
new file mode 100644
index 000000000000..ff02287495ac
--- /dev/null
+++ b/tools/include/uapi/linux/tls.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UAPI_LINUX_TLS_H
+#define _UAPI_LINUX_TLS_H
+
+#include <linux/types.h>
+
+/* TLS socket options */
+#define TLS_TX			1	/* Set transmit parameters */
+#define TLS_RX			2	/* Set receive parameters */
+
+/* Supported versions */
+#define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
+#define TLS_VERSION_MAJOR(ver)	(((ver) >> 8) & 0xFF)
+
+#define TLS_VERSION_NUMBER(id)	((((id##_VERSION_MAJOR) & 0xFF) << 8) |	\
+				 ((id##_VERSION_MINOR) & 0xFF))
+
+#define TLS_1_2_VERSION_MAJOR	0x3
+#define TLS_1_2_VERSION_MINOR	0x3
+#define TLS_1_2_VERSION		TLS_VERSION_NUMBER(TLS_1_2)
+
+/* Supported ciphers */
+#define TLS_CIPHER_AES_GCM_128				51
+#define TLS_CIPHER_AES_GCM_128_IV_SIZE			8
+#define TLS_CIPHER_AES_GCM_128_KEY_SIZE		16
+#define TLS_CIPHER_AES_GCM_128_SALT_SIZE		4
+#define TLS_CIPHER_AES_GCM_128_TAG_SIZE		16
+#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE		8
+
+#define TLS_SET_RECORD_TYPE	1
+#define TLS_GET_RECORD_TYPE	2
+
+struct tls_crypto_info {
+	__u16 version;
+	__u16 cipher_type;
+};
+
+struct tls12_crypto_info_aes_gcm_128 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE];
+	unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE];
+};
+
+#endif /* _UAPI_LINUX_TLS_H */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index c51f8e5cc608..84c3de89696a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -65,6 +65,7 @@ struct vhost_iotlb_msg {
 };
 
 #define VHOST_IOTLB_MSG 0x1
+#define VHOST_IOTLB_MSG_V2 0x2
 
 struct vhost_msg {
 	int type;
@@ -74,6 +75,15 @@ struct vhost_msg {
 	};
 };
 
+struct vhost_msg_v2 {
+	__u32 type;
+	__u32 reserved;
+	union {
+		struct vhost_iotlb_msg iotlb;
+		__u8 padding[64];
+	};
+};
+
 struct vhost_memory_region {
 	__u64 guest_phys_addr;
 	__u64 memory_size; /* bytes */
@@ -160,6 +170,14 @@ struct vhost_memory {
 #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24,	\
 					 struct vhost_vring_state)
 
+/* Set or get vhost backend capability */
+
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+
+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
+
 /* VHOST_NET specific defines */
 
 /* Attach virtio net ring to a raw socket, or tap device.
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index ed0a120d4f08..404d4b9ffe76 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -752,7 +752,7 @@ struct snd_timer_info {
 #define SNDRV_TIMER_PSFLG_EARLY_EVENT	(1<<2)	/* write early event to the poll queue */
 
 struct snd_timer_params {
-	unsigned int flags;		/* flags - SNDRV_MIXER_PSFLG_* */
+	unsigned int flags;		/* flags - SNDRV_TIMER_PSFLG_* */
 	unsigned int ticks;		/* requested resolution in ticks */
 	unsigned int queue_size;	/* total size of queue (32-1024) */
 	unsigned int reserved0;		/* reserved, was: failure locations */
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 56c4b3f8a01b..195ba486640f 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -759,12 +759,18 @@ class DebugfsProvider(Provider):
             if len(vms) == 0:
                 self.do_read = False
 
-            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
+            self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms))
 
         else:
             self.paths = []
             self.do_read = True
-        self.reset()
+
+    def _verify_paths(self):
+        """Remove invalid paths"""
+        for path in self.paths:
+            if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)):
+                self.paths.remove(path)
+                continue
 
     def read(self, reset=0, by_guest=0):
         """Returns a dict with format:'file name / field -> current value'.
@@ -780,6 +786,7 @@ class DebugfsProvider(Provider):
         # If no debugfs filtering support is available, then don't read.
         if not self.do_read:
             return results
+        self._verify_paths()
 
         paths = self.paths
         if self._pid == 0:
@@ -1096,15 +1103,16 @@ class Tui(object):
             pid = self.stats.pid_filter
         self.screen.erase()
         gname = self.get_gname_from_pid(pid)
+        self._gname = gname
         if gname:
             gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                    if len(gname) > MAX_GUEST_NAME_LEN
                                    else gname))
         if pid > 0:
-            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
-                               .format(pid, gname), curses.A_BOLD)
+            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
         else:
-            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+            self._headline = 'kvm statistics - summary'
+        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
         if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
@@ -1162,6 +1170,19 @@ class Tui(object):
 
             return sorted_items
 
+        if not self._is_running_guest(self.stats.pid_filter):
+            if self._gname:
+                try: # ...to identify the guest by name in case it's back
+                    pids = self.get_pid_from_gname(self._gname)
+                    if len(pids) == 1:
+                        self._refresh_header(pids[0])
+                        self._update_pid(pids[0])
+                        return
+                except:
+                    pass
+            self._display_guest_dead()
+            # leave final data on screen
+            return
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1184,6 +1205,7 @@ class Tui(object):
         # print events
         tavg = 0
         tcur = 0
+        guest_removed = False
         for key, values in get_sorted_events(self, stats):
             if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
@@ -1191,7 +1213,10 @@ class Tui(object):
                 key = self.get_gname_from_pid(key)
                 if not key:
                     continue
-            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            cur = int(round(values.delta / sleeptime)) if values.delta else 0
+            if cur < 0:
+                guest_removed = True
+                continue
             if key[0] != ' ':
                 if values.delta:
                     tcur += values.delta
@@ -1204,13 +1229,21 @@ class Tui(object):
                                values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
-            self.screen.addstr(4, 1, 'No matching events reported yet')
+            if guest_removed:
+                self.screen.addstr(4, 1, 'Guest removed, updating...')
+            else:
+                self.screen.addstr(4, 1, 'No matching events reported yet')
         if row > 4:
             tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %
                                ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
+    def _display_guest_dead(self):
+        marker = '   Guest is DEAD   '
+        y = min(len(self._headline), 80 - len(marker))
+        self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
+
     def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'
@@ -1219,10 +1252,10 @@ class Tui(object):
         (x, term_width) = self.screen.getmaxyx()
         row = 2
         for line in text:
-            start = (term_width - len(line)) / 2
+            start = (term_width - len(line)) // 2
             self.screen.addstr(row, start, line)
             row += 1
-        self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
                            curses.A_STANDOUT)
         self.screen.getkey()
 
@@ -1292,7 +1325,7 @@ class Tui(object):
         msg = ''
         while True:
             self.screen.erase()
-            self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' %
+            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
                                DELAY_DEFAULT, curses.A_BOLD)
             self.screen.addstr(4, 0, msg)
             self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
@@ -1319,6 +1352,12 @@ class Tui(object):
                 msg = '"' + str(val) + '": Invalid value'
         self._refresh_header()
 
+    def _is_running_guest(self, pid):
+        """Check if pid is still a running process."""
+        if not pid:
+            return True
+        return os.path.isdir(os.path.join('/proc/', str(pid)))
+
     def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
@@ -1346,7 +1385,7 @@ class Tui(object):
             if not guest or guest == '0':
                 break
             if guest.isdigit():
-                if not os.path.isdir(os.path.join('/proc/', guest)):
+                if not self._is_running_guest(guest):
                     msg = '"' + guest + '": Not a running process'
                     continue
                 pid = int(guest)
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 6a12bbf39f7b..7aba8243a0e7 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -201,7 +201,7 @@ static void mem_toupper(char *f, size_t len)
 
 /*
  * Check for "NAME_PATH" environment variable to override fs location (for
- * testing). This matches the recommendation in Documentation/sysfs-rules.txt
+ * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst
  * for SYSFS_PATH.
  */
 static bool fs__env_override(struct fs *fs)
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 7b7fd0b18551..5afb11b30fca 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -13,11 +13,9 @@
 
 #include "tracing_path.h"
 
-
-char tracing_mnt[PATH_MAX]         = "/sys/kernel/debug";
-char tracing_path[PATH_MAX]        = "/sys/kernel/debug/tracing";
-char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
-
+static char tracing_mnt[PATH_MAX]  = "/sys/kernel/debug";
+static char tracing_path[PATH_MAX]        = "/sys/kernel/debug/tracing";
+static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
 
 static void __tracing_path_set(const char *tracing, const char *mountpoint)
 {
@@ -38,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void)
 
 	__tracing_path_set("", mnt);
 
-	return mnt;
+	return tracing_path;
 }
 
 static const char *tracing_path_debugfs_mount(void)
@@ -51,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void)
 
 	__tracing_path_set("tracing/", mnt);
 
-	return mnt;
+	return tracing_path;
 }
 
 const char *tracing_path_mount(void)
@@ -76,7 +74,7 @@ char *get_tracing_file(const char *name)
 {
 	char *file;
 
-	if (asprintf(&file, "%s/%s", tracing_path, name) < 0)
+	if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0)
 		return NULL;
 
 	return file;
@@ -87,6 +85,34 @@ void put_tracing_file(char *file)
 	free(file);
 }
 
+char *get_events_file(const char *name)
+{
+	char *file;
+
+	if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0)
+		return NULL;
+
+	return file;
+}
+
+void put_events_file(char *file)
+{
+	free(file);
+}
+
+DIR *tracing_events__opendir(void)
+{
+	DIR *dir = NULL;
+	char *path = get_tracing_file("events");
+
+	if (path) {
+		dir = opendir(path);
+		put_events_file(path);
+	}
+
+	return dir;
+}
+
 int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
 				   const char *sys, const char *name)
 {
@@ -129,7 +155,7 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
 		snprintf(buf, size,
 			 "Error:\tNo permissions to read %s/%s\n"
 			 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
-			 tracing_events_path, filename, tracing_mnt);
+			 tracing_events_path, filename, tracing_path_mount());
 	}
 		break;
 	default:
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index 0066f06cc381..a19136b086dc 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -3,9 +3,9 @@
 #define __API_FS_TRACING_PATH_H
 
 #include <linux/types.h>
+#include <dirent.h>
 
-extern char tracing_path[];
-extern char tracing_events_path[];
+DIR *tracing_events__opendir(void);
 
 void tracing_path_set(const char *mountpoint);
 const char *tracing_path_mount(void);
@@ -13,5 +13,10 @@ const char *tracing_path_mount(void);
 char *get_tracing_file(const char *name);
 void put_tracing_file(char *file);
 
+char *get_events_file(const char *name);
+void put_events_file(char *file);
+
+#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; })
+
 int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name);
 #endif /* __API_FS_TRACING_PATH_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 64c679d67109..7bc31c905018 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e6d5f8d1477f..425b480bda75 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 # Most of this file is copied from tools/lib/traceevent/Makefile
 
 BPF_VERSION = 0
@@ -66,7 +66,7 @@ ifndef VERBOSE
 endif
 
 FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
+FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
 FEATURE_DISPLAY = libelf bpf
 
 INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
@@ -116,8 +116,8 @@ ifeq ($(feature-libelf-mmap), 1)
   override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
 endif
 
-ifeq ($(feature-libelf-getphdrnum), 1)
-  override CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+ifeq ($(feature-reallocarray), 0)
+  override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
 endif
 
 # Append required CFLAGS
@@ -125,6 +125,7 @@ override CFLAGS += $(EXTRA_WARNINGS)
 override CFLAGS += -Werror -Wall
 override CFLAGS += -fPIC
 override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
 
 ifeq ($(VERBOSE),1)
   Q =
@@ -189,6 +190,7 @@ install_headers:
 	$(call QUIET_INSTALL, headers) \
 		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,libbpf.h,$(prefix)/include/bpf,644);
+		$(call do_install,btf.h,$(prefix)/include/bpf,644);
 
 install: install_lib
 
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index acbb3f8b3bec..03f9bcc4ef50 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * common eBPF ELF operations.
@@ -28,16 +28,8 @@
 #include <linux/bpf.h>
 #include "bpf.h"
 #include "libbpf.h"
-#include "nlattr.h"
-#include <linux/rtnetlink.h>
-#include <linux/if_link.h>
-#include <sys/socket.h>
 #include <errno.h>
 
-#ifndef SOL_NETLINK
-#define SOL_NETLINK 270
-#endif
-
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
  * required to be defined explicitly.
@@ -73,43 +65,78 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 	return syscall(__NR_bpf, cmd, attr, size);
 }
 
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags, int node)
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
-	__u32 name_len = name ? strlen(name) : 0;
+	__u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
 	union bpf_attr attr;
 
 	memset(&attr, '\0', sizeof(attr));
 
-	attr.map_type = map_type;
-	attr.key_size = key_size;
-	attr.value_size = value_size;
-	attr.max_entries = max_entries;
-	attr.map_flags = map_flags;
-	memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+	attr.map_type = create_attr->map_type;
+	attr.key_size = create_attr->key_size;
+	attr.value_size = create_attr->value_size;
+	attr.max_entries = create_attr->max_entries;
+	attr.map_flags = create_attr->map_flags;
+	memcpy(attr.map_name, create_attr->name,
+	       min(name_len, BPF_OBJ_NAME_LEN - 1));
+	attr.numa_node = create_attr->numa_node;
+	attr.btf_fd = create_attr->btf_fd;
+	attr.btf_key_type_id = create_attr->btf_key_type_id;
+	attr.btf_value_type_id = create_attr->btf_value_type_id;
+	attr.map_ifindex = create_attr->map_ifindex;
+	attr.inner_map_fd = create_attr->inner_map_fd;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
 
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags, int node)
+{
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
 	if (node >= 0) {
-		attr.map_flags |= BPF_F_NUMA_NODE;
-		attr.numa_node = node;
+		map_attr.numa_node = node;
+		map_attr.map_flags |= BPF_F_NUMA_NODE;
 	}
 
-	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	return bpf_create_map_xattr(&map_attr);
 }
 
 int bpf_create_map(enum bpf_map_type map_type, int key_size,
 		   int value_size, int max_entries, __u32 map_flags)
 {
-	return bpf_create_map_node(map_type, NULL, key_size, value_size,
-				   max_entries, map_flags, -1);
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr);
 }
 
 int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
 			int key_size, int value_size, int max_entries,
 			__u32 map_flags)
 {
-	return bpf_create_map_node(map_type, name, key_size, value_size,
-				   max_entries, map_flags, -1);
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr);
 }
 
 int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
@@ -168,6 +195,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	attr.log_size = 0;
 	attr.log_level = 0;
 	attr.kern_version = load_attr->kern_version;
+	attr.prog_ifindex = load_attr->prog_ifindex;
 	memcpy(attr.prog_name, load_attr->name,
 	       min(name_len, BPF_OBJ_NAME_LEN - 1));
 
@@ -250,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
 	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+
+	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
 	union bpf_attr attr;
@@ -425,6 +465,16 @@ int bpf_map_get_fd_by_id(__u32 id)
 	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
 }
 
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.btf_id = id;
+
+	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 {
 	union bpf_attr attr;
@@ -453,123 +503,50 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
 }
 
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
-	struct sockaddr_nl sa;
-	int sock, seq = 0, len, ret = -1;
-	char buf[4096];
-	struct nlattr *nla, *nla_xdp;
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifinfo;
-		char             attrbuf[64];
-	} req;
-	struct nlmsghdr *nh;
-	struct nlmsgerr *err;
-	socklen_t addrlen;
-	int one = 1;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.nl_family = AF_NETLINK;
-
-	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-	if (sock < 0) {
-		return -errno;
-	}
-
-	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
-		       &one, sizeof(one)) < 0) {
-		fprintf(stderr, "Netlink error reporting not supported\n");
-	}
-
-	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+		 bool do_log)
+{
+	union bpf_attr attr = {};
+	int fd;
 
-	addrlen = sizeof(sa);
-	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
+	attr.btf = ptr_to_u64(btf);
+	attr.btf_size = btf_size;
 
-	if (addrlen != sizeof(sa)) {
-		ret = -LIBBPF_ERRNO__INTERNAL;
-		goto cleanup;
+retry:
+	if (do_log && log_buf && log_buf_size) {
+		attr.btf_log_level = 1;
+		attr.btf_log_size = log_buf_size;
+		attr.btf_log_buf = ptr_to_u64(log_buf);
 	}
 
-	memset(&req, 0, sizeof(req));
-	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-	req.nh.nlmsg_type = RTM_SETLINK;
-	req.nh.nlmsg_pid = 0;
-	req.nh.nlmsg_seq = ++seq;
-	req.ifinfo.ifi_family = AF_UNSPEC;
-	req.ifinfo.ifi_index = ifindex;
-
-	/* started nested attribute for XDP */
-	nla = (struct nlattr *)(((char *)&req)
-				+ NLMSG_ALIGN(req.nh.nlmsg_len));
-	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
-	nla->nla_len = NLA_HDRLEN;
-
-	/* add XDP fd */
-	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-	nla_xdp->nla_type = IFLA_XDP_FD;
-	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
-	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
-	nla->nla_len += nla_xdp->nla_len;
-
-	/* if user passed in any flags, add those too */
-	if (flags) {
-		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-		nla_xdp->nla_type = IFLA_XDP_FLAGS;
-		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
-		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
-		nla->nla_len += nla_xdp->nla_len;
+	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+	if (fd == -1 && !do_log && log_buf && log_buf_size) {
+		do_log = true;
+		goto retry;
 	}
 
-	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
-	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
+	return fd;
+}
 
-	len = recv(sock, buf, sizeof(buf), 0);
-	if (len < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+		      __u64 *probe_addr)
+{
+	union bpf_attr attr = {};
+	int err;
 
-	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
-	     nh = NLMSG_NEXT(nh, len)) {
-		if (nh->nlmsg_pid != sa.nl_pid) {
-			ret = -LIBBPF_ERRNO__WRNGPID;
-			goto cleanup;
-		}
-		if (nh->nlmsg_seq != seq) {
-			ret = -LIBBPF_ERRNO__INVSEQ;
-			goto cleanup;
-		}
-		switch (nh->nlmsg_type) {
-		case NLMSG_ERROR:
-			err = (struct nlmsgerr *)NLMSG_DATA(nh);
-			if (!err->error)
-				continue;
-			ret = err->error;
-			nla_dump_errormsg(nh);
-			goto cleanup;
-		case NLMSG_DONE:
-			break;
-		default:
-			break;
-		}
-	}
+	attr.task_fd_query.pid = pid;
+	attr.task_fd_query.fd = fd;
+	attr.task_fd_query.flags = flags;
+	attr.task_fd_query.buf = ptr_to_u64(buf);
+	attr.task_fd_query.buf_len = *buf_len;
 
-	ret = 0;
+	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+	*buf_len = attr.task_fd_query.buf_len;
+	*prog_id = attr.task_fd_query.prog_id;
+	*fd_type = attr.task_fd_query.fd_type;
+	*probe_offset = attr.task_fd_query.probe_offset;
+	*probe_addr = attr.task_fd_query.probe_addr;
 
-cleanup:
-	close(sock);
-	return ret;
+	return err;
 }
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 39f6a0d64a3b..26a51538213c 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 
 /*
  * common eBPF ELF operations.
@@ -20,26 +20,50 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
-#ifndef __BPF_BPF_H
-#define __BPF_BPF_H
+#ifndef __LIBBPF_BPF_H
+#define __LIBBPF_BPF_H
 
 #include <linux/bpf.h>
+#include <stdbool.h>
 #include <stddef.h>
 
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags, int node);
-int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags);
-int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
-		   int max_entries, __u32 map_flags);
-int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
-			       int key_size, int inner_map_fd, int max_entries,
-			       __u32 map_flags, int node);
-int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
-			  int key_size, int inner_map_fd, int max_entries,
-			  __u32 map_flags);
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+struct bpf_create_map_attr {
+	const char *name;
+	enum bpf_map_type map_type;
+	__u32 map_flags;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 numa_node;
+	__u32 btf_fd;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+	__u32 map_ifindex;
+	__u32 inner_map_fd;
+};
+
+LIBBPF_API int
+bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+				   int key_size, int value_size,
+				   int max_entries, __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+				   int key_size, int value_size,
+				   int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+			      int value_size, int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
+					  const char *name, int key_size,
+					  int inner_map_fd, int max_entries,
+					  __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
+				     const char *name, int key_size,
+				     int inner_map_fd, int max_entries,
+				     __u32 map_flags);
 
 struct bpf_load_program_attr {
 	enum bpf_prog_type prog_type;
@@ -49,42 +73,59 @@ struct bpf_load_program_attr {
 	size_t insns_cnt;
 	const char *license;
 	__u32 kern_version;
+	__u32 prog_ifindex;
 };
 
+/* Flags to direct loading requirements */
+#define MAPS_RELAX_COMPAT	0x01
+
 /* Recommend log buffer size */
 #define BPF_LOG_BUF_SIZE (256 * 1024)
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-			   char *log_buf, size_t log_buf_sz);
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-		     size_t insns_cnt, const char *license,
-		     __u32 kern_version, char *log_buf,
-		     size_t log_buf_sz);
-int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-		       size_t insns_cnt, int strict_alignment,
-		       const char *license, __u32 kern_version,
-		       char *log_buf, size_t log_buf_sz, int log_level);
+LIBBPF_API int
+bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+		       char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+				const struct bpf_insn *insns, size_t insns_cnt,
+				const char *license, __u32 kern_version,
+				char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
+				  const struct bpf_insn *insns,
+				  size_t insns_cnt, int strict_alignment,
+				  const char *license, __u32 kern_version,
+				  char *log_buf, size_t log_buf_sz,
+				  int log_level);
 
-int bpf_map_update_elem(int fd, const void *key, const void *value,
-			__u64 flags);
+LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
+				   __u64 flags);
 
-int bpf_map_lookup_elem(int fd, const void *key, void *value);
-int bpf_map_delete_elem(int fd, const void *key);
-int bpf_map_get_next_key(int fd, const void *key, void *next_key);
-int bpf_obj_pin(int fd, const char *pathname);
-int bpf_obj_get(const char *pathname);
-int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
-		    unsigned int flags);
-int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
-int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type);
-int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
-		      void *data_out, __u32 *size_out, __u32 *retval,
-		      __u32 *duration);
-int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
-int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
-int bpf_prog_get_fd_by_id(__u32 id);
-int bpf_map_get_fd_by_id(__u32 id);
-int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
-int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
-		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
-int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-#endif
+LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
+					      void *value);
+LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
+			       enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
+				enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
+				 __u32 size, void *data_out, __u32 *size_out,
+				 __u32 *retval, __u32 *duration);
+LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
+			      __u32 query_flags, __u32 *attach_flags,
+			      __u32 *prog_ids, __u32 *prog_cnt);
+LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+			    __u32 log_buf_size, bool do_log);
+LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
+				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+				 __u64 *probe_offset, __u64 *probe_addr);
+#endif /* __LIBBPF_BPF_H */
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
new file mode 100644
index 000000000000..449591aa9900
--- /dev/null
+++ b/tools/lib/bpf/btf.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "bpf.h"
+
+#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); }
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+#define BTF_MAX_NR_TYPES 65535
+
+#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \
+		((k) == BTF_KIND_VOLATILE) || \
+		((k) == BTF_KIND_CONST) || \
+		((k) == BTF_KIND_RESTRICT))
+
+static struct btf_type btf_void;
+
+struct btf {
+	union {
+		struct btf_header *hdr;
+		void *data;
+	};
+	struct btf_type **types;
+	const char *strings;
+	void *nohdr_data;
+	__u32 nr_types;
+	__u32 types_size;
+	__u32 data_size;
+	int fd;
+};
+
+static int btf_add_type(struct btf *btf, struct btf_type *t)
+{
+	if (btf->types_size - btf->nr_types < 2) {
+		struct btf_type **new_types;
+		__u32 expand_by, new_size;
+
+		if (btf->types_size == BTF_MAX_NR_TYPES)
+			return -E2BIG;
+
+		expand_by = max(btf->types_size >> 2, 16);
+		new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+
+		new_types = realloc(btf->types, sizeof(*new_types) * new_size);
+		if (!new_types)
+			return -ENOMEM;
+
+		if (btf->nr_types == 0)
+			new_types[0] = &btf_void;
+
+		btf->types = new_types;
+		btf->types_size = new_size;
+	}
+
+	btf->types[++(btf->nr_types)] = t;
+
+	return 0;
+}
+
+static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
+{
+	const struct btf_header *hdr = btf->hdr;
+	__u32 meta_left;
+
+	if (btf->data_size < sizeof(struct btf_header)) {
+		elog("BTF header not found\n");
+		return -EINVAL;
+	}
+
+	if (hdr->magic != BTF_MAGIC) {
+		elog("Invalid BTF magic:%x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		elog("Unsupported BTF version:%u\n", hdr->version);
+		return -ENOTSUP;
+	}
+
+	if (hdr->flags) {
+		elog("Unsupported BTF flags:%x\n", hdr->flags);
+		return -ENOTSUP;
+	}
+
+	meta_left = btf->data_size - sizeof(*hdr);
+	if (!meta_left) {
+		elog("BTF has no data\n");
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->type_off) {
+		elog("Invalid BTF type section offset:%u\n", hdr->type_off);
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->str_off) {
+		elog("Invalid BTF string section offset:%u\n", hdr->str_off);
+		return -EINVAL;
+	}
+
+	if (hdr->type_off >= hdr->str_off) {
+		elog("BTF type section offset >= string section offset. No type?\n");
+		return -EINVAL;
+	}
+
+	if (hdr->type_off & 0x02) {
+		elog("BTF type section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	btf->nohdr_data = btf->hdr + 1;
+
+	return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+	const struct btf_header *hdr = btf->hdr;
+	const char *start = btf->nohdr_data + hdr->str_off;
+	const char *end = start + btf->hdr->str_len;
+
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+	    start[0] || end[-1]) {
+		elog("Invalid BTF string section\n");
+		return -EINVAL;
+	}
+
+	btf->strings = start;
+
+	return 0;
+}
+
+static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+	struct btf_header *hdr = btf->hdr;
+	void *nohdr_data = btf->nohdr_data;
+	void *next_type = nohdr_data + hdr->type_off;
+	void *end_type = nohdr_data + hdr->str_off;
+
+	while (next_type < end_type) {
+		struct btf_type *t = next_type;
+		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int err;
+
+		next_type += sizeof(*t);
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_INT:
+			next_type += sizeof(int);
+			break;
+		case BTF_KIND_ARRAY:
+			next_type += sizeof(struct btf_array);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			next_type += vlen * sizeof(struct btf_member);
+			break;
+		case BTF_KIND_ENUM:
+			next_type += vlen * sizeof(struct btf_enum);
+			break;
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+			break;
+		default:
+			elog("Unsupported BTF_KIND:%u\n",
+			     BTF_INFO_KIND(t->info));
+			return -EINVAL;
+		}
+
+		err = btf_add_type(btf, t);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
+{
+	if (type_id > btf->nr_types)
+		return NULL;
+
+	return btf->types[type_id];
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+	return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+	return !t || btf_type_is_void(t);
+}
+
+static __s64 btf_type_size(const struct btf_type *t)
+{
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+		return t->size;
+	case BTF_KIND_PTR:
+		return sizeof(void *);
+	default:
+		return -EINVAL;
+	}
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
+{
+	const struct btf_array *array;
+	const struct btf_type *t;
+	__u32 nelems = 1;
+	__s64 size = -1;
+	int i;
+
+	t = btf__type_by_id(btf, type_id);
+	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+	     i++) {
+		size = btf_type_size(t);
+		if (size >= 0)
+			break;
+
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+			type_id = t->type;
+			break;
+		case BTF_KIND_ARRAY:
+			array = (const struct btf_array *)(t + 1);
+			if (nelems && array->nelems > UINT32_MAX / nelems)
+				return -E2BIG;
+			nelems *= array->nelems;
+			type_id = array->type;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		t = btf__type_by_id(btf, type_id);
+	}
+
+	if (size < 0)
+		return -EINVAL;
+
+	if (nelems && size > UINT32_MAX / nelems)
+		return -E2BIG;
+
+	return nelems * size;
+}
+
+int btf__resolve_type(const struct btf *btf, __u32 type_id)
+{
+	const struct btf_type *t;
+	int depth = 0;
+
+	t = btf__type_by_id(btf, type_id);
+	while (depth < MAX_RESOLVE_DEPTH &&
+	       !btf_type_is_void_or_null(t) &&
+	       IS_MODIFIER(BTF_INFO_KIND(t->info))) {
+		type_id = t->type;
+		t = btf__type_by_id(btf, type_id);
+		depth++;
+	}
+
+	if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
+		return -EINVAL;
+
+	return type_id;
+}
+
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	__u32 i;
+
+	if (!strcmp(type_name, "void"))
+		return 0;
+
+	for (i = 1; i <= btf->nr_types; i++) {
+		const struct btf_type *t = btf->types[i];
+		const char *name = btf__name_by_offset(btf, t->name_off);
+
+		if (name && !strcmp(type_name, name))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+void btf__free(struct btf *btf)
+{
+	if (!btf)
+		return;
+
+	if (btf->fd != -1)
+		close(btf->fd);
+
+	free(btf->data);
+	free(btf->types);
+	free(btf);
+}
+
+struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
+{
+	__u32 log_buf_size = 0;
+	char *log_buf = NULL;
+	struct btf *btf;
+	int err;
+
+	btf = calloc(1, sizeof(struct btf));
+	if (!btf)
+		return ERR_PTR(-ENOMEM);
+
+	btf->fd = -1;
+
+	if (err_log) {
+		log_buf = malloc(BPF_LOG_BUF_SIZE);
+		if (!log_buf) {
+			err = -ENOMEM;
+			goto done;
+		}
+		*log_buf = 0;
+		log_buf_size = BPF_LOG_BUF_SIZE;
+	}
+
+	btf->data = malloc(size);
+	if (!btf->data) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	memcpy(btf->data, data, size);
+	btf->data_size = size;
+
+	btf->fd = bpf_load_btf(btf->data, btf->data_size,
+			       log_buf, log_buf_size, false);
+
+	if (btf->fd == -1) {
+		err = -errno;
+		elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+		if (log_buf && *log_buf)
+			elog("%s\n", log_buf);
+		goto done;
+	}
+
+	err = btf_parse_hdr(btf, err_log);
+	if (err)
+		goto done;
+
+	err = btf_parse_str_sec(btf, err_log);
+	if (err)
+		goto done;
+
+	err = btf_parse_type_sec(btf, err_log);
+
+done:
+	free(log_buf);
+
+	if (err) {
+		btf__free(btf);
+		return ERR_PTR(err);
+	}
+
+	return btf;
+}
+
+int btf__fd(const struct btf *btf)
+{
+	return btf->fd;
+}
+
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+	if (offset < btf->hdr->str_len)
+		return &btf->strings[offset];
+	else
+		return NULL;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
new file mode 100644
index 000000000000..b77e7080f7e7
--- /dev/null
+++ b/tools/lib/bpf/btf.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __LIBBPF_BTF_H
+#define __LIBBPF_BTF_H
+
+#include <linux/types.h>
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+#define BTF_ELF_SEC ".BTF"
+
+struct btf;
+struct btf_type;
+
+typedef int (*btf_print_fn_t)(const char *, ...)
+	__attribute__((format(printf, 1, 2)));
+
+LIBBPF_API void btf__free(struct btf *btf);
+LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
+LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
+				   const char *type_name);
+LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
+						  __u32 id);
+LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+
+#endif /* __LIBBPF_BTF_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0f9f06df49bc..d6e62e90e8d4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * Common eBPF ELF object loading operations.
@@ -7,21 +7,9 @@
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
  * Copyright (C) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
 
+#define _GNU_SOURCE
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
@@ -35,16 +23,22 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/list.h>
 #include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
+#include <tools/libc_compat.h>
 #include <libelf.h>
 #include <gelf.h>
 
 #include "libbpf.h"
 #include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
 
 #ifndef EM_BPF
 #define EM_BPF 247
@@ -93,54 +87,6 @@ void libbpf_set_print(libbpf_print_fn_t warn,
 
 #define STRERR_BUFSIZE  128
 
-#define ERRNO_OFFSET(e)		((e) - __LIBBPF_ERRNO__START)
-#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(LIBBPF_ERRNO__##c)
-#define NR_ERRNO	(__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
-
-static const char *libbpf_strerror_table[NR_ERRNO] = {
-	[ERRCODE_OFFSET(LIBELF)]	= "Something wrong in libelf",
-	[ERRCODE_OFFSET(FORMAT)]	= "BPF object format invalid",
-	[ERRCODE_OFFSET(KVERSION)]	= "'version' section incorrect or lost",
-	[ERRCODE_OFFSET(ENDIAN)]	= "Endian mismatch",
-	[ERRCODE_OFFSET(INTERNAL)]	= "Internal error in libbpf",
-	[ERRCODE_OFFSET(RELOC)]		= "Relocation failed",
-	[ERRCODE_OFFSET(VERIFY)]	= "Kernel verifier blocks program loading",
-	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
-	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
-	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
-	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
-	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
-};
-
-int libbpf_strerror(int err, char *buf, size_t size)
-{
-	if (!buf || !size)
-		return -1;
-
-	err = err > 0 ? err : -err;
-
-	if (err < __LIBBPF_ERRNO__START) {
-		int ret;
-
-		ret = strerror_r(err, buf, size);
-		buf[size - 1] = '\0';
-		return ret;
-	}
-
-	if (err < __LIBBPF_ERRNO__END) {
-		const char *msg;
-
-		msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
-		snprintf(buf, size, "%s", msg);
-		buf[size - 1] = '\0';
-		return 0;
-	}
-
-	snprintf(buf, size, "Unknown libbpf error %d", err);
-	buf[size - 1] = '\0';
-	return -1;
-}
-
 #define CHECK_ERR(action, err, out) do {	\
 	err = action;			\
 	if (err)			\
@@ -176,6 +122,7 @@ struct bpf_program {
 	/* Index in elf obj file, for relocation use. */
 	int idx;
 	char *name;
+	int prog_ifindex;
 	char *section_name;
 	struct bpf_insn *insns;
 	size_t insns_cnt, main_prog_cnt;
@@ -211,7 +158,10 @@ struct bpf_map {
 	int fd;
 	char *name;
 	size_t offset;
+	int map_ifindex;
 	struct bpf_map_def def;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
 	void *priv;
 	bpf_map_clear_priv_t clear_priv;
 };
@@ -220,7 +170,7 @@ static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
 	char license[64];
-	u32 kern_version;
+	__u32 kern_version;
 
 	struct bpf_program *programs;
 	size_t nr_programs;
@@ -228,6 +178,7 @@ struct bpf_object {
 	size_t nr_maps;
 
 	bool loaded;
+	bool has_pseudo_calls;
 
 	/*
 	 * Information when doing elf related work. Only valid if fd
@@ -256,6 +207,8 @@ struct bpf_object {
 	 */
 	struct list_head list;
 
+	struct btf *btf;
+
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
 
@@ -263,7 +216,7 @@ struct bpf_object {
 };
 #define obj_elf_valid(o)	((o)->efile.elf)
 
-static void bpf_program__unload(struct bpf_program *prog)
+void bpf_program__unload(struct bpf_program *prog)
 {
 	int i;
 
@@ -360,7 +313,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
 	progs = obj->programs;
 	nr_progs = obj->nr_programs;
 
-	progs = realloc(progs, sizeof(progs[0]) * (nr_progs + 1));
+	progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
 	if (!progs) {
 		/*
 		 * In this case the original obj->programs
@@ -392,10 +345,6 @@ bpf_object__init_prog_names(struct bpf_object *obj)
 		const char *name = NULL;
 
 		prog = &obj->programs[pi];
-		if (prog->idx == obj->efile.text_shndx) {
-			name = ".text";
-			goto skip_search;
-		}
 
 		for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
 		     si++) {
@@ -418,12 +367,15 @@ bpf_object__init_prog_names(struct bpf_object *obj)
 			}
 		}
 
+		if (!name && prog->idx == obj->efile.text_shndx)
+			name = ".text";
+
 		if (!name) {
 			pr_warning("failed to find sym for prog %s\n",
 				   prog->section_name);
 			return -EINVAL;
 		}
-skip_search:
+
 		prog->name = strdup(name);
 		if (!prog->name) {
 			pr_warning("failed to allocate memory for prog sym %s\n",
@@ -505,8 +457,11 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	} else {
 		obj->efile.fd = open(obj->path, O_RDONLY);
 		if (obj->efile.fd < 0) {
-			pr_warning("failed to open %s: %s\n", obj->path,
-					strerror(errno));
+			char errmsg[STRERR_BUFSIZE];
+			char *cp = libbpf_strerror_r(errno, errmsg,
+						     sizeof(errmsg));
+
+			pr_warning("failed to open %s: %s\n", obj->path, cp);
 			return -errno;
 		}
 
@@ -586,7 +541,7 @@ static int
 bpf_object__init_kversion(struct bpf_object *obj,
 			  void *data, size_t size)
 {
-	u32 kver;
+	__u32 kver;
 
 	if (size != sizeof(kver)) {
 		pr_warning("invalid kver section in %s\n", obj->path);
@@ -608,8 +563,9 @@ static int compare_bpf_map(const void *_a, const void *_b)
 }
 
 static int
-bpf_object__init_maps(struct bpf_object *obj)
+bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
+	bool strict = !(flags & MAPS_RELAX_COMPAT);
 	int i, map_idx, map_def_sz, nr_maps = 0;
 	Elf_Scn *scn;
 	Elf_Data *data;
@@ -731,7 +687,8 @@ bpf_object__init_maps(struct bpf_object *obj)
 						   "has unrecognized, non-zero "
 						   "options\n",
 						   obj->path, map_name);
-					return -EINVAL;
+					if (strict)
+						return -EINVAL;
 				}
 			}
 			memcpy(&obj->maps[map_idx].def, def,
@@ -762,7 +719,7 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
 	return false;
 }
 
-static int bpf_object__elf_collect(struct bpf_object *obj)
+static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 {
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
@@ -819,7 +776,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 							data->d_size);
 		else if (strcmp(name, "maps") == 0)
 			obj->efile.maps_shndx = idx;
-		else if (sh.sh_type == SHT_SYMTAB) {
+		else if (strcmp(name, BTF_ELF_SEC) == 0) {
+			obj->btf = btf__new(data->d_buf, data->d_size,
+					    __pr_debug);
+			if (IS_ERR(obj->btf)) {
+				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+					   BTF_ELF_SEC, PTR_ERR(obj->btf));
+				obj->btf = NULL;
+			}
+		} else if (sh.sh_type == SHT_SYMTAB) {
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
 					   obj->path);
@@ -837,10 +802,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 						      data->d_size, name, idx);
 			if (err) {
 				char errmsg[STRERR_BUFSIZE];
+				char *cp = libbpf_strerror_r(-err, errmsg,
+							     sizeof(errmsg));
 
-				strerror_r(-err, errmsg, sizeof(errmsg));
 				pr_warning("failed to alloc program %s (%s): %s",
-					   name, obj->path, errmsg);
+					   name, obj->path, cp);
 			}
 		} else if (sh.sh_type == SHT_REL) {
 			void *reloc = obj->efile.reloc;
@@ -854,8 +820,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 				continue;
 			}
 
-			reloc = realloc(reloc,
-					sizeof(*obj->efile.reloc) * nr_reloc);
+			reloc = reallocarray(reloc, nr_reloc,
+					     sizeof(*obj->efile.reloc));
 			if (!reloc) {
 				pr_warning("realloc failed\n");
 				err = -ENOMEM;
@@ -880,7 +846,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		return LIBBPF_ERRNO__FORMAT;
 	}
 	if (obj->efile.maps_shndx >= 0) {
-		err = bpf_object__init_maps(obj);
+		err = bpf_object__init_maps(obj, flags);
 		if (err)
 			goto out;
 	}
@@ -903,6 +869,18 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
 	return NULL;
 }
 
+struct bpf_program *
+bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
+{
+	struct bpf_program *pos;
+
+	bpf_object__for_each_program(pos, obj) {
+		if (pos->section_name && !strcmp(pos->section_name, title))
+			return pos;
+	}
+	return NULL;
+}
+
 static int
 bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			   Elf_Data *data, struct bpf_object *obj)
@@ -965,6 +943,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			prog->reloc_desc[i].type = RELO_CALL;
 			prog->reloc_desc[i].insn_idx = insn_idx;
 			prog->reloc_desc[i].text_off = sym.st_value;
+			obj->has_pseudo_calls = true;
 			continue;
 		}
 
@@ -996,33 +975,186 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	return 0;
 }
 
+static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+{
+	const struct btf_type *container_type;
+	const struct btf_member *key, *value;
+	struct bpf_map_def *def = &map->def;
+	const size_t max_name = 256;
+	char container_name[max_name];
+	__s64 key_size, value_size;
+	__s32 container_id;
+
+	if (snprintf(container_name, max_name, "____btf_map_%s", map->name) ==
+	    max_name) {
+		pr_warning("map:%s length of '____btf_map_%s' is too long\n",
+			   map->name, map->name);
+		return -EINVAL;
+	}
+
+	container_id = btf__find_by_name(btf, container_name);
+	if (container_id < 0) {
+		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+			 map->name, container_name);
+		return container_id;
+	}
+
+	container_type = btf__type_by_id(btf, container_id);
+	if (!container_type) {
+		pr_warning("map:%s cannot find BTF type for container_id:%u\n",
+			   map->name, container_id);
+		return -EINVAL;
+	}
+
+	if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
+	    BTF_INFO_VLEN(container_type->info) < 2) {
+		pr_warning("map:%s container_name:%s is an invalid container struct\n",
+			   map->name, container_name);
+		return -EINVAL;
+	}
+
+	key = (struct btf_member *)(container_type + 1);
+	value = key + 1;
+
+	key_size = btf__resolve_size(btf, key->type);
+	if (key_size < 0) {
+		pr_warning("map:%s invalid BTF key_type_size\n",
+			   map->name);
+		return key_size;
+	}
+
+	if (def->key_size != key_size) {
+		pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+			   map->name, (__u32)key_size, def->key_size);
+		return -EINVAL;
+	}
+
+	value_size = btf__resolve_size(btf, value->type);
+	if (value_size < 0) {
+		pr_warning("map:%s invalid BTF value_type_size\n", map->name);
+		return value_size;
+	}
+
+	if (def->value_size != value_size) {
+		pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+			   map->name, (__u32)value_size, def->value_size);
+		return -EINVAL;
+	}
+
+	map->btf_key_type_id = key->type;
+	map->btf_value_type_id = value->type;
+
+	return 0;
+}
+
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	int new_fd, err;
+	char *new_name;
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err)
+		return err;
+
+	new_name = strdup(info.name);
+	if (!new_name)
+		return -errno;
+
+	new_fd = open("/", O_RDONLY | O_CLOEXEC);
+	if (new_fd < 0)
+		goto err_free_new_name;
+
+	new_fd = dup3(fd, new_fd, O_CLOEXEC);
+	if (new_fd < 0)
+		goto err_close_new_fd;
+
+	err = zclose(map->fd);
+	if (err)
+		goto err_close_new_fd;
+	free(map->name);
+
+	map->fd = new_fd;
+	map->name = new_name;
+	map->def.type = info.type;
+	map->def.key_size = info.key_size;
+	map->def.value_size = info.value_size;
+	map->def.max_entries = info.max_entries;
+	map->def.map_flags = info.map_flags;
+	map->btf_key_type_id = info.btf_key_type_id;
+	map->btf_value_type_id = info.btf_value_type_id;
+
+	return 0;
+
+err_close_new_fd:
+	close(new_fd);
+err_free_new_name:
+	free(new_name);
+	return -errno;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
+	struct bpf_create_map_attr create_attr = {};
 	unsigned int i;
+	int err;
 
 	for (i = 0; i < obj->nr_maps; i++) {
-		struct bpf_map_def *def = &obj->maps[i].def;
-		int *pfd = &obj->maps[i].fd;
-
-		*pfd = bpf_create_map_name(def->type,
-					   obj->maps[i].name,
-					   def->key_size,
-					   def->value_size,
-					   def->max_entries,
-					   def->map_flags);
+		struct bpf_map *map = &obj->maps[i];
+		struct bpf_map_def *def = &map->def;
+		char *cp, errmsg[STRERR_BUFSIZE];
+		int *pfd = &map->fd;
+
+		if (map->fd >= 0) {
+			pr_debug("skip map create (preset) %s: fd=%d\n",
+				 map->name, map->fd);
+			continue;
+		}
+
+		create_attr.name = map->name;
+		create_attr.map_ifindex = map->map_ifindex;
+		create_attr.map_type = def->type;
+		create_attr.map_flags = def->map_flags;
+		create_attr.key_size = def->key_size;
+		create_attr.value_size = def->value_size;
+		create_attr.max_entries = def->max_entries;
+		create_attr.btf_fd = 0;
+		create_attr.btf_key_type_id = 0;
+		create_attr.btf_value_type_id = 0;
+
+		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+			create_attr.btf_fd = btf__fd(obj->btf);
+			create_attr.btf_key_type_id = map->btf_key_type_id;
+			create_attr.btf_value_type_id = map->btf_value_type_id;
+		}
+
+		*pfd = bpf_create_map_xattr(&create_attr);
+		if (*pfd < 0 && create_attr.btf_key_type_id) {
+			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+				   map->name, cp, errno);
+			create_attr.btf_fd = 0;
+			create_attr.btf_key_type_id = 0;
+			create_attr.btf_value_type_id = 0;
+			map->btf_key_type_id = 0;
+			map->btf_value_type_id = 0;
+			*pfd = bpf_create_map_xattr(&create_attr);
+		}
+
 		if (*pfd < 0) {
 			size_t j;
-			int err = *pfd;
 
+			err = *pfd;
+			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 			pr_warning("failed to create map (name: '%s'): %s\n",
-				   obj->maps[i].name,
-				   strerror(errno));
+				   map->name, cp);
 			for (j = 0; j < i; j++)
 				zclose(obj->maps[j].fd);
 			return err;
 		}
-		pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd);
+		pr_debug("create map %s: fd=%d\n", map->name, *pfd);
 	}
 
 	return 0;
@@ -1052,7 +1184,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
 			return -LIBBPF_ERRNO__RELOC;
 		}
 		new_cnt = prog->insns_cnt + text->insns_cnt;
-		new_insn = realloc(prog->insns, new_cnt * sizeof(*insn));
+		new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
 		if (!new_insn) {
 			pr_warning("oom in prog realloc\n");
 			return -ENOMEM;
@@ -1166,9 +1298,10 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 static int
 load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	     const char *name, struct bpf_insn *insns, int insns_cnt,
-	     char *license, u32 kern_version, int *pfd)
+	     char *license, __u32 kern_version, int *pfd, int prog_ifindex)
 {
 	struct bpf_load_program_attr load_attr;
+	char *cp, errmsg[STRERR_BUFSIZE];
 	char *log_buf;
 	int ret;
 
@@ -1180,6 +1313,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = license;
 	load_attr.kern_version = kern_version;
+	load_attr.prog_ifindex = prog_ifindex;
 
 	if (!load_attr.insns || !load_attr.insns_cnt)
 		return -EINVAL;
@@ -1197,7 +1331,8 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	}
 
 	ret = -LIBBPF_ERRNO__LOAD;
-	pr_warning("load bpf program failed: %s\n", strerror(errno));
+	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+	pr_warning("load bpf program failed: %s\n", cp);
 
 	if (log_buf && log_buf[0] != '\0') {
 		ret = -LIBBPF_ERRNO__VERIFY;
@@ -1232,9 +1367,9 @@ out:
 	return ret;
 }
 
-static int
+int
 bpf_program__load(struct bpf_program *prog,
-		  char *license, u32 kern_version)
+		  char *license, __u32 kern_version)
 {
 	int err = 0, fd, i;
 
@@ -1261,7 +1396,8 @@ bpf_program__load(struct bpf_program *prog,
 		}
 		err = load_program(prog->type, prog->expected_attach_type,
 				   prog->name, prog->insns, prog->insns_cnt,
-				   license, kern_version, &fd);
+				   license, kern_version, &fd,
+				   prog->prog_ifindex);
 		if (!err)
 			prog->instances.fds[0] = fd;
 		goto out;
@@ -1292,7 +1428,8 @@ bpf_program__load(struct bpf_program *prog,
 		err = load_program(prog->type, prog->expected_attach_type,
 				   prog->name, result.new_insn_ptr,
 				   result.new_insn_cnt,
-				   license, kern_version, &fd);
+				   license, kern_version, &fd,
+				   prog->prog_ifindex);
 
 		if (err) {
 			pr_warning("Loading the %dth instance of program '%s' failed\n",
@@ -1313,6 +1450,12 @@ out:
 	return err;
 }
 
+static bool bpf_program__is_function_storage(struct bpf_program *prog,
+					     struct bpf_object *obj)
+{
+	return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
+}
+
 static int
 bpf_object__load_progs(struct bpf_object *obj)
 {
@@ -1320,7 +1463,7 @@ bpf_object__load_progs(struct bpf_object *obj)
 	int err;
 
 	for (i = 0; i < obj->nr_programs; i++) {
-		if (obj->programs[i].idx == obj->efile.text_shndx)
+		if (bpf_program__is_function_storage(&obj->programs[i], obj))
 			continue;
 		err = bpf_program__load(&obj->programs[i],
 					obj->license,
@@ -1331,9 +1474,41 @@ bpf_object__load_progs(struct bpf_object *obj)
 	return 0;
 }
 
-static int bpf_object__validate(struct bpf_object *obj)
+static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
+{
+	switch (type) {
+	case BPF_PROG_TYPE_SOCKET_FILTER:
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
+	case BPF_PROG_TYPE_XDP:
+	case BPF_PROG_TYPE_CGROUP_SKB:
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_LWT_IN:
+	case BPF_PROG_TYPE_LWT_OUT:
+	case BPF_PROG_TYPE_LWT_XMIT:
+	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_CGROUP_DEVICE:
+	case BPF_PROG_TYPE_SK_MSG:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+	case BPF_PROG_TYPE_LIRC_MODE2:
+	case BPF_PROG_TYPE_SK_REUSEPORT:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+		return false;
+	case BPF_PROG_TYPE_UNSPEC:
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	default:
+		return true;
+	}
+}
+
+static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
 {
-	if (obj->kern_version == 0) {
+	if (needs_kver && obj->kern_version == 0) {
 		pr_warning("%s doesn't provide kernel version\n",
 			   obj->path);
 		return -LIBBPF_ERRNO__KVERSION;
@@ -1342,7 +1517,8 @@ static int bpf_object__validate(struct bpf_object *obj)
 }
 
 static struct bpf_object *
-__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
+__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
+		   bool needs_kver, int flags)
 {
 	struct bpf_object *obj;
 	int err;
@@ -1358,9 +1534,9 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
 
 	CHECK_ERR(bpf_object__elf_init(obj), err, out);
 	CHECK_ERR(bpf_object__check_endianness(obj), err, out);
-	CHECK_ERR(bpf_object__elf_collect(obj), err, out);
+	CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out);
 	CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
-	CHECK_ERR(bpf_object__validate(obj), err, out);
+	CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
 
 	bpf_object__elf_finish(obj);
 	return obj;
@@ -1369,15 +1545,33 @@ out:
 	return ERR_PTR(err);
 }
 
-struct bpf_object *bpf_object__open(const char *path)
+struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
+					    int flags)
 {
 	/* param validation */
-	if (!path)
+	if (!attr->file)
 		return NULL;
 
-	pr_debug("loading %s\n", path);
+	pr_debug("loading %s\n", attr->file);
+
+	return __bpf_object__open(attr->file, NULL, 0,
+				  bpf_prog_type__needs_kver(attr->prog_type),
+				  flags);
+}
+
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+{
+	return __bpf_object__open_xattr(attr, 0);
+}
+
+struct bpf_object *bpf_object__open(const char *path)
+{
+	struct bpf_object_open_attr attr = {
+		.file		= path,
+		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	};
 
-	return __bpf_object__open(path, NULL, 0);
+	return bpf_object__open_xattr(&attr);
 }
 
 struct bpf_object *bpf_object__open_buffer(void *obj_buf,
@@ -1400,7 +1594,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 	pr_debug("loading object '%s' from buffer\n",
 		 name);
 
-	return __bpf_object__open(name, obj_buf, obj_buf_sz);
+	return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -1446,6 +1640,7 @@ out:
 
 static int check_path(const char *path)
 {
+	char *cp, errmsg[STRERR_BUFSIZE];
 	struct statfs st_fs;
 	char *dname, *dir;
 	int err = 0;
@@ -1459,7 +1654,8 @@ static int check_path(const char *path)
 
 	dir = dirname(dname);
 	if (statfs(dir, &st_fs)) {
-		pr_warning("failed to statfs %s: %s\n", dir, strerror(errno));
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		pr_warning("failed to statfs %s: %s\n", dir, cp);
 		err = -errno;
 	}
 	free(dname);
@@ -1475,6 +1671,7 @@ static int check_path(const char *path)
 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 			      int instance)
 {
+	char *cp, errmsg[STRERR_BUFSIZE];
 	int err;
 
 	err = check_path(path);
@@ -1493,7 +1690,8 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 	}
 
 	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
-		pr_warning("failed to pin program: %s\n", strerror(errno));
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		pr_warning("failed to pin program: %s\n", cp);
 		return -errno;
 	}
 	pr_debug("pinned program '%s'\n", path);
@@ -1503,13 +1701,16 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 
 static int make_dir(const char *path)
 {
+	char *cp, errmsg[STRERR_BUFSIZE];
 	int err = 0;
 
 	if (mkdir(path, 0700) && errno != EEXIST)
 		err = -errno;
 
-	if (err)
-		pr_warning("failed to mkdir %s: %s\n", path, strerror(-err));
+	if (err) {
+		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+		pr_warning("failed to mkdir %s: %s\n", path, cp);
+	}
 	return err;
 }
 
@@ -1556,6 +1757,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 
 int bpf_map__pin(struct bpf_map *map, const char *path)
 {
+	char *cp, errmsg[STRERR_BUFSIZE];
 	int err;
 
 	err = check_path(path);
@@ -1568,7 +1770,8 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 	}
 
 	if (bpf_obj_pin(map->fd, path)) {
-		pr_warning("failed to pin map: %s\n", strerror(errno));
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		pr_warning("failed to pin map: %s\n", cp);
 		return -errno;
 	}
 
@@ -1641,6 +1844,7 @@ void bpf_object__close(struct bpf_object *obj)
 
 	bpf_object__elf_finish(obj);
 	bpf_object__unload(obj);
+	btf__free(obj->btf);
 
 	for (i = 0; i < obj->nr_maps; i++) {
 		zfree(&obj->maps[i].name);
@@ -1692,6 +1896,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
 	return obj ? obj->kern_version : 0;
 }
 
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+	return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 			 bpf_object_clear_priv_t clear_priv)
 {
@@ -1708,8 +1917,8 @@ void *bpf_object__priv(struct bpf_object *obj)
 	return obj ? obj->priv : ERR_PTR(-EINVAL);
 }
 
-struct bpf_program *
-bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+static struct bpf_program *
+__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 {
 	size_t idx;
 
@@ -1730,6 +1939,18 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 	return &obj->programs[idx];
 }
 
+struct bpf_program *
+bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+{
+	struct bpf_program *prog = prev;
+
+	do {
+		prog = __bpf_program__next(prog, obj);
+	} while (prog && bpf_program__is_function_storage(prog, obj));
+
+	return prog;
+}
+
 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 			  bpf_program_clear_priv_t clear_priv)
 {
@@ -1746,6 +1967,11 @@ void *bpf_program__priv(struct bpf_program *prog)
 	return prog ? prog->priv : ERR_PTR(-EINVAL);
 }
 
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
+{
+	prog->prog_ifindex = ifindex;
+}
+
 const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
 {
 	const char *title;
@@ -1799,6 +2025,9 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
 {
 	int fd;
 
+	if (!prog)
+		return -EINVAL;
+
 	if (n >= prog->instances.nr || n < 0) {
 		pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
 			   n, prog->section_name, prog->instances.nr);
@@ -1845,73 +2074,146 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
-						 enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+					   enum bpf_attach_type type)
 {
 	prog->expected_attach_type = type;
 }
 
-#define BPF_PROG_SEC_FULL(string, ptype, atype) \
-	{ string, sizeof(string) - 1, ptype, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
+	{ string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
 
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
+/* Programs that can NOT be attached. */
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
 
-#define BPF_SA_PROG_SEC(string, ptype) \
-	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
+/* Programs that can be attached. */
+#define BPF_APROG_SEC(string, ptype, atype) \
+	BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
+
+/* Programs that must specify expected attach type at load time. */
+#define BPF_EAPROG_SEC(string, ptype, eatype) \
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
+
+/* Programs that can be attached but attach type can't be identified by section
+ * name. Kept for backward compatibility.
+ */
+#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
 
 static const struct {
 	const char *sec;
 	size_t len;
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
+	int is_attachable;
+	enum bpf_attach_type attach_type;
 } section_names[] = {
-	BPF_PROG_SEC("socket",		BPF_PROG_TYPE_SOCKET_FILTER),
-	BPF_PROG_SEC("kprobe/",		BPF_PROG_TYPE_KPROBE),
-	BPF_PROG_SEC("kretprobe/",	BPF_PROG_TYPE_KPROBE),
-	BPF_PROG_SEC("classifier",	BPF_PROG_TYPE_SCHED_CLS),
-	BPF_PROG_SEC("action",		BPF_PROG_TYPE_SCHED_ACT),
-	BPF_PROG_SEC("tracepoint/",	BPF_PROG_TYPE_TRACEPOINT),
-	BPF_PROG_SEC("xdp",		BPF_PROG_TYPE_XDP),
-	BPF_PROG_SEC("perf_event",	BPF_PROG_TYPE_PERF_EVENT),
-	BPF_PROG_SEC("cgroup/skb",	BPF_PROG_TYPE_CGROUP_SKB),
-	BPF_PROG_SEC("cgroup/sock",	BPF_PROG_TYPE_CGROUP_SOCK),
-	BPF_PROG_SEC("cgroup/dev",	BPF_PROG_TYPE_CGROUP_DEVICE),
-	BPF_PROG_SEC("lwt_in",		BPF_PROG_TYPE_LWT_IN),
-	BPF_PROG_SEC("lwt_out",		BPF_PROG_TYPE_LWT_OUT),
-	BPF_PROG_SEC("lwt_xmit",	BPF_PROG_TYPE_LWT_XMIT),
-	BPF_PROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS),
-	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
-	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
-	BPF_SA_PROG_SEC("cgroup/bind4",	BPF_CGROUP_INET4_BIND),
-	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
-	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
-	BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
+	BPF_PROG_SEC("kprobe/",			BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("kretprobe/",		BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("classifier",		BPF_PROG_TYPE_SCHED_CLS),
+	BPF_PROG_SEC("action",			BPF_PROG_TYPE_SCHED_ACT),
+	BPF_PROG_SEC("tracepoint/",		BPF_PROG_TYPE_TRACEPOINT),
+	BPF_PROG_SEC("raw_tracepoint/",		BPF_PROG_TYPE_RAW_TRACEPOINT),
+	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
+	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
+	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
+	BPF_PROG_SEC("lwt_out",			BPF_PROG_TYPE_LWT_OUT),
+	BPF_PROG_SEC("lwt_xmit",		BPF_PROG_TYPE_LWT_XMIT),
+	BPF_PROG_SEC("lwt_seg6local",		BPF_PROG_TYPE_LWT_SEG6LOCAL),
+	BPF_APROG_SEC("cgroup_skb/ingress",	BPF_PROG_TYPE_CGROUP_SKB,
+						BPF_CGROUP_INET_INGRESS),
+	BPF_APROG_SEC("cgroup_skb/egress",	BPF_PROG_TYPE_CGROUP_SKB,
+						BPF_CGROUP_INET_EGRESS),
+	BPF_APROG_COMPAT("cgroup/skb",		BPF_PROG_TYPE_CGROUP_SKB),
+	BPF_APROG_SEC("cgroup/sock",		BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_CREATE),
+	BPF_EAPROG_SEC("cgroup/post_bind4",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET4_POST_BIND),
+	BPF_EAPROG_SEC("cgroup/post_bind6",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET6_POST_BIND),
+	BPF_APROG_SEC("cgroup/dev",		BPF_PROG_TYPE_CGROUP_DEVICE,
+						BPF_CGROUP_DEVICE),
+	BPF_APROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS,
+						BPF_CGROUP_SOCK_OPS),
+	BPF_APROG_SEC("sk_skb/stream_parser",	BPF_PROG_TYPE_SK_SKB,
+						BPF_SK_SKB_STREAM_PARSER),
+	BPF_APROG_SEC("sk_skb/stream_verdict",	BPF_PROG_TYPE_SK_SKB,
+						BPF_SK_SKB_STREAM_VERDICT),
+	BPF_APROG_COMPAT("sk_skb",		BPF_PROG_TYPE_SK_SKB),
+	BPF_APROG_SEC("sk_msg",			BPF_PROG_TYPE_SK_MSG,
+						BPF_SK_MSG_VERDICT),
+	BPF_APROG_SEC("lirc_mode2",		BPF_PROG_TYPE_LIRC_MODE2,
+						BPF_LIRC_MODE2),
+	BPF_APROG_SEC("flow_dissector",		BPF_PROG_TYPE_FLOW_DISSECTOR,
+						BPF_FLOW_DISSECTOR),
+	BPF_EAPROG_SEC("cgroup/bind4",		BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_BIND),
+	BPF_EAPROG_SEC("cgroup/bind6",		BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_BIND),
+	BPF_EAPROG_SEC("cgroup/connect4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_CONNECT),
+	BPF_EAPROG_SEC("cgroup/connect6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_CONNECT),
+	BPF_EAPROG_SEC("cgroup/sendmsg4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP4_SENDMSG),
+	BPF_EAPROG_SEC("cgroup/sendmsg6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP6_SENDMSG),
 };
 
+#undef BPF_PROG_SEC_IMPL
 #undef BPF_PROG_SEC
-#undef BPF_PROG_SEC_FULL
-#undef BPF_SA_PROG_SEC
+#undef BPF_APROG_SEC
+#undef BPF_EAPROG_SEC
+#undef BPF_APROG_COMPAT
 
-static int bpf_program__identify_section(struct bpf_program *prog)
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+			     enum bpf_attach_type *expected_attach_type)
 {
 	int i;
 
-	if (!prog->section_name)
-		goto err;
+	if (!name)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+		if (strncmp(name, section_names[i].sec, section_names[i].len))
+			continue;
+		*prog_type = section_names[i].prog_type;
+		*expected_attach_type = section_names[i].expected_attach_type;
+		return 0;
+	}
+	return -EINVAL;
+}
 
-	for (i = 0; i < ARRAY_SIZE(section_names); i++)
-		if (strncmp(prog->section_name, section_names[i].sec,
-			    section_names[i].len) == 0)
-			return i;
+int libbpf_attach_type_by_name(const char *name,
+			       enum bpf_attach_type *attach_type)
+{
+	int i;
 
-err:
-	pr_warning("failed to guess program type based on section name %s\n",
-		   prog->section_name);
+	if (!name)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+		if (strncmp(name, section_names[i].sec, section_names[i].len))
+			continue;
+		if (!section_names[i].is_attachable)
+			return -EINVAL;
+		*attach_type = section_names[i].attach_type;
+		return 0;
+	}
+	return -EINVAL;
+}
 
-	return -1;
+static int
+bpf_program__identify_section(struct bpf_program *prog,
+			      enum bpf_prog_type *prog_type,
+			      enum bpf_attach_type *expected_attach_type)
+{
+	return libbpf_prog_type_by_name(prog->section_name, prog_type,
+					expected_attach_type);
 }
 
 int bpf_map__fd(struct bpf_map *map)
@@ -1929,6 +2231,16 @@ const char *bpf_map__name(struct bpf_map *map)
 	return map ? map->name : NULL;
 }
 
+__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
+{
+	return map ? map->btf_key_type_id : 0;
+}
+
+__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
+{
+	return map ? map->btf_value_type_id : 0;
+}
+
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
 		     bpf_map_clear_priv_t clear_priv)
 {
@@ -1950,6 +2262,16 @@ void *bpf_map__priv(struct bpf_map *map)
 	return map ? map->priv : ERR_PTR(-EINVAL);
 }
 
+bool bpf_map__is_offload_neutral(struct bpf_map *map)
+{
+	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
+void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+	map->map_ifindex = ifindex;
+}
+
 struct bpf_map *
 bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
 {
@@ -2024,17 +2346,23 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 			struct bpf_object **pobj, int *prog_fd)
 {
+	struct bpf_object_open_attr open_attr = {
+		.file		= attr->file,
+		.prog_type	= attr->prog_type,
+	};
 	struct bpf_program *prog, *first_prog = NULL;
 	enum bpf_attach_type expected_attach_type;
 	enum bpf_prog_type prog_type;
 	struct bpf_object *obj;
-	int section_idx;
+	struct bpf_map *map;
 	int err;
 
 	if (!attr)
 		return -EINVAL;
+	if (!attr->file)
+		return -EINVAL;
 
-	obj = bpf_object__open(attr->file);
+	obj = bpf_object__open_xattr(&open_attr);
 	if (IS_ERR_OR_NULL(obj))
 		return -ENOENT;
 
@@ -2044,26 +2372,32 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 		 * section name.
 		 */
 		prog_type = attr->prog_type;
+		prog->prog_ifindex = attr->ifindex;
 		expected_attach_type = attr->expected_attach_type;
 		if (prog_type == BPF_PROG_TYPE_UNSPEC) {
-			section_idx = bpf_program__identify_section(prog);
-			if (section_idx < 0) {
+			err = bpf_program__identify_section(prog, &prog_type,
+							    &expected_attach_type);
+			if (err < 0) {
+				pr_warning("failed to guess program type based on section name %s\n",
+					   prog->section_name);
 				bpf_object__close(obj);
 				return -EINVAL;
 			}
-			prog_type = section_names[section_idx].prog_type;
-			expected_attach_type =
-				section_names[section_idx].expected_attach_type;
 		}
 
 		bpf_program__set_type(prog, prog_type);
 		bpf_program__set_expected_attach_type(prog,
 						      expected_attach_type);
 
-		if (prog->idx != obj->efile.text_shndx && !first_prog)
+		if (!first_prog)
 			first_prog = prog;
 	}
 
+	bpf_map__for_each(map, obj) {
+		if (!bpf_map__is_offload_neutral(map))
+			map->map_ifindex = attr->ifindex;
+	}
+
 	if (!first_prog) {
 		pr_warning("object file doesn't contain bpf program\n");
 		bpf_object__close(obj);
@@ -2080,3 +2414,51 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	*prog_fd = bpf_program__fd(first_prog);
 	return 0;
 }
+
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data)
+{
+	struct perf_event_mmap_page *header = mmap_mem;
+	__u64 data_head = ring_buffer_read_head(header);
+	__u64 data_tail = header->data_tail;
+	void *base = ((__u8 *)header) + page_size;
+	int ret = LIBBPF_PERF_EVENT_CONT;
+	struct perf_event_header *ehdr;
+	size_t ehdr_size;
+
+	while (data_head != data_tail) {
+		ehdr = base + (data_tail & (mmap_size - 1));
+		ehdr_size = ehdr->size;
+
+		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
+			void *copy_start = ehdr;
+			size_t len_first = base + mmap_size - copy_start;
+			size_t len_secnd = ehdr_size - len_first;
+
+			if (*copy_size < ehdr_size) {
+				free(*copy_mem);
+				*copy_mem = malloc(ehdr_size);
+				if (!*copy_mem) {
+					*copy_size = 0;
+					ret = LIBBPF_PERF_EVENT_ERROR;
+					break;
+				}
+				*copy_size = ehdr_size;
+			}
+
+			memcpy(*copy_mem, copy_start, len_first);
+			memcpy(*copy_mem + len_first, base, len_secnd);
+			ehdr = *copy_mem;
+		}
+
+		ret = fn(ehdr, private_data);
+		data_tail += ehdr_size;
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			break;
+	}
+
+	ring_buffer_write_tail(header, data_tail);
+	return ret;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a3a62a583f27..1f3468dad8b2 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 
 /*
  * Common eBPF ELF object loading operations.
@@ -6,22 +6,9 @@
  * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
-#ifndef __BPF_LIBBPF_H
-#define __BPF_LIBBPF_H
+#ifndef __LIBBPF_LIBBPF_H
+#define __LIBBPF_LIBBPF_H
 
 #include <stdio.h>
 #include <stdint.h>
@@ -29,6 +16,10 @@
 #include <sys/types.h>  // for size_t
 #include <linux/bpf.h>
 
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
 enum libbpf_errno {
 	__LIBBPF_ERRNO__START = 4000,
 
@@ -46,40 +37,54 @@ enum libbpf_errno {
 	LIBBPF_ERRNO__PROGTYPE,	/* Kernel doesn't support this program type */
 	LIBBPF_ERRNO__WRNGPID,	/* Wrong pid in netlink message */
 	LIBBPF_ERRNO__INVSEQ,	/* Invalid netlink sequence */
+	LIBBPF_ERRNO__NLPARSE,	/* netlink parsing error */
 	__LIBBPF_ERRNO__END,
 };
 
-int libbpf_strerror(int err, char *buf, size_t size);
+LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
 
 /*
- * In include/linux/compiler-gcc.h, __printf is defined. However
- * it should be better if libbpf.h doesn't depend on Linux header file.
+ * __printf is defined in include/linux/compiler-gcc.h. However,
+ * it would be better if libbpf.h didn't depend on Linux header files.
  * So instead of __printf, here we use gcc attribute directly.
  */
 typedef int (*libbpf_print_fn_t)(const char *, ...)
 	__attribute__((format(printf, 1, 2)));
 
-void libbpf_set_print(libbpf_print_fn_t warn,
-		      libbpf_print_fn_t info,
-		      libbpf_print_fn_t debug);
+LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn,
+				 libbpf_print_fn_t info,
+				 libbpf_print_fn_t debug);
 
 /* Hide internal to user */
 struct bpf_object;
 
-struct bpf_object *bpf_object__open(const char *path);
-struct bpf_object *bpf_object__open_buffer(void *obj_buf,
-					   size_t obj_buf_sz,
-					   const char *name);
-int bpf_object__pin(struct bpf_object *object, const char *path);
-void bpf_object__close(struct bpf_object *object);
+struct bpf_object_open_attr {
+	const char *file;
+	enum bpf_prog_type prog_type;
+};
+
+LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+LIBBPF_API struct bpf_object *
+bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
+					    int flags);
+LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
+						      size_t obj_buf_sz,
+						      const char *name);
+LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
 /* Load/unload object into/from kernel */
-int bpf_object__load(struct bpf_object *obj);
-int bpf_object__unload(struct bpf_object *obj);
-const char *bpf_object__name(struct bpf_object *obj);
-unsigned int bpf_object__kversion(struct bpf_object *obj);
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
+LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
+
+LIBBPF_API struct bpf_program *
+bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
 
-struct bpf_object *bpf_object__next(struct bpf_object *prev);
+LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
 #define bpf_object__for_each_safe(pos, tmp)			\
 	for ((pos) = bpf_object__next(NULL),		\
 		(tmp) = bpf_object__next(pos);		\
@@ -87,14 +92,20 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev);
 	     (pos) = (tmp), (tmp) = bpf_object__next(tmp))
 
 typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
-int bpf_object__set_priv(struct bpf_object *obj, void *priv,
-			 bpf_object_clear_priv_t clear_priv);
-void *bpf_object__priv(struct bpf_object *prog);
+LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+				    bpf_object_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_object__priv(struct bpf_object *prog);
 
-/* Accessors of bpf_program. */
+LIBBPF_API int
+libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+			 enum bpf_attach_type *expected_attach_type);
+LIBBPF_API int libbpf_attach_type_by_name(const char *name,
+					  enum bpf_attach_type *attach_type);
+
+/* Accessors of bpf_program */
 struct bpf_program;
-struct bpf_program *bpf_program__next(struct bpf_program *prog,
-				      struct bpf_object *obj);
+LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
+						 struct bpf_object *obj);
 
 #define bpf_object__for_each_program(pos, obj)		\
 	for ((pos) = bpf_program__next(NULL, (obj));	\
@@ -104,44 +115,51 @@ struct bpf_program *bpf_program__next(struct bpf_program *prog,
 typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
 					 void *);
 
-int bpf_program__set_priv(struct bpf_program *prog, void *priv,
-			  bpf_program_clear_priv_t clear_priv);
+LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+				     bpf_program_clear_priv_t clear_priv);
 
-void *bpf_program__priv(struct bpf_program *prog);
+LIBBPF_API void *bpf_program__priv(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
+					 __u32 ifindex);
 
-const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
+LIBBPF_API const char *bpf_program__title(struct bpf_program *prog,
+					  bool needs_copy);
 
-int bpf_program__fd(struct bpf_program *prog);
-int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
-			      int instance);
-int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
+				 __u32 kern_version);
+LIBBPF_API int bpf_program__fd(struct bpf_program *prog);
+LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
+					 const char *path,
+					 int instance);
+LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
 
 struct bpf_insn;
 
 /*
  * Libbpf allows callers to adjust BPF programs before being loaded
- * into kernel. One program in an object file can be transform into
- * multiple variants to be attached to different code.
+ * into kernel. One program in an object file can be transformed into
+ * multiple variants to be attached to different hooks.
  *
  * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
- * are APIs for this propose.
+ * form an API for this purpose.
  *
  * - bpf_program_prep_t:
- *   It defines 'preprocessor', which is a caller defined function
+ *   Defines a 'preprocessor', which is a caller defined function
  *   passed to libbpf through bpf_program__set_prep(), and will be
  *   called before program is loaded. The processor should adjust
- *   the program one time for each instances according to the number
+ *   the program one time for each instance according to the instance id
  *   passed to it.
  *
  * - bpf_program__set_prep:
- *   Attachs a preprocessor to a BPF program. The number of instances
- *   whould be created is also passed through this function.
+ *   Attaches a preprocessor to a BPF program. The number of instances
+ *   that should be created is also passed through this function.
  *
  * - bpf_program__nth_fd:
- *   After the program is loaded, get resuling fds from bpf program for
- *   each instances.
+ *   After the program is loaded, get resulting FD of a given instance
+ *   of the BPF program.
  *
- * If bpf_program__set_prep() is not used, the program whould be loaded
+ * If bpf_program__set_prep() is not used, the program would be loaded
  * without adjustment during bpf_object__load(). The program has only
  * one instance. In this case bpf_program__fd(prog) is equal to
  * bpf_program__nth_fd(prog, 0).
@@ -155,7 +173,7 @@ struct bpf_prog_prep_result {
 	struct bpf_insn *new_insn_ptr;
 	int new_insn_cnt;
 
-	/* If not NULL, result fd is set to it */
+	/* If not NULL, result FD is written to it. */
 	int *pfd;
 };
 
@@ -168,43 +186,49 @@ struct bpf_prog_prep_result {
  *  - res:	Output parameter, result of transformation.
  *
  * Return value:
- *  - Zero: pre-processing success.
- *  - Non-zero: pre-processing, stop loading.
+ *  - Zero:	pre-processing success.
+ *  - Non-zero:	pre-processing error, stop loading.
  */
 typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
 				  struct bpf_insn *insns, int insns_cnt,
 				  struct bpf_prog_prep_result *res);
 
-int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
-			  bpf_program_prep_t prep);
+LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+				     bpf_program_prep_t prep);
 
-int bpf_program__nth_fd(struct bpf_program *prog, int n);
+LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n);
 
 /*
- * Adjust type of bpf program. Default is kprobe.
+ * Adjust type of BPF program. Default is kprobe.
  */
-int bpf_program__set_socket_filter(struct bpf_program *prog);
-int bpf_program__set_tracepoint(struct bpf_program *prog);
-int bpf_program__set_kprobe(struct bpf_program *prog);
-int bpf_program__set_sched_cls(struct bpf_program *prog);
-int bpf_program__set_sched_act(struct bpf_program *prog);
-int bpf_program__set_xdp(struct bpf_program *prog);
-int bpf_program__set_perf_event(struct bpf_program *prog);
-void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
-
-bool bpf_program__is_socket_filter(struct bpf_program *prog);
-bool bpf_program__is_tracepoint(struct bpf_program *prog);
-bool bpf_program__is_kprobe(struct bpf_program *prog);
-bool bpf_program__is_sched_cls(struct bpf_program *prog);
-bool bpf_program__is_sched_act(struct bpf_program *prog);
-bool bpf_program__is_xdp(struct bpf_program *prog);
-bool bpf_program__is_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
+				      enum bpf_prog_type type);
+LIBBPF_API void
+bpf_program__set_expected_attach_type(struct bpf_program *prog,
+				      enum bpf_attach_type type);
+
+LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog);
 
 /*
- * We don't need __attribute__((packed)) now since it is
- * unnecessary for 'bpf_map_def' because they are all aligned.
- * In addition, using it will trigger -Wpacked warning message,
- * and will be treated as an error due to -Werror.
+ * No need for __attribute__((packed)), all members of 'bpf_map_def'
+ * are all aligned.  In addition, using __attribute__((packed))
+ * would trigger a -Wpacked warning message, and lead to an error
+ * if -Werror is set.
  */
 struct bpf_map_def {
 	unsigned int type;
@@ -215,49 +239,82 @@ struct bpf_map_def {
 };
 
 /*
- * There is another 'struct bpf_map' in include/linux/map.h. However,
- * it is not a uapi header so no need to consider name clash.
+ * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
+ * so no need to worry about a name clash.
  */
 struct bpf_map;
-struct bpf_map *
+LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
 
 /*
  * Get bpf_map through the offset of corresponding struct bpf_map_def
- * in the bpf object file.
+ * in the BPF object file.
  */
-struct bpf_map *
+LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
 
-struct bpf_map *
+LIBBPF_API struct bpf_map *
 bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 #define bpf_map__for_each(pos, obj)		\
 	for ((pos) = bpf_map__next(NULL, (obj));	\
 	     (pos) != NULL;				\
 	     (pos) = bpf_map__next((pos), (obj)))
 
-int bpf_map__fd(struct bpf_map *map);
-const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
-const char *bpf_map__name(struct bpf_map *map);
+LIBBPF_API int bpf_map__fd(struct bpf_map *map);
+LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
+LIBBPF_API const char *bpf_map__name(struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-int bpf_map__set_priv(struct bpf_map *map, void *priv,
-		      bpf_map_clear_priv_t clear_priv);
-void *bpf_map__priv(struct bpf_map *map);
-int bpf_map__pin(struct bpf_map *map, const char *path);
+LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
+				 bpf_map_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
+LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 
-long libbpf_get_error(const void *ptr);
+LIBBPF_API long libbpf_get_error(const void *ptr);
 
 struct bpf_prog_load_attr {
 	const char *file;
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
+	int ifindex;
 };
 
-int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-			struct bpf_object **pobj, int *prog_fd);
-int bpf_prog_load(const char *file, enum bpf_prog_type type,
-		  struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+				   struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
+			     struct bpf_object **pobj, int *prog_fd);
 
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-#endif
+LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+
+enum bpf_perf_event_ret {
+	LIBBPF_PERF_EVENT_DONE	= 0,
+	LIBBPF_PERF_EVENT_ERROR	= -1,
+	LIBBPF_PERF_EVENT_CONT	= -2,
+};
+
+struct perf_event_header;
+typedef enum bpf_perf_event_ret
+	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+				  void *private_data);
+LIBBPF_API enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data);
+
+struct nlattr;
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+int libbpf_netlink_open(unsigned int *nl_pid);
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
+#endif /* __LIBBPF_LIBBPF_H */
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
new file mode 100644
index 000000000000..d83b17f8435c
--- /dev/null
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "libbpf.h"
+
+#define ERRNO_OFFSET(e)		((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO	(__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+	[ERRCODE_OFFSET(LIBELF)]	= "Something wrong in libelf",
+	[ERRCODE_OFFSET(FORMAT)]	= "BPF object format invalid",
+	[ERRCODE_OFFSET(KVERSION)]	= "'version' section incorrect or lost",
+	[ERRCODE_OFFSET(ENDIAN)]	= "Endian mismatch",
+	[ERRCODE_OFFSET(INTERNAL)]	= "Internal error in libbpf",
+	[ERRCODE_OFFSET(RELOC)]		= "Relocation failed",
+	[ERRCODE_OFFSET(VERIFY)]	= "Kernel verifier blocks program loading",
+	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
+	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
+	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
+	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
+	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
+	[ERRCODE_OFFSET(NLPARSE)]	= "Incorrect netlink message parsing",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+	if (!buf || !size)
+		return -1;
+
+	err = err > 0 ? err : -err;
+
+	if (err < __LIBBPF_ERRNO__START) {
+		int ret;
+
+		ret = strerror_r(err, buf, size);
+		buf[size - 1] = '\0';
+		return ret;
+	}
+
+	if (err < __LIBBPF_ERRNO__END) {
+		const char *msg;
+
+		msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+		snprintf(buf, size, "%s", msg);
+		buf[size - 1] = '\0';
+		return 0;
+	}
+
+	snprintf(buf, size, "Unknown libbpf error %d", err);
+	buf[size - 1] = '\0';
+	return -1;
+}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
new file mode 100644
index 000000000000..0ce67aea8f3b
--- /dev/null
+++ b/tools/lib/bpf/netlink.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <time.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "nlattr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
+			      void *cookie);
+
+int libbpf_netlink_open(__u32 *nl_pid)
+{
+	struct sockaddr_nl sa;
+	socklen_t addrlen;
+	int one = 1, ret;
+	int sock;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0)
+		return -errno;
+
+	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+		       &one, sizeof(one)) < 0) {
+		fprintf(stderr, "Netlink error reporting not supported\n");
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	addrlen = sizeof(sa);
+	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	if (addrlen != sizeof(sa)) {
+		ret = -LIBBPF_ERRNO__INTERNAL;
+		goto cleanup;
+	}
+
+	*nl_pid = sa.nl_pid;
+	return sock;
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+			    __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+			    void *cookie)
+{
+	bool multipart = true;
+	struct nlmsgerr *err;
+	struct nlmsghdr *nh;
+	char buf[4096];
+	int len, ret;
+
+	while (multipart) {
+		multipart = false;
+		len = recv(sock, buf, sizeof(buf), 0);
+		if (len < 0) {
+			ret = -errno;
+			goto done;
+		}
+
+		if (len == 0)
+			break;
+
+		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+		     nh = NLMSG_NEXT(nh, len)) {
+			if (nh->nlmsg_pid != nl_pid) {
+				ret = -LIBBPF_ERRNO__WRNGPID;
+				goto done;
+			}
+			if (nh->nlmsg_seq != seq) {
+				ret = -LIBBPF_ERRNO__INVSEQ;
+				goto done;
+			}
+			if (nh->nlmsg_flags & NLM_F_MULTI)
+				multipart = true;
+			switch (nh->nlmsg_type) {
+			case NLMSG_ERROR:
+				err = (struct nlmsgerr *)NLMSG_DATA(nh);
+				if (!err->error)
+					continue;
+				ret = err->error;
+				libbpf_nla_dump_errormsg(nh);
+				goto done;
+			case NLMSG_DONE:
+				return 0;
+			default:
+				break;
+			}
+			if (_fn) {
+				ret = _fn(nh, fn, cookie);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+	ret = 0;
+done:
+	return ret;
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	int sock, seq = 0, ret;
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	__u32 nl_pid;
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0)
+		return sock;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* started nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = IFLA_XDP_FD;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = IFLA_XDP_FLAGS;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+	ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct nlattr *tb[IFLA_MAX + 1], *attr;
+	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+	attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+	if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nlh.nlmsg_type = RTM_GETLINK,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.ifm.ifi_family = AF_PACKET,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+				dump_link_nlmsg, cookie);
+}
+
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+			      libbpf_dump_nlmsg_t dump_class_nlmsg,
+			      void *cookie)
+{
+	struct nlattr *tb[TCA_MAX + 1], *attr;
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_class_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETTCLASS,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+				dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+			      libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
+			      void *cookie)
+{
+	struct nlattr *tb[TCA_MAX + 1], *attr;
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETQDISC,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+				dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+			       libbpf_dump_nlmsg_t dump_filter_nlmsg,
+			       void *cookie)
+{
+	struct nlattr *tb[TCA_MAX + 1], *attr;
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_filter_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETTFILTER,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+		.t.tcm_parent = handle,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+				dump_filter_nlmsg, cookie);
+}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 4719434278b2..1e69c0c8d413 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -1,13 +1,8 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * NETLINK      Netlink attributes
  *
- *	This library is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU Lesser General Public
- *	License as published by the Free Software Foundation version 2.1
- *	of the License.
- *
  * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
  */
 
@@ -17,20 +12,15 @@
 #include <string.h>
 #include <stdio.h>
 
-static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = {
-	[NLA_U8]	= sizeof(uint8_t),
-	[NLA_U16]	= sizeof(uint16_t),
-	[NLA_U32]	= sizeof(uint32_t),
-	[NLA_U64]	= sizeof(uint64_t),
-	[NLA_STRING]	= 1,
-	[NLA_FLAG]	= 0,
+static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
+	[LIBBPF_NLA_U8]		= sizeof(uint8_t),
+	[LIBBPF_NLA_U16]	= sizeof(uint16_t),
+	[LIBBPF_NLA_U32]	= sizeof(uint32_t),
+	[LIBBPF_NLA_U64]	= sizeof(uint64_t),
+	[LIBBPF_NLA_STRING]	= 1,
+	[LIBBPF_NLA_FLAG]	= 0,
 };
 
-static int nla_len(const struct nlattr *nla)
-{
-	return nla->nla_len - NLA_HDRLEN;
-}
-
 static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
 {
 	int totlen = NLA_ALIGN(nla->nla_len);
@@ -46,20 +36,15 @@ static int nla_ok(const struct nlattr *nla, int remaining)
 	       nla->nla_len <= remaining;
 }
 
-static void *nla_data(const struct nlattr *nla)
-{
-	return (char *) nla + NLA_HDRLEN;
-}
-
 static int nla_type(const struct nlattr *nla)
 {
 	return nla->nla_type & NLA_TYPE_MASK;
 }
 
 static int validate_nla(struct nlattr *nla, int maxtype,
-			struct nla_policy *policy)
+			struct libbpf_nla_policy *policy)
 {
-	struct nla_policy *pt;
+	struct libbpf_nla_policy *pt;
 	unsigned int minlen = 0;
 	int type = nla_type(nla);
 
@@ -68,23 +53,24 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 
 	pt = &policy[type];
 
-	if (pt->type > NLA_TYPE_MAX)
+	if (pt->type > LIBBPF_NLA_TYPE_MAX)
 		return 0;
 
 	if (pt->minlen)
 		minlen = pt->minlen;
-	else if (pt->type != NLA_UNSPEC)
+	else if (pt->type != LIBBPF_NLA_UNSPEC)
 		minlen = nla_attr_minlen[pt->type];
 
-	if (nla_len(nla) < minlen)
+	if (libbpf_nla_len(nla) < minlen)
 		return -1;
 
-	if (pt->maxlen && nla_len(nla) > pt->maxlen)
+	if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
 		return -1;
 
-	if (pt->type == NLA_STRING) {
-		char *data = nla_data(nla);
-		if (data[nla_len(nla) - 1] != '\0')
+	if (pt->type == LIBBPF_NLA_STRING) {
+		char *data = libbpf_nla_data(nla);
+
+		if (data[libbpf_nla_len(nla) - 1] != '\0')
 			return -1;
 	}
 
@@ -114,15 +100,15 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh)
  * @see nla_validate
  * @return 0 on success or a negative error code.
  */
-static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
-		     struct nla_policy *policy)
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy)
 {
 	struct nlattr *nla;
 	int rem, err;
 
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 
-	nla_for_each_attr(nla, head, len, rem) {
+	libbpf_nla_for_each_attr(nla, head, len, rem) {
 		int type = nla_type(nla);
 
 		if (type > maxtype)
@@ -146,12 +132,33 @@ errout:
 	return err;
 }
 
+/**
+ * Create attribute index based on nested attribute
+ * @arg tb              Index array to be filled (maxtype+1 elements).
+ * @arg maxtype         Maximum attribute type expected and accepted.
+ * @arg nla             Nested Attribute.
+ * @arg policy          Attribute validation policy.
+ *
+ * Feeds the stream of attributes nested into the specified attribute
+ * to libbpf_nla_parse().
+ *
+ * @see libbpf_nla_parse
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy)
+{
+	return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla),
+				libbpf_nla_len(nla), policy);
+}
+
 /* dump netlink extended ack error message */
-int nla_dump_errormsg(struct nlmsghdr *nlh)
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
 {
-	struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
-		[NLMSGERR_ATTR_MSG]	= { .type = NLA_STRING },
-		[NLMSGERR_ATTR_OFFS]	= { .type = NLA_U32 },
+	struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+		[NLMSGERR_ATTR_MSG]	= { .type = LIBBPF_NLA_STRING },
+		[NLMSGERR_ATTR_OFFS]	= { .type = LIBBPF_NLA_U32 },
 	};
 	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
 	struct nlmsgerr *err;
@@ -172,14 +179,15 @@ int nla_dump_errormsg(struct nlmsghdr *nlh)
 	attr = (struct nlattr *) ((void *) err + hlen);
 	alen = nlh->nlmsg_len - hlen;
 
-	if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) {
+	if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
+			     extack_policy) != 0) {
 		fprintf(stderr,
 			"Failed to parse extended error attributes\n");
 		return 0;
 	}
 
 	if (tb[NLMSGERR_ATTR_MSG])
-		errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]);
+		errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]);
 
 	fprintf(stderr, "Kernel error message: %s\n", errmsg);
 
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 931a71f68f93..6cc3ac91690f 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -1,18 +1,13 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 
 /*
  * NETLINK      Netlink attributes
  *
- *	This library is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU Lesser General Public
- *	License as published by the Free Software Foundation version 2.1
- *	of the License.
- *
  * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
  */
 
-#ifndef __NLATTR_H
-#define __NLATTR_H
+#ifndef __LIBBPF_NLATTR_H
+#define __LIBBPF_NLATTR_H
 
 #include <stdint.h>
 #include <linux/netlink.h>
@@ -23,19 +18,19 @@
  * Standard attribute types to specify validation policy
  */
 enum {
-	NLA_UNSPEC,	/**< Unspecified type, binary data chunk */
-	NLA_U8,		/**< 8 bit integer */
-	NLA_U16,	/**< 16 bit integer */
-	NLA_U32,	/**< 32 bit integer */
-	NLA_U64,	/**< 64 bit integer */
-	NLA_STRING,	/**< NUL terminated character string */
-	NLA_FLAG,	/**< Flag */
-	NLA_MSECS,	/**< Micro seconds (64bit) */
-	NLA_NESTED,	/**< Nested attributes */
-	__NLA_TYPE_MAX,
+	LIBBPF_NLA_UNSPEC,	/**< Unspecified type, binary data chunk */
+	LIBBPF_NLA_U8,		/**< 8 bit integer */
+	LIBBPF_NLA_U16,		/**< 16 bit integer */
+	LIBBPF_NLA_U32,		/**< 32 bit integer */
+	LIBBPF_NLA_U64,		/**< 64 bit integer */
+	LIBBPF_NLA_STRING,	/**< NUL terminated character string */
+	LIBBPF_NLA_FLAG,	/**< Flag */
+	LIBBPF_NLA_MSECS,	/**< Micro seconds (64bit) */
+	LIBBPF_NLA_NESTED,	/**< Nested attributes */
+	__LIBBPF_NLA_TYPE_MAX,
 };
 
-#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1)
 
 /**
  * @ingroup attr
@@ -43,8 +38,8 @@ enum {
  *
  * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
  */
-struct nla_policy {
-	/** Type of attribute or NLA_UNSPEC */
+struct libbpf_nla_policy {
+	/** Type of attribute or LIBBPF_NLA_UNSPEC */
 	uint16_t	type;
 
 	/** Minimal length of payload required */
@@ -62,11 +57,50 @@ struct nla_policy {
  * @arg len	length of attribute stream
  * @arg rem	initialized to len, holds bytes currently remaining in stream
  */
-#define nla_for_each_attr(pos, head, len, rem) \
+#define libbpf_nla_for_each_attr(pos, head, len, rem) \
 	for (pos = head, rem = len; \
 	     nla_ok(pos, rem); \
 	     pos = nla_next(pos, &(rem)))
 
-int nla_dump_errormsg(struct nlmsghdr *nlh);
+/**
+ * libbpf_nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *libbpf_nla_data(const struct nlattr *nla)
+{
+	return (char *) nla + NLA_HDRLEN;
+}
+
+static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
+{
+	return *(uint8_t *)libbpf_nla_data(nla);
+}
+
+static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
+{
+	return *(uint32_t *)libbpf_nla_data(nla);
+}
+
+static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
+{
+	return (const char *)libbpf_nla_data(nla);
+}
+
+/**
+ * libbpf_nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int libbpf_nla_len(const struct nlattr *nla)
+{
+	return nla->nla_len - NLA_HDRLEN;
+}
+
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy);
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy);
+
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
 
-#endif /* __NLATTR_H */
+#endif /* __LIBBPF_NLATTR_H */
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
new file mode 100644
index 000000000000..00e48ac5b806
--- /dev/null
+++ b/tools/lib/bpf/str_error.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include "str_error.h"
+
+/*
+ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
+ * libc, while checking strerror_r() return to avoid having to check this in
+ * all places calling it.
+ */
+char *libbpf_strerror_r(int err, char *dst, int len)
+{
+	int ret = strerror_r(err, dst, len);
+	if (ret)
+		snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+	return dst;
+}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
new file mode 100644
index 000000000000..a139334d57b6
--- /dev/null
+++ b/tools/lib/bpf/str_error.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBBPF_STR_ERROR_H
+#define __LIBBPF_STR_ERROR_H
+
+char *libbpf_strerror_r(int err, char *dst, int len);
+#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 9b0ca3ad1ef3..9dafb8cb752f 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -129,12 +129,12 @@ $(OUTPUT)liblockdep.a: $(LIB_IN)
 tags:	force
 	$(RM) tags
 	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
-	--regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'
+	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
 
 TAGS:	force
 	$(RM) TAGS
 	find . -name '*.[ch]' | xargs etags \
-	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'
+	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
 
 define do_install
 	$(print_install)				\
diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
index 9997a8805a82..e3d47b59b14d 100644
--- a/tools/lib/subcmd/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -23,6 +23,13 @@ void pager_init(const char *pager_env)
 	subcmd_config.pager_env = pager_env;
 }
 
+static const char *forced_pager;
+
+void force_pager(const char *pager)
+{
+	forced_pager = pager;
+}
+
 static void pager_preexec(void)
 {
 	/*
@@ -66,7 +73,9 @@ void setup_pager(void)
 	const char *pager = getenv(subcmd_config.pager_env);
 	struct winsize sz;
 
-	if (!isatty(1))
+	if (forced_pager)
+		pager = forced_pager;
+	if (!isatty(1) && !forced_pager)
 		return;
 	if (ioctl(1, TIOCGWINSZ, &sz) == 0)
 		pager_columns = sz.ws_col;
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
index f1a53cf29880..a818964693ab 100644
--- a/tools/lib/subcmd/pager.h
+++ b/tools/lib/subcmd/pager.h
@@ -7,5 +7,6 @@ extern void pager_init(const char *pager_env);
 extern void setup_pager(void);
 extern int pager_in_use(void);
 extern int pager_get_columns(void);
+extern void force_pager(const char *);
 
 #endif /* __SUBCMD_PAGER_H */
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index cb7154eccbdc..dbb9efbf718a 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
 		case OPTION_INTEGER:
 		case OPTION_UINTEGER:
 		case OPTION_LONG:
+		case OPTION_ULONG:
 		case OPTION_U64:
 		default:
 			break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
 		case OPTION_INTEGER:
 		case OPTION_UINTEGER:
 		case OPTION_LONG:
+		case OPTION_ULONG:
 		case OPTION_U64:
 		default:
 			break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
 			return opterror(opt, "expects a numerical value", flags);
 		return 0;
 
+	case OPTION_ULONG:
+		if (unset) {
+			*(unsigned long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(unsigned long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
 	case OPTION_U64:
 		if (unset) {
 			*(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
 	case OPTION_ARGUMENT:
 		break;
 	case OPTION_LONG:
+	case OPTION_ULONG:
 	case OPTION_U64:
 	case OPTION_INTEGER:
 	case OPTION_UINTEGER:
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 92fdbe1519f6..6ca2a8bfe716 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -25,6 +25,7 @@ enum parse_opt_type {
 	OPTION_STRING,
 	OPTION_INTEGER,
 	OPTION_LONG,
+	OPTION_ULONG,
 	OPTION_CALLBACK,
 	OPTION_U64,
 	OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
 #define OPT_INTEGER(s, l, v, h)     { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
 #define OPT_UINTEGER(s, l, v, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h)        { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
 #define OPT_STRING_OPTARG(s, l, v, a, h, d) \
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 689b6a130dd7..96d830545bbb 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -10,6 +10,12 @@ u8 kallsyms2elf_type(char type)
 	return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT;
 }
 
+bool kallsyms__is_function(char symbol_type)
+{
+	symbol_type = toupper(symbol_type);
+	return symbol_type == 'T' || symbol_type == 'W';
+}
+
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start))
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bc40101d72c1..72ab9870454b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -20,6 +20,8 @@ static inline u8 kallsyms2elf_binding(char type)
 
 u8 kallsyms2elf_type(char type);
 
+bool kallsyms__is_function(char symbol_type);
+
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start));
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
index c681d0575d16..ba54bfce0b0b 100644
--- a/tools/lib/traceevent/Build
+++ b/tools/lib/traceevent/Build
@@ -4,6 +4,8 @@ libtraceevent-y += trace-seq.o
 libtraceevent-y += parse-filter.o
 libtraceevent-y += parse-utils.o
 libtraceevent-y += kbuffer-parse.o
+libtraceevent-y += tep_strerror.o
+libtraceevent-y += event-parse-api.o
 
 plugin_jbd2-y         += plugin_jbd2.o
 plugin_hrtimer-y      += plugin_hrtimer.o
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 46cd5f871ad7..0b4e833088a4 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -233,12 +233,12 @@ endef
 tags:	force
 	$(RM) tags
 	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
-	--regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'
+	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
 
 TAGS:	force
 	$(RM) TAGS
 	find . -name '*.[ch]' | xargs etags \
-	--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'
+	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
 
 define do_install_mkdir
 	if [ ! -d '$(DESTDIR_SQ)$1' ]; then		\
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
new file mode 100644
index 000000000000..61f7149085ee
--- /dev/null
+++ b/tools/lib/traceevent/event-parse-api.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include "event-parse.h"
+#include "event-parse-local.h"
+#include "event-utils.h"
+
+/**
+ * tep_get_first_event - returns the first event in the events array
+ * @tep: a handle to the tep_handle
+ *
+ * This returns pointer to the first element of the events array
+ * If @tep is NULL, NULL is returned.
+ */
+struct tep_event_format *tep_get_first_event(struct tep_handle *tep)
+{
+	if (tep && tep->events)
+		return tep->events[0];
+
+	return NULL;
+}
+
+/**
+ * tep_get_events_count - get the number of defined events
+ * @tep: a handle to the tep_handle
+ *
+ * This returns number of elements in event array
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_events_count(struct tep_handle *tep)
+{
+	if(tep)
+		return tep->nr_events;
+	return 0;
+}
+
+/**
+ * tep_set_flag - set event parser flag
+ * @tep: a handle to the tep_handle
+ * @flag: flag, or combination of flags to be set
+ * can be any combination from enum tep_flag
+ *
+ * This sets a flag or mbination of flags  from enum tep_flag
+  */
+void tep_set_flag(struct tep_handle *tep, int flag)
+{
+	if(tep)
+		tep->flags |= flag;
+}
+
+unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data)
+{
+	unsigned short swap;
+
+	if (!pevent || pevent->host_bigendian == pevent->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 8) |
+		((data & (0xffULL << 8)) >> 8);
+
+	return swap;
+}
+
+unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data)
+{
+	unsigned int swap;
+
+	if (!pevent || pevent->host_bigendian == pevent->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 24) |
+		((data & (0xffULL << 8)) << 8) |
+		((data & (0xffULL << 16)) >> 8) |
+		((data & (0xffULL << 24)) >> 24);
+
+	return swap;
+}
+
+unsigned long long
+__tep_data2host8(struct tep_handle *pevent, unsigned long long data)
+{
+	unsigned long long swap;
+
+	if (!pevent || pevent->host_bigendian == pevent->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 56) |
+		((data & (0xffULL << 8)) << 40) |
+		((data & (0xffULL << 16)) << 24) |
+		((data & (0xffULL << 24)) << 8) |
+		((data & (0xffULL << 32)) >> 8) |
+		((data & (0xffULL << 40)) >> 24) |
+		((data & (0xffULL << 48)) >> 40) |
+		((data & (0xffULL << 56)) >> 56);
+
+	return swap;
+}
+
+/**
+ * tep_get_header_page_size - get size of the header page
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns size of the header page
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_header_page_size(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->header_page_size_size;
+	return 0;
+}
+
+/**
+ * tep_get_cpus - get the number of CPUs
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns the number of CPUs
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_cpus(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->cpus;
+	return 0;
+}
+
+/**
+ * tep_set_cpus - set the number of CPUs
+ * @pevent: a handle to the tep_handle
+ *
+ * This sets the number of CPUs
+ */
+void tep_set_cpus(struct tep_handle *pevent, int cpus)
+{
+	if(pevent)
+		pevent->cpus = cpus;
+}
+
+/**
+ * tep_get_long_size - get the size of a long integer on the current machine
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns the size of a long integer on the current machine
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_long_size(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->long_size;
+	return 0;
+}
+
+/**
+ * tep_set_long_size - set the size of a long integer on the current machine
+ * @pevent: a handle to the tep_handle
+ * @size: size, in bytes, of a long integer
+ *
+ * This sets the size of a long integer on the current machine
+ */
+void tep_set_long_size(struct tep_handle *pevent, int long_size)
+{
+	if(pevent)
+		pevent->long_size = long_size;
+}
+
+/**
+ * tep_get_page_size - get the size of a memory page on the current machine
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns the size of a memory page on the current machine
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_page_size(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->page_size;
+	return 0;
+}
+
+/**
+ * tep_set_page_size - set the size of a memory page on the current machine
+ * @pevent: a handle to the tep_handle
+ * @_page_size: size of a memory page, in bytes
+ *
+ * This sets the size of a memory page on the current machine
+ */
+void tep_set_page_size(struct tep_handle *pevent, int _page_size)
+{
+	if(pevent)
+		pevent->page_size = _page_size;
+}
+
+/**
+ * tep_is_file_bigendian - get if the file is in big endian order
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns if the file is in big endian order
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_is_file_bigendian(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->file_bigendian;
+	return 0;
+}
+
+/**
+ * tep_set_file_bigendian - set if the file is in big endian order
+ * @pevent: a handle to the tep_handle
+ * @endian: non zero, if the file is in big endian order
+ *
+ * This sets if the file is in big endian order
+ */
+void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian)
+{
+	if(pevent)
+		pevent->file_bigendian = endian;
+}
+
+/**
+ * tep_is_host_bigendian - get if the order of the current host is big endian
+ * @pevent: a handle to the tep_handle
+ *
+ * This gets if the order of the current host is big endian
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_is_host_bigendian(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->host_bigendian;
+	return 0;
+}
+
+/**
+ * tep_set_host_bigendian - set the order of the local host
+ * @pevent: a handle to the tep_handle
+ * @endian: non zero, if the local host has big endian order
+ *
+ * This sets the order of the local host
+ */
+void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian)
+{
+	if(pevent)
+		pevent->host_bigendian = endian;
+}
+
+/**
+ * tep_is_latency_format - get if the latency output format is configured
+ * @pevent: a handle to the tep_handle
+ *
+ * This gets if the latency output format is configured
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_is_latency_format(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->latency_format;
+	return 0;
+}
+
+/**
+ * tep_set_latency_format - set the latency output format
+ * @pevent: a handle to the tep_handle
+ * @lat: non zero for latency output format
+ *
+ * This sets the latency output format
+  */
+void tep_set_latency_format(struct tep_handle *pevent, int lat)
+{
+	if(pevent)
+		pevent->latency_format = lat;
+}
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
new file mode 100644
index 000000000000..b9bddde577f8
--- /dev/null
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#ifndef _PARSE_EVENTS_INT_H
+#define _PARSE_EVENTS_INT_H
+
+struct cmdline;
+struct cmdline_list;
+struct func_map;
+struct func_list;
+struct event_handler;
+struct func_resolver;
+
+struct tep_handle {
+	int ref_count;
+
+	int header_page_ts_offset;
+	int header_page_ts_size;
+	int header_page_size_offset;
+	int header_page_size_size;
+	int header_page_data_offset;
+	int header_page_data_size;
+	int header_page_overwrite;
+
+	enum tep_endian file_bigendian;
+	enum tep_endian host_bigendian;
+
+	int latency_format;
+
+	int old_format;
+
+	int cpus;
+	int long_size;
+	int page_size;
+
+	struct cmdline *cmdlines;
+	struct cmdline_list *cmdlist;
+	int cmdline_count;
+
+	struct func_map *func_map;
+	struct func_resolver *func_resolver;
+	struct func_list *funclist;
+	unsigned int func_count;
+
+	struct printk_map *printk_map;
+	struct printk_list *printklist;
+	unsigned int printk_count;
+
+
+	struct tep_event_format **events;
+	int nr_events;
+	struct tep_event_format **sort_events;
+	enum tep_event_sort_type last_type;
+
+	int type_offset;
+	int type_size;
+
+	int pid_offset;
+	int pid_size;
+
+	int pc_offset;
+	int pc_size;
+
+	int flags_offset;
+	int flags_size;
+
+	int ld_offset;
+	int ld_size;
+
+	int print_raw;
+
+	int test_filters;
+
+	int flags;
+
+	struct tep_format_field *bprint_ip_field;
+	struct tep_format_field *bprint_fmt_field;
+	struct tep_format_field *bprint_buf_field;
+
+	struct event_handler *handlers;
+	struct tep_function_handler *func_handlers;
+
+	/* cache */
+	struct tep_event_format *last_event;
+
+	char *trace_clock;
+};
+
+#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index e5f2acbb70cc..3692f29fee46 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1,21 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  *  The parts for function graph printing was taken and modified from the
  *  Linux Kernel that were written by
@@ -32,12 +18,14 @@
 #include <errno.h>
 #include <stdint.h>
 #include <limits.h>
-#include <linux/string.h>
 #include <linux/time64.h>
 
 #include <netinet/in.h>
 #include "event-parse.h"
+
+#include "event-parse-local.h"
 #include "event-utils.h"
+#include "trace-seq.h"
 
 static const char *input_buf;
 static unsigned long long input_buf_ptr;
@@ -73,12 +61,12 @@ static void init_input_buf(const char *buf, unsigned long long size)
 	input_buf_ptr = 0;
 }
 
-const char *pevent_get_input_buf(void)
+const char *tep_get_input_buf(void)
 {
 	return input_buf;
 }
 
-unsigned long long pevent_get_input_buf_ptr(void)
+unsigned long long tep_get_input_buf_ptr(void)
 {
 	return input_buf_ptr;
 }
@@ -88,39 +76,39 @@ struct event_handler {
 	int				id;
 	const char			*sys_name;
 	const char			*event_name;
-	pevent_event_handler_func	func;
+	tep_event_handler_func		func;
 	void				*context;
 };
 
-struct pevent_func_params {
-	struct pevent_func_params	*next;
-	enum pevent_func_arg_type	type;
+struct func_params {
+	struct func_params	*next;
+	enum tep_func_arg_type	type;
 };
 
-struct pevent_function_handler {
-	struct pevent_function_handler	*next;
-	enum pevent_func_arg_type	ret_type;
+struct tep_function_handler {
+	struct tep_function_handler	*next;
+	enum tep_func_arg_type		ret_type;
 	char				*name;
-	pevent_func_handler		func;
-	struct pevent_func_params	*params;
+	tep_func_handler		func;
+	struct func_params		*params;
 	int				nr_args;
 };
 
 static unsigned long long
 process_defined_func(struct trace_seq *s, void *data, int size,
-		     struct event_format *event, struct print_arg *arg);
+		     struct tep_event_format *event, struct tep_print_arg *arg);
 
-static void free_func_handle(struct pevent_function_handler *func);
+static void free_func_handle(struct tep_function_handler *func);
 
 /**
- * pevent_buffer_init - init buffer for parsing
+ * tep_buffer_init - init buffer for parsing
  * @buf: buffer to parse
  * @size: the size of the buffer
  *
- * For use with pevent_read_token(), this initializes the internal
- * buffer that pevent_read_token() will parse.
+ * For use with tep_read_token(), this initializes the internal
+ * buffer that tep_read_token() will parse.
  */
-void pevent_buffer_init(const char *buf, unsigned long long size)
+void tep_buffer_init(const char *buf, unsigned long long size)
 {
 	init_input_buf(buf, size);
 }
@@ -131,9 +119,9 @@ void breakpoint(void)
 	x++;
 }
 
-struct print_arg *alloc_arg(void)
+struct tep_print_arg *alloc_arg(void)
 {
-	return calloc(1, sizeof(struct print_arg));
+	return calloc(1, sizeof(struct tep_print_arg));
 }
 
 struct cmdline {
@@ -160,7 +148,7 @@ struct cmdline_list {
 	int			pid;
 };
 
-static int cmdline_init(struct pevent *pevent)
+static int cmdline_init(struct tep_handle *pevent)
 {
 	struct cmdline_list *cmdlist = pevent->cmdlist;
 	struct cmdline_list *item;
@@ -189,7 +177,7 @@ static int cmdline_init(struct pevent *pevent)
 	return 0;
 }
 
-static const char *find_cmdline(struct pevent *pevent, int pid)
+static const char *find_cmdline(struct tep_handle *pevent, int pid)
 {
 	const struct cmdline *comm;
 	struct cmdline key;
@@ -211,14 +199,14 @@ static const char *find_cmdline(struct pevent *pevent, int pid)
 }
 
 /**
- * pevent_pid_is_registered - return if a pid has a cmdline registered
+ * tep_pid_is_registered - return if a pid has a cmdline registered
  * @pevent: handle for the pevent
  * @pid: The pid to check if it has a cmdline registered with.
  *
  * Returns 1 if the pid has a cmdline mapped to it
  * 0 otherwise.
  */
-int pevent_pid_is_registered(struct pevent *pevent, int pid)
+int tep_pid_is_registered(struct tep_handle *pevent, int pid)
 {
 	const struct cmdline *comm;
 	struct cmdline key;
@@ -244,7 +232,7 @@ int pevent_pid_is_registered(struct pevent *pevent, int pid)
  * we must add this pid. This is much slower than when cmdlines
  * are added before the array is initialized.
  */
-static int add_new_comm(struct pevent *pevent, const char *comm, int pid)
+static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid)
 {
 	struct cmdline *cmdlines = pevent->cmdlines;
 	const struct cmdline *cmdline;
@@ -288,7 +276,7 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid)
 }
 
 /**
- * pevent_register_comm - register a pid / comm mapping
+ * tep_register_comm - register a pid / comm mapping
  * @pevent: handle for the pevent
  * @comm: the command line to register
  * @pid: the pid to map the command line to
@@ -296,7 +284,7 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid)
  * This adds a mapping to search for command line names with
  * a given pid. The comm is duplicated.
  */
-int pevent_register_comm(struct pevent *pevent, const char *comm, int pid)
+int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid)
 {
 	struct cmdline_list *item;
 
@@ -324,7 +312,7 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid)
 	return 0;
 }
 
-int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock)
+int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock)
 {
 	pevent->trace_clock = strdup(trace_clock);
 	if (!pevent->trace_clock) {
@@ -381,7 +369,7 @@ static int func_bcmp(const void *a, const void *b)
 	return 1;
 }
 
-static int func_map_init(struct pevent *pevent)
+static int func_map_init(struct tep_handle *pevent)
 {
 	struct func_list *funclist;
 	struct func_list *item;
@@ -421,7 +409,7 @@ static int func_map_init(struct pevent *pevent)
 }
 
 static struct func_map *
-__find_func(struct pevent *pevent, unsigned long long addr)
+__find_func(struct tep_handle *pevent, unsigned long long addr)
 {
 	struct func_map *func;
 	struct func_map key;
@@ -438,13 +426,13 @@ __find_func(struct pevent *pevent, unsigned long long addr)
 }
 
 struct func_resolver {
-	pevent_func_resolver_t *func;
-	void		       *priv;
-	struct func_map	       map;
+	tep_func_resolver_t	*func;
+	void			*priv;
+	struct func_map		map;
 };
 
 /**
- * pevent_set_function_resolver - set an alternative function resolver
+ * tep_set_function_resolver - set an alternative function resolver
  * @pevent: handle for the pevent
  * @resolver: function to be used
  * @priv: resolver function private state.
@@ -453,8 +441,8 @@ struct func_resolver {
  * keep using it instead of duplicating all the entries inside
  * pevent->funclist.
  */
-int pevent_set_function_resolver(struct pevent *pevent,
-				 pevent_func_resolver_t *func, void *priv)
+int tep_set_function_resolver(struct tep_handle *pevent,
+			      tep_func_resolver_t *func, void *priv)
 {
 	struct func_resolver *resolver = malloc(sizeof(*resolver));
 
@@ -471,20 +459,20 @@ int pevent_set_function_resolver(struct pevent *pevent,
 }
 
 /**
- * pevent_reset_function_resolver - reset alternative function resolver
+ * tep_reset_function_resolver - reset alternative function resolver
  * @pevent: handle for the pevent
  *
  * Stop using whatever alternative resolver was set, use the default
  * one instead.
  */
-void pevent_reset_function_resolver(struct pevent *pevent)
+void tep_reset_function_resolver(struct tep_handle *pevent)
 {
 	free(pevent->func_resolver);
 	pevent->func_resolver = NULL;
 }
 
 static struct func_map *
-find_func(struct pevent *pevent, unsigned long long addr)
+find_func(struct tep_handle *pevent, unsigned long long addr)
 {
 	struct func_map *map;
 
@@ -503,7 +491,7 @@ find_func(struct pevent *pevent, unsigned long long addr)
 }
 
 /**
- * pevent_find_function - find a function by a given address
+ * tep_find_function - find a function by a given address
  * @pevent: handle for the pevent
  * @addr: the address to find the function with
  *
@@ -511,7 +499,7 @@ find_func(struct pevent *pevent, unsigned long long addr)
  * address. Note, the address does not have to be exact, it
  * will select the function that would contain the address.
  */
-const char *pevent_find_function(struct pevent *pevent, unsigned long long addr)
+const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr)
 {
 	struct func_map *map;
 
@@ -523,16 +511,16 @@ const char *pevent_find_function(struct pevent *pevent, unsigned long long addr)
 }
 
 /**
- * pevent_find_function_address - find a function address by a given address
+ * tep_find_function_address - find a function address by a given address
  * @pevent: handle for the pevent
  * @addr: the address to find the function with
  *
  * Returns the address the function starts at. This can be used in
- * conjunction with pevent_find_function to print both the function
+ * conjunction with tep_find_function to print both the function
  * name and the function offset.
  */
 unsigned long long
-pevent_find_function_address(struct pevent *pevent, unsigned long long addr)
+tep_find_function_address(struct tep_handle *pevent, unsigned long long addr)
 {
 	struct func_map *map;
 
@@ -544,7 +532,7 @@ pevent_find_function_address(struct pevent *pevent, unsigned long long addr)
 }
 
 /**
- * pevent_register_function - register a function with a given address
+ * tep_register_function - register a function with a given address
  * @pevent: handle for the pevent
  * @function: the function name to register
  * @addr: the address the function starts at
@@ -553,8 +541,8 @@ pevent_find_function_address(struct pevent *pevent, unsigned long long addr)
  * This registers a function name with an address and module.
  * The @func passed in is duplicated.
  */
-int pevent_register_function(struct pevent *pevent, char *func,
-			     unsigned long long addr, char *mod)
+int tep_register_function(struct tep_handle *pevent, char *func,
+			  unsigned long long addr, char *mod)
 {
 	struct func_list *item = malloc(sizeof(*item));
 
@@ -589,12 +577,12 @@ out_free:
 }
 
 /**
- * pevent_print_funcs - print out the stored functions
+ * tep_print_funcs - print out the stored functions
  * @pevent: handle for the pevent
  *
  * This prints out the stored functions.
  */
-void pevent_print_funcs(struct pevent *pevent)
+void tep_print_funcs(struct tep_handle *pevent)
 {
 	int i;
 
@@ -636,7 +624,7 @@ static int printk_cmp(const void *a, const void *b)
 	return 0;
 }
 
-static int printk_map_init(struct pevent *pevent)
+static int printk_map_init(struct tep_handle *pevent)
 {
 	struct printk_list *printklist;
 	struct printk_list *item;
@@ -668,7 +656,7 @@ static int printk_map_init(struct pevent *pevent)
 }
 
 static struct printk_map *
-find_printk(struct pevent *pevent, unsigned long long addr)
+find_printk(struct tep_handle *pevent, unsigned long long addr)
 {
 	struct printk_map *printk;
 	struct printk_map key;
@@ -685,7 +673,7 @@ find_printk(struct pevent *pevent, unsigned long long addr)
 }
 
 /**
- * pevent_register_print_string - register a string by its address
+ * tep_register_print_string - register a string by its address
  * @pevent: handle for the pevent
  * @fmt: the string format to register
  * @addr: the address the string was located at
@@ -693,8 +681,8 @@ find_printk(struct pevent *pevent, unsigned long long addr)
  * This registers a string by the address it was stored in the kernel.
  * The @fmt passed in is duplicated.
  */
-int pevent_register_print_string(struct pevent *pevent, const char *fmt,
-				 unsigned long long addr)
+int tep_register_print_string(struct tep_handle *pevent, const char *fmt,
+			      unsigned long long addr)
 {
 	struct printk_list *item = malloc(sizeof(*item));
 	char *p;
@@ -732,12 +720,12 @@ out_free:
 }
 
 /**
- * pevent_print_printk - print out the stored strings
+ * tep_print_printk - print out the stored strings
  * @pevent: handle for the pevent
  *
  * This prints the string formats that were stored.
  */
-void pevent_print_printk(struct pevent *pevent)
+void tep_print_printk(struct tep_handle *pevent)
 {
 	int i;
 
@@ -751,16 +739,16 @@ void pevent_print_printk(struct pevent *pevent)
 	}
 }
 
-static struct event_format *alloc_event(void)
+static struct tep_event_format *alloc_event(void)
 {
-	return calloc(1, sizeof(struct event_format));
+	return calloc(1, sizeof(struct tep_event_format));
 }
 
-static int add_event(struct pevent *pevent, struct event_format *event)
+static int add_event(struct tep_handle *pevent, struct tep_event_format *event)
 {
 	int i;
-	struct event_format **events = realloc(pevent->events, sizeof(event) *
-					       (pevent->nr_events + 1));
+	struct tep_event_format **events = realloc(pevent->events, sizeof(event) *
+						  (pevent->nr_events + 1));
 	if (!events)
 		return -1;
 
@@ -783,20 +771,20 @@ static int add_event(struct pevent *pevent, struct event_format *event)
 	return 0;
 }
 
-static int event_item_type(enum event_type type)
+static int event_item_type(enum tep_event_type type)
 {
 	switch (type) {
-	case EVENT_ITEM ... EVENT_SQUOTE:
+	case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE:
 		return 1;
-	case EVENT_ERROR ... EVENT_DELIM:
+	case TEP_EVENT_ERROR ... TEP_EVENT_DELIM:
 	default:
 		return 0;
 	}
 }
 
-static void free_flag_sym(struct print_flag_sym *fsym)
+static void free_flag_sym(struct tep_print_flag_sym *fsym)
 {
-	struct print_flag_sym *next;
+	struct tep_print_flag_sym *next;
 
 	while (fsym) {
 		next = fsym->next;
@@ -807,60 +795,60 @@ static void free_flag_sym(struct print_flag_sym *fsym)
 	}
 }
 
-static void free_arg(struct print_arg *arg)
+static void free_arg(struct tep_print_arg *arg)
 {
-	struct print_arg *farg;
+	struct tep_print_arg *farg;
 
 	if (!arg)
 		return;
 
 	switch (arg->type) {
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		free(arg->atom.atom);
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		free(arg->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		free_arg(arg->flags.field);
 		free(arg->flags.delim);
 		free_flag_sym(arg->flags.flags);
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		free_arg(arg->symbol.field);
 		free_flag_sym(arg->symbol.symbols);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		free_arg(arg->hex.field);
 		free_arg(arg->hex.size);
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		free_arg(arg->int_array.field);
 		free_arg(arg->int_array.count);
 		free_arg(arg->int_array.el_size);
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		free(arg->typecast.type);
 		free_arg(arg->typecast.item);
 		break;
-	case PRINT_STRING:
-	case PRINT_BSTRING:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
 		free(arg->string.string);
 		break;
-	case PRINT_BITMASK:
+	case TEP_PRINT_BITMASK:
 		free(arg->bitmask.bitmask);
 		break;
-	case PRINT_DYNAMIC_ARRAY:
-	case PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
 		free(arg->dynarray.index);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		free(arg->op.op);
 		free_arg(arg->op.left);
 		free_arg(arg->op.right);
 		break;
-	case PRINT_FUNC:
+	case TEP_PRINT_FUNC:
 		while (arg->func.args) {
 			farg = arg->func.args;
 			arg->func.args = farg->next;
@@ -868,7 +856,7 @@ static void free_arg(struct print_arg *arg)
 		}
 		break;
 
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 	default:
 		break;
 	}
@@ -876,24 +864,24 @@ static void free_arg(struct print_arg *arg)
 	free(arg);
 }
 
-static enum event_type get_type(int ch)
+static enum tep_event_type get_type(int ch)
 {
 	if (ch == '\n')
-		return EVENT_NEWLINE;
+		return TEP_EVENT_NEWLINE;
 	if (isspace(ch))
-		return EVENT_SPACE;
+		return TEP_EVENT_SPACE;
 	if (isalnum(ch) || ch == '_')
-		return EVENT_ITEM;
+		return TEP_EVENT_ITEM;
 	if (ch == '\'')
-		return EVENT_SQUOTE;
+		return TEP_EVENT_SQUOTE;
 	if (ch == '"')
-		return EVENT_DQUOTE;
+		return TEP_EVENT_DQUOTE;
 	if (!isprint(ch))
-		return EVENT_NONE;
+		return TEP_EVENT_NONE;
 	if (ch == '(' || ch == ')' || ch == ',')
-		return EVENT_DELIM;
+		return TEP_EVENT_DELIM;
 
-	return EVENT_OP;
+	return TEP_EVENT_OP;
 }
 
 static int __read_char(void)
@@ -913,11 +901,11 @@ static int __peek_char(void)
 }
 
 /**
- * pevent_peek_char - peek at the next character that will be read
+ * tep_peek_char - peek at the next character that will be read
  *
  * Returns the next character read, or -1 if end of buffer.
  */
-int pevent_peek_char(void)
+int tep_peek_char(void)
 {
 	return __peek_char();
 }
@@ -941,38 +929,38 @@ static int extend_token(char **tok, char *buf, int size)
 	return 0;
 }
 
-static enum event_type force_token(const char *str, char **tok);
+static enum tep_event_type force_token(const char *str, char **tok);
 
-static enum event_type __read_token(char **tok)
+static enum tep_event_type __read_token(char **tok)
 {
 	char buf[BUFSIZ];
 	int ch, last_ch, quote_ch, next_ch;
 	int i = 0;
 	int tok_size = 0;
-	enum event_type type;
+	enum tep_event_type type;
 
 	*tok = NULL;
 
 
 	ch = __read_char();
 	if (ch < 0)
-		return EVENT_NONE;
+		return TEP_EVENT_NONE;
 
 	type = get_type(ch);
-	if (type == EVENT_NONE)
+	if (type == TEP_EVENT_NONE)
 		return type;
 
 	buf[i++] = ch;
 
 	switch (type) {
-	case EVENT_NEWLINE:
-	case EVENT_DELIM:
+	case TEP_EVENT_NEWLINE:
+	case TEP_EVENT_DELIM:
 		if (asprintf(tok, "%c", ch) < 0)
-			return EVENT_ERROR;
+			return TEP_EVENT_ERROR;
 
 		return type;
 
-	case EVENT_OP:
+	case TEP_EVENT_OP:
 		switch (ch) {
 		case '-':
 			next_ch = __peek_char();
@@ -1015,8 +1003,8 @@ static enum event_type __read_token(char **tok)
 			buf[i++] = __read_char();
 		goto out;
 
-	case EVENT_DQUOTE:
-	case EVENT_SQUOTE:
+	case TEP_EVENT_DQUOTE:
+	case TEP_EVENT_SQUOTE:
 		/* don't keep quotes */
 		i--;
 		quote_ch = ch;
@@ -1028,7 +1016,7 @@ static enum event_type __read_token(char **tok)
 				tok_size += BUFSIZ;
 
 				if (extend_token(tok, buf, tok_size) < 0)
-					return EVENT_NONE;
+					return TEP_EVENT_NONE;
 				i = 0;
 			}
 			last_ch = ch;
@@ -1045,7 +1033,7 @@ static enum event_type __read_token(char **tok)
 		 * For strings (double quotes) check the next token.
 		 * If it is another string, concatinate the two.
 		 */
-		if (type == EVENT_DQUOTE) {
+		if (type == TEP_EVENT_DQUOTE) {
 			unsigned long long save_input_buf_ptr = input_buf_ptr;
 
 			do {
@@ -1058,8 +1046,8 @@ static enum event_type __read_token(char **tok)
 
 		goto out;
 
-	case EVENT_ERROR ... EVENT_SPACE:
-	case EVENT_ITEM:
+	case TEP_EVENT_ERROR ... TEP_EVENT_SPACE:
+	case TEP_EVENT_ITEM:
 	default:
 		break;
 	}
@@ -1070,7 +1058,7 @@ static enum event_type __read_token(char **tok)
 			tok_size += BUFSIZ;
 
 			if (extend_token(tok, buf, tok_size) < 0)
-				return EVENT_NONE;
+				return TEP_EVENT_NONE;
 			i = 0;
 		}
 		ch = __read_char();
@@ -1080,9 +1068,9 @@ static enum event_type __read_token(char **tok)
  out:
 	buf[i] = 0;
 	if (extend_token(tok, buf, tok_size + i + 1) < 0)
-		return EVENT_NONE;
+		return TEP_EVENT_NONE;
 
-	if (type == EVENT_ITEM) {
+	if (type == TEP_EVENT_ITEM) {
 		/*
 		 * Older versions of the kernel has a bug that
 		 * creates invalid symbols and will break the mac80211
@@ -1109,12 +1097,12 @@ static enum event_type __read_token(char **tok)
 	return type;
 }
 
-static enum event_type force_token(const char *str, char **tok)
+static enum tep_event_type force_token(const char *str, char **tok)
 {
 	const char *save_input_buf;
 	unsigned long long save_input_buf_ptr;
 	unsigned long long save_input_buf_siz;
-	enum event_type type;
+	enum tep_event_type type;
 	
 	/* save off the current input pointers */
 	save_input_buf = input_buf;
@@ -1139,13 +1127,13 @@ static void free_token(char *tok)
 		free(tok);
 }
 
-static enum event_type read_token(char **tok)
+static enum tep_event_type read_token(char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	for (;;) {
 		type = __read_token(tok);
-		if (type != EVENT_SPACE)
+		if (type != TEP_EVENT_SPACE)
 			return type;
 
 		free_token(*tok);
@@ -1153,40 +1141,40 @@ static enum event_type read_token(char **tok)
 
 	/* not reached */
 	*tok = NULL;
-	return EVENT_NONE;
+	return TEP_EVENT_NONE;
 }
 
 /**
- * pevent_read_token - access to utilites to use the pevent parser
+ * tep_read_token - access to utilites to use the pevent parser
  * @tok: The token to return
  *
  * This will parse tokens from the string given by
- * pevent_init_data().
+ * tep_init_data().
  *
  * Returns the token type.
  */
-enum event_type pevent_read_token(char **tok)
+enum tep_event_type tep_read_token(char **tok)
 {
 	return read_token(tok);
 }
 
 /**
- * pevent_free_token - free a token returned by pevent_read_token
+ * tep_free_token - free a token returned by tep_read_token
  * @token: the token to free
  */
-void pevent_free_token(char *token)
+void tep_free_token(char *token)
 {
 	free_token(token);
 }
 
 /* no newline */
-static enum event_type read_token_item(char **tok)
+static enum tep_event_type read_token_item(char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	for (;;) {
 		type = __read_token(tok);
-		if (type != EVENT_SPACE && type != EVENT_NEWLINE)
+		if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE)
 			return type;
 		free_token(*tok);
 		*tok = NULL;
@@ -1194,10 +1182,10 @@ static enum event_type read_token_item(char **tok)
 
 	/* not reached */
 	*tok = NULL;
-	return EVENT_NONE;
+	return TEP_EVENT_NONE;
 }
 
-static int test_type(enum event_type type, enum event_type expect)
+static int test_type(enum tep_event_type type, enum tep_event_type expect)
 {
 	if (type != expect) {
 		do_warning("Error: expected type %d but read %d",
@@ -1207,8 +1195,8 @@ static int test_type(enum event_type type, enum event_type expect)
 	return 0;
 }
 
-static int test_type_token(enum event_type type, const char *token,
-		    enum event_type expect, const char *expect_tok)
+static int test_type_token(enum tep_event_type type, const char *token,
+		    enum tep_event_type expect, const char *expect_tok)
 {
 	if (type != expect) {
 		do_warning("Error: expected type %d but read %d",
@@ -1224,9 +1212,9 @@ static int test_type_token(enum event_type type, const char *token,
 	return 0;
 }
 
-static int __read_expect_type(enum event_type expect, char **tok, int newline_ok)
+static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	if (newline_ok)
 		type = read_token(tok);
@@ -1235,15 +1223,15 @@ static int __read_expect_type(enum event_type expect, char **tok, int newline_ok
 	return test_type(type, expect);
 }
 
-static int read_expect_type(enum event_type expect, char **tok)
+static int read_expect_type(enum tep_event_type expect, char **tok)
 {
 	return __read_expect_type(expect, tok, 1);
 }
 
-static int __read_expected(enum event_type expect, const char *str,
+static int __read_expected(enum tep_event_type expect, const char *str,
 			   int newline_ok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 	int ret;
 
@@ -1259,12 +1247,12 @@ static int __read_expected(enum event_type expect, const char *str,
 	return ret;
 }
 
-static int read_expected(enum event_type expect, const char *str)
+static int read_expected(enum tep_event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 1);
 }
 
-static int read_expected_item(enum event_type expect, const char *str)
+static int read_expected_item(enum tep_event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 0);
 }
@@ -1273,13 +1261,13 @@ static char *event_read_name(void)
 {
 	char *token;
 
-	if (read_expected(EVENT_ITEM, "name") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "name") < 0)
 		return NULL;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return NULL;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 
 	return token;
@@ -1294,13 +1282,13 @@ static int event_read_id(void)
 	char *token;
 	int id;
 
-	if (read_expected_item(EVENT_ITEM, "ID") < 0)
+	if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return -1;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 
 	id = strtoul(token, NULL, 0);
@@ -1312,9 +1300,9 @@ static int event_read_id(void)
 	return -1;
 }
 
-static int field_is_string(struct format_field *field)
+static int field_is_string(struct tep_format_field *field)
 {
-	if ((field->flags & FIELD_IS_ARRAY) &&
+	if ((field->flags & TEP_FIELD_IS_ARRAY) &&
 	    (strstr(field->type, "char") || strstr(field->type, "u8") ||
 	     strstr(field->type, "s8")))
 		return 1;
@@ -1322,7 +1310,7 @@ static int field_is_string(struct format_field *field)
 	return 0;
 }
 
-static int field_is_dynamic(struct format_field *field)
+static int field_is_dynamic(struct tep_format_field *field)
 {
 	if (strncmp(field->type, "__data_loc", 10) == 0)
 		return 1;
@@ -1330,7 +1318,7 @@ static int field_is_dynamic(struct format_field *field)
 	return 0;
 }
 
-static int field_is_long(struct format_field *field)
+static int field_is_long(struct tep_format_field *field)
 {
 	/* includes long long */
 	if (strstr(field->type, "long"))
@@ -1341,7 +1329,7 @@ static int field_is_long(struct format_field *field)
 
 static unsigned int type_size(const char *name)
 {
-	/* This covers all FIELD_IS_STRING types. */
+	/* This covers all TEP_FIELD_IS_STRING types. */
 	static struct {
 		const char *type;
 		unsigned int size;
@@ -1367,10 +1355,10 @@ static unsigned int type_size(const char *name)
 	return 0;
 }
 
-static int event_read_fields(struct event_format *event, struct format_field **fields)
+static int event_read_fields(struct tep_event_format *event, struct tep_format_field **fields)
 {
-	struct format_field *field = NULL;
-	enum event_type type;
+	struct tep_format_field *field = NULL;
+	enum tep_event_type type;
 	char *token;
 	char *last_token;
 	int count = 0;
@@ -1379,14 +1367,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		unsigned int size_dynamic = 0;
 
 		type = read_token(&token);
-		if (type == EVENT_NEWLINE) {
+		if (type == TEP_EVENT_NEWLINE) {
 			free_token(token);
 			return count;
 		}
 
 		count++;
 
-		if (test_type_token(type, token, EVENT_ITEM, "field"))
+		if (test_type_token(type, token, TEP_EVENT_ITEM, "field"))
 			goto fail;
 		free_token(token);
 
@@ -1395,17 +1383,17 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		 * The ftrace fields may still use the "special" name.
 		 * Just ignore it.
 		 */
-		if (event->flags & EVENT_FL_ISFTRACE &&
-		    type == EVENT_ITEM && strcmp(token, "special") == 0) {
+		if (event->flags & TEP_EVENT_FL_ISFTRACE &&
+		    type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) {
 			free_token(token);
 			type = read_token(&token);
 		}
 
-		if (test_type_token(type, token, EVENT_OP, ":") < 0)
+		if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0)
 			goto fail;
 
 		free_token(token);
-		if (read_expect_type(EVENT_ITEM, &token) < 0)
+		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 			goto fail;
 
 		last_token = token;
@@ -1419,17 +1407,17 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		/* read the rest of the type */
 		for (;;) {
 			type = read_token(&token);
-			if (type == EVENT_ITEM ||
-			    (type == EVENT_OP && strcmp(token, "*") == 0) ||
+			if (type == TEP_EVENT_ITEM ||
+			    (type == TEP_EVENT_OP && strcmp(token, "*") == 0) ||
 			    /*
 			     * Some of the ftrace fields are broken and have
 			     * an illegal "." in them.
 			     */
-			    (event->flags & EVENT_FL_ISFTRACE &&
-			     type == EVENT_OP && strcmp(token, ".") == 0)) {
+			    (event->flags & TEP_EVENT_FL_ISFTRACE &&
+			     type == TEP_EVENT_OP && strcmp(token, ".") == 0)) {
 
 				if (strcmp(token, "*") == 0)
-					field->flags |= FIELD_IS_POINTER;
+					field->flags |= TEP_FIELD_IS_POINTER;
 
 				if (field->type) {
 					char *new_type;
@@ -1459,27 +1447,27 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		}
 		field->name = field->alias = last_token;
 
-		if (test_type(type, EVENT_OP))
+		if (test_type(type, TEP_EVENT_OP))
 			goto fail;
 
 		if (strcmp(token, "[") == 0) {
-			enum event_type last_type = type;
+			enum tep_event_type last_type = type;
 			char *brackets = token;
 			char *new_brackets;
 			int len;
 
-			field->flags |= FIELD_IS_ARRAY;
+			field->flags |= TEP_FIELD_IS_ARRAY;
 
 			type = read_token(&token);
 
-			if (type == EVENT_ITEM)
+			if (type == TEP_EVENT_ITEM)
 				field->arraylen = strtoul(token, NULL, 0);
 			else
 				field->arraylen = 0;
 
 		        while (strcmp(token, "]") != 0) {
-				if (last_type == EVENT_ITEM &&
-				    type == EVENT_ITEM)
+				if (last_type == TEP_EVENT_ITEM &&
+				    type == TEP_EVENT_ITEM)
 					len = 2;
 				else
 					len = 1;
@@ -1500,7 +1488,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 				field->arraylen = strtoul(token, NULL, 0);
 				free_token(token);
 				type = read_token(&token);
-				if (type == EVENT_NONE) {
+				if (type == TEP_EVENT_NONE) {
 					do_warning_event(event, "failed to find token");
 					goto fail;
 				}
@@ -1523,7 +1511,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 			 * If the next token is not an OP, then it is of
 			 * the format: type [] item;
 			 */
-			if (type == EVENT_ITEM) {
+			if (type == TEP_EVENT_ITEM) {
 				char *new_type;
 				new_type = realloc(field->type,
 						   strlen(field->type) +
@@ -1557,79 +1545,79 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		}
 
 		if (field_is_string(field))
-			field->flags |= FIELD_IS_STRING;
+			field->flags |= TEP_FIELD_IS_STRING;
 		if (field_is_dynamic(field))
-			field->flags |= FIELD_IS_DYNAMIC;
+			field->flags |= TEP_FIELD_IS_DYNAMIC;
 		if (field_is_long(field))
-			field->flags |= FIELD_IS_LONG;
+			field->flags |= TEP_FIELD_IS_LONG;
 
-		if (test_type_token(type, token,  EVENT_OP, ";"))
+		if (test_type_token(type, token,  TEP_EVENT_OP, ";"))
 			goto fail;
 		free_token(token);
 
-		if (read_expected(EVENT_ITEM, "offset") < 0)
+		if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_OP, ":") < 0)
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
 			goto fail_expect;
 
-		if (read_expect_type(EVENT_ITEM, &token))
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
 			goto fail;
 		field->offset = strtoul(token, NULL, 0);
 		free_token(token);
 
-		if (read_expected(EVENT_OP, ";") < 0)
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_ITEM, "size") < 0)
+		if (read_expected(TEP_EVENT_ITEM, "size") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_OP, ":") < 0)
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
 			goto fail_expect;
 
-		if (read_expect_type(EVENT_ITEM, &token))
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
 			goto fail;
 		field->size = strtoul(token, NULL, 0);
 		free_token(token);
 
-		if (read_expected(EVENT_OP, ";") < 0)
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
 			goto fail_expect;
 
 		type = read_token(&token);
-		if (type != EVENT_NEWLINE) {
+		if (type != TEP_EVENT_NEWLINE) {
 			/* newer versions of the kernel have a "signed" type */
-			if (test_type_token(type, token, EVENT_ITEM, "signed"))
+			if (test_type_token(type, token, TEP_EVENT_ITEM, "signed"))
 				goto fail;
 
 			free_token(token);
 
-			if (read_expected(EVENT_OP, ":") < 0)
+			if (read_expected(TEP_EVENT_OP, ":") < 0)
 				goto fail_expect;
 
-			if (read_expect_type(EVENT_ITEM, &token))
+			if (read_expect_type(TEP_EVENT_ITEM, &token))
 				goto fail;
 
 			if (strtoul(token, NULL, 0))
-				field->flags |= FIELD_IS_SIGNED;
+				field->flags |= TEP_FIELD_IS_SIGNED;
 
 			free_token(token);
-			if (read_expected(EVENT_OP, ";") < 0)
+			if (read_expected(TEP_EVENT_OP, ";") < 0)
 				goto fail_expect;
 
-			if (read_expect_type(EVENT_NEWLINE, &token))
+			if (read_expect_type(TEP_EVENT_NEWLINE, &token))
 				goto fail;
 		}
 
 		free_token(token);
 
-		if (field->flags & FIELD_IS_ARRAY) {
+		if (field->flags & TEP_FIELD_IS_ARRAY) {
 			if (field->arraylen)
 				field->elementsize = field->size / field->arraylen;
-			else if (field->flags & FIELD_IS_DYNAMIC)
+			else if (field->flags & TEP_FIELD_IS_DYNAMIC)
 				field->elementsize = size_dynamic;
-			else if (field->flags & FIELD_IS_STRING)
+			else if (field->flags & TEP_FIELD_IS_STRING)
 				field->elementsize = 1;
-			else if (field->flags & FIELD_IS_LONG)
+			else if (field->flags & TEP_FIELD_IS_LONG)
 				field->elementsize = event->pevent ?
 						     event->pevent->long_size :
 						     sizeof(long);
@@ -1654,18 +1642,18 @@ fail_expect:
 	return -1;
 }
 
-static int event_read_format(struct event_format *event)
+static int event_read_format(struct tep_event_format *event)
 {
 	char *token;
 	int ret;
 
-	if (read_expected_item(EVENT_ITEM, "format") < 0)
+	if (read_expected_item(TEP_EVENT_ITEM, "format") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return -1;
 
-	if (read_expect_type(EVENT_NEWLINE, &token))
+	if (read_expect_type(TEP_EVENT_NEWLINE, &token))
 		goto fail;
 	free_token(token);
 
@@ -1686,14 +1674,14 @@ static int event_read_format(struct event_format *event)
 	return -1;
 }
 
-static enum event_type
-process_arg_token(struct event_format *event, struct print_arg *arg,
-		  char **tok, enum event_type type);
+static enum tep_event_type
+process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
+		  char **tok, enum tep_event_type type);
 
-static enum event_type
-process_arg(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 
 	type = read_token(&token);
@@ -1702,32 +1690,32 @@ process_arg(struct event_format *event, struct print_arg *arg, char **tok)
 	return process_arg_token(event, arg, tok, type);
 }
 
-static enum event_type
-process_op(struct event_format *event, struct print_arg *arg, char **tok);
+static enum tep_event_type
+process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok);
 
 /*
  * For __print_symbolic() and __print_flags, we need to completely
  * evaluate the first argument, which defines what to print next.
  */
-static enum event_type
-process_field_arg(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_field_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	type = process_arg(event, arg, tok);
 
-	while (type == EVENT_OP) {
+	while (type == TEP_EVENT_OP) {
 		type = process_op(event, arg, tok);
 	}
 
 	return type;
 }
 
-static enum event_type
-process_cond(struct event_format *event, struct print_arg *top, char **tok)
+static enum tep_event_type
+process_cond(struct tep_event_format *event, struct tep_print_arg *top, char **tok)
 {
-	struct print_arg *arg, *left, *right;
-	enum event_type type;
+	struct tep_print_arg *arg, *left, *right;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	arg = alloc_arg();
@@ -1742,7 +1730,7 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
 		goto out_free;
 	}
 
-	arg->type = PRINT_OP;
+	arg->type = TEP_PRINT_OP;
 	arg->op.left = left;
 	arg->op.right = right;
 
@@ -1750,16 +1738,16 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
 	type = process_arg(event, left, &token);
 
  again:
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free;
 
 	/* Handle other operations in the arguments */
-	if (type == EVENT_OP && strcmp(token, ":") != 0) {
+	if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) {
 		type = process_op(event, left, &token);
 		goto again;
 	}
 
-	if (test_type_token(type, token, EVENT_OP, ":"))
+	if (test_type_token(type, token, TEP_EVENT_OP, ":"))
 		goto out_free;
 
 	arg->op.op = token;
@@ -1776,14 +1764,14 @@ out_free:
 	top->op.right = NULL;
 	free_token(token);
 	free_arg(arg);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_array(struct event_format *event, struct print_arg *top, char **tok)
+static enum tep_event_type
+process_array(struct tep_event_format *event, struct tep_print_arg *top, char **tok)
 {
-	struct print_arg *arg;
-	enum event_type type;
+	struct tep_print_arg *arg;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	arg = alloc_arg();
@@ -1791,12 +1779,12 @@ process_array(struct event_format *event, struct print_arg *top, char **tok)
 		do_warning_event(event, "%s: not enough memory!", __func__);
 		/* '*tok' is set to top->op.op.  No need to free. */
 		*tok = NULL;
-		return EVENT_ERROR;
+		return TEP_EVENT_ERROR;
 	}
 
 	*tok = NULL;
 	type = process_arg(event, arg, &token);
-	if (test_type_token(type, token, EVENT_OP, "]"))
+	if (test_type_token(type, token, TEP_EVENT_OP, "]"))
 		goto out_free;
 
 	top->op.right = arg;
@@ -1810,7 +1798,7 @@ process_array(struct event_format *event, struct print_arg *top, char **tok)
 out_free:
 	free_token(token);
 	free_arg(arg);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
 static int get_op_prio(char *op)
@@ -1868,11 +1856,11 @@ static int get_op_prio(char *op)
 	}
 }
 
-static int set_op_prio(struct print_arg *arg)
+static int set_op_prio(struct tep_print_arg *arg)
 {
 
 	/* single ops are the greatest */
-	if (!arg->op.left || arg->op.left->type == PRINT_NULL)
+	if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL)
 		arg->op.prio = 0;
 	else
 		arg->op.prio = get_op_prio(arg->op.op);
@@ -1881,17 +1869,17 @@ static int set_op_prio(struct print_arg *arg)
 }
 
 /* Note, *tok does not get freed, but will most likely be saved */
-static enum event_type
-process_op(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *left, *right = NULL;
-	enum event_type type;
+	struct tep_print_arg *left, *right = NULL;
+	enum tep_event_type type;
 	char *token;
 
 	/* the op is passed in via tok */
 	token = *tok;
 
-	if (arg->type == PRINT_OP && !arg->op.left) {
+	if (arg->type == TEP_PRINT_OP && !arg->op.left) {
 		/* handle single op */
 		if (token[1]) {
 			do_warning_event(event, "bad op token %s", token);
@@ -1914,7 +1902,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		if (!left)
 			goto out_warn_free;
 
-		left->type = PRINT_NULL;
+		left->type = TEP_PRINT_NULL;
 		arg->op.left = left;
 
 		right = alloc_arg();
@@ -1936,7 +1924,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		/* copy the top arg to the left */
 		*left = *arg;
 
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = left;
 		arg->op.prio = 0;
@@ -1970,13 +1958,13 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		/* copy the top arg to the left */
 		*left = *arg;
 
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = left;
 		arg->op.right = NULL;
 
 		if (set_op_prio(arg) == -1) {
-			event->flags |= EVENT_FL_FAILED;
+			event->flags |= TEP_EVENT_FL_FAILED;
 			/* arg->op.op (= token) will be freed at out_free */
 			arg->op.op = NULL;
 			goto out_free;
@@ -1987,10 +1975,10 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
 		/* could just be a type pointer */
 		if ((strcmp(arg->op.op, "*") == 0) &&
-		    type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
+		    type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
 			char *new_atom;
 
-			if (left->type != PRINT_ATOM) {
+			if (left->type != TEP_PRINT_ATOM) {
 				do_warning_event(event, "bad pointer type");
 				goto out_free;
 			}
@@ -2013,16 +2001,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 			goto out_warn_free;
 
 		type = process_arg_token(event, right, tok, type);
-		if (type == EVENT_ERROR) {
+		if (type == TEP_EVENT_ERROR) {
 			free_arg(right);
 			/* token was freed in process_arg_token() via *tok */
 			token = NULL;
 			goto out_free;
 		}
 
-		if (right->type == PRINT_OP &&
+		if (right->type == TEP_PRINT_OP &&
 		    get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
-			struct print_arg tmp;
+			struct tep_print_arg tmp;
 
 			/* rotate ops according to the priority */
 			arg->op.right = right->op.left;
@@ -2044,7 +2032,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
 		*left = *arg;
 
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = left;
 
@@ -2055,12 +2043,12 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
 	} else {
 		do_warning_event(event, "unknown op '%s'", token);
-		event->flags |= EVENT_FL_FAILED;
+		event->flags |= TEP_EVENT_FL_FAILED;
 		/* the arg is now the left side */
 		goto out_free;
 	}
 
-	if (type == EVENT_OP && strcmp(*tok, ":") != 0) {
+	if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) {
 		int prio;
 
 		/* higher prios need to be closer to the root */
@@ -2079,34 +2067,34 @@ out_warn_free:
 out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
+static enum tep_event_type
+process_entry(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
 	      char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *field;
 	char *token;
 
-	if (read_expected(EVENT_OP, "->") < 0)
+	if (read_expected(TEP_EVENT_OP, "->") < 0)
 		goto out_err;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 	field = token;
 
-	arg->type = PRINT_FIELD;
+	arg->type = TEP_PRINT_FIELD;
 	arg->field.name = field;
 
 	if (is_flag_field) {
-		arg->field.field = pevent_find_any_field(event, arg->field.name);
-		arg->field.field->flags |= FIELD_IS_FLAG;
+		arg->field.field = tep_find_any_field(event, arg->field.name);
+		arg->field.field->flags |= TEP_FIELD_IS_FLAG;
 		is_flag_field = 0;
 	} else if (is_symbolic_field) {
-		arg->field.field = pevent_find_any_field(event, arg->field.name);
-		arg->field.field->flags |= FIELD_IS_SYMBOLIC;
+		arg->field.field = tep_find_any_field(event, arg->field.name);
+		arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC;
 		is_symbolic_field = 0;
 	}
 
@@ -2119,14 +2107,14 @@ process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static int alloc_and_process_delim(struct event_format *event, char *next_token,
-				   struct print_arg **print_arg)
+static int alloc_and_process_delim(struct tep_event_format *event, char *next_token,
+				   struct tep_print_arg **print_arg)
 {
-	struct print_arg *field;
-	enum event_type type;
+	struct tep_print_arg *field;
+	enum tep_event_type type;
 	char *token;
 	int ret = 0;
 
@@ -2139,7 +2127,7 @@ static int alloc_and_process_delim(struct event_format *event, char *next_token,
 
 	type = process_arg(event, field, &token);
 
-	if (test_type_token(type, token, EVENT_DELIM, next_token)) {
+	if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) {
 		errno = EINVAL;
 		ret = -1;
 		free_arg(field);
@@ -2154,7 +2142,7 @@ out_free_token:
 	return ret;
 }
 
-static char *arg_eval (struct print_arg *arg);
+static char *arg_eval (struct tep_print_arg *arg);
 
 static unsigned long long
 eval_type_str(unsigned long long val, const char *type, int pointer)
@@ -2251,9 +2239,9 @@ eval_type_str(unsigned long long val, const char *type, int pointer)
  * Try to figure out the type.
  */
 static unsigned long long
-eval_type(unsigned long long val, struct print_arg *arg, int pointer)
+eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer)
 {
-	if (arg->type != PRINT_TYPE) {
+	if (arg->type != TEP_PRINT_TYPE) {
 		do_warning("expected type argument");
 		return 0;
 	}
@@ -2261,22 +2249,22 @@ eval_type(unsigned long long val, struct print_arg *arg, int pointer)
 	return eval_type_str(val, arg->typecast.type, pointer);
 }
 
-static int arg_num_eval(struct print_arg *arg, long long *val)
+static int arg_num_eval(struct tep_print_arg *arg, long long *val)
 {
 	long long left, right;
 	int ret = 1;
 
 	switch (arg->type) {
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		*val = strtoll(arg->atom.atom, NULL, 0);
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		ret = arg_num_eval(arg->typecast.item, val);
 		if (!ret)
 			break;
 		*val = eval_type(*val, arg, 0);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		switch (arg->op.op[0]) {
 		case '|':
 			ret = arg_num_eval(arg->op.left, &left);
@@ -2379,7 +2367,7 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 			break;
 		case '-':
 			/* check for negative */
-			if (arg->op.left->type == PRINT_NULL)
+			if (arg->op.left->type == TEP_PRINT_NULL)
 				left = 0;
 			else
 				ret = arg_num_eval(arg->op.left, &left);
@@ -2391,7 +2379,7 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 			*val = left - right;
 			break;
 		case '+':
-			if (arg->op.left->type == PRINT_NULL)
+			if (arg->op.left->type == TEP_PRINT_NULL)
 				left = 0;
 			else
 				ret = arg_num_eval(arg->op.left, &left);
@@ -2414,11 +2402,11 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 		}
 		break;
 
-	case PRINT_NULL:
-	case PRINT_FIELD ... PRINT_SYMBOL:
-	case PRINT_STRING:
-	case PRINT_BSTRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_NULL:
+	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
 	default:
 		do_warning("invalid eval type %d", arg->type);
 		ret = 0;
@@ -2427,27 +2415,27 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 	return ret;
 }
 
-static char *arg_eval (struct print_arg *arg)
+static char *arg_eval (struct tep_print_arg *arg)
 {
 	long long val;
 	static char buf[20];
 
 	switch (arg->type) {
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		return arg->atom.atom;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		return arg_eval(arg->typecast.item);
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (!arg_num_eval(arg, &val))
 			break;
 		sprintf(buf, "%lld", val);
 		return buf;
 
-	case PRINT_NULL:
-	case PRINT_FIELD ... PRINT_SYMBOL:
-	case PRINT_STRING:
-	case PRINT_BSTRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_NULL:
+	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
 	default:
 		do_warning("invalid eval type %d", arg->type);
 		break;
@@ -2456,19 +2444,19 @@ static char *arg_eval (struct print_arg *arg)
 	return NULL;
 }
 
-static enum event_type
-process_fields(struct event_format *event, struct print_flag_sym **list, char **tok)
+static enum tep_event_type
+process_fields(struct tep_event_format *event, struct tep_print_flag_sym **list, char **tok)
 {
-	enum event_type type;
-	struct print_arg *arg = NULL;
-	struct print_flag_sym *field;
+	enum tep_event_type type;
+	struct tep_print_arg *arg = NULL;
+	struct tep_print_flag_sym *field;
 	char *token = *tok;
 	char *value;
 
 	do {
 		free_token(token);
 		type = read_token_item(&token);
-		if (test_type_token(type, token, EVENT_OP, "{"))
+		if (test_type_token(type, token, TEP_EVENT_OP, "{"))
 			break;
 
 		arg = alloc_arg();
@@ -2478,13 +2466,13 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 		free_token(token);
 		type = process_arg(event, arg, &token);
 
-		if (type == EVENT_OP)
+		if (type == TEP_EVENT_OP)
 			type = process_op(event, arg, &token);
 
-		if (type == EVENT_ERROR)
+		if (type == TEP_EVENT_ERROR)
 			goto out_free;
 
-		if (test_type_token(type, token, EVENT_DELIM, ","))
+		if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 			goto out_free;
 
 		field = calloc(1, sizeof(*field));
@@ -2505,7 +2493,7 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 
 		free_token(token);
 		type = process_arg(event, arg, &token);
-		if (test_type_token(type, token, EVENT_OP, "}"))
+		if (test_type_token(type, token, TEP_EVENT_OP, "}"))
 			goto out_free_field;
 
 		value = arg_eval(arg);
@@ -2522,7 +2510,7 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 
 		free_token(token);
 		type = read_token_item(&token);
-	} while (type == EVENT_DELIM && strcmp(token, ",") == 0);
+	} while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0);
 
 	*tok = token;
 	return type;
@@ -2534,18 +2522,18 @@ out_free:
 	free_token(token);
 	*tok = NULL;
 
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_flags(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_flags(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *field;
-	enum event_type type;
+	struct tep_print_arg *field;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_FLAGS;
+	arg->type = TEP_PRINT_FLAGS;
 
 	field = alloc_arg();
 	if (!field) {
@@ -2556,10 +2544,10 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok)
 	type = process_field_arg(event, field, &token);
 
 	/* Handle operations in the first argument */
-	while (type == EVENT_OP)
+	while (type == TEP_EVENT_OP)
 		type = process_op(event, field, &token);
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto out_free_field;
 	free_token(token);
 
@@ -2571,11 +2559,11 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok)
 		type = read_token_item(&token);
 	}
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto out_free;
 
 	type = process_fields(event, &arg->flags.flags, &token);
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -2587,18 +2575,18 @@ out_free_field:
 out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_symbols(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *field;
-	enum event_type type;
+	struct tep_print_arg *field;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_SYMBOL;
+	arg->type = TEP_PRINT_SYMBOL;
 
 	field = alloc_arg();
 	if (!field) {
@@ -2608,13 +2596,13 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
 
 	type = process_field_arg(event, field, &token);
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto out_free_field;
 
 	arg->symbol.field = field;
 
 	type = process_fields(event, &arg->symbol.symbols, &token);
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -2626,12 +2614,12 @@ out_free_field:
 out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_hex_common(struct event_format *event, struct print_arg *arg,
-		   char **tok, enum print_arg_type type)
+static enum tep_event_type
+process_hex_common(struct tep_event_format *event, struct tep_print_arg *arg,
+		   char **tok, enum tep_print_arg_type type)
 {
 	memset(arg, 0, sizeof(*arg));
 	arg->type = type;
@@ -2649,27 +2637,27 @@ free_field:
 	arg->hex.field = NULL;
 out:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_hex(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_hex(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	return process_hex_common(event, arg, tok, PRINT_HEX);
+	return process_hex_common(event, arg, tok, TEP_PRINT_HEX);
 }
 
-static enum event_type
-process_hex_str(struct event_format *event, struct print_arg *arg,
+static enum tep_event_type
+process_hex_str(struct tep_event_format *event, struct tep_print_arg *arg,
 		char **tok)
 {
-	return process_hex_common(event, arg, tok, PRINT_HEX_STR);
+	return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR);
 }
 
-static enum event_type
-process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_int_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_INT_ARRAY;
+	arg->type = TEP_PRINT_INT_ARRAY;
 
 	if (alloc_and_process_delim(event, ",", &arg->int_array.field))
 		goto out;
@@ -2690,18 +2678,18 @@ free_field:
 	arg->int_array.field = NULL;
 out:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_dynamic_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct format_field *field;
-	enum event_type type;
+	struct tep_format_field *field;
+	enum tep_event_type type;
 	char *token;
 
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_DYNAMIC_ARRAY;
+	arg->type = TEP_PRINT_DYNAMIC_ARRAY;
 
 	/*
 	 * The item within the parenthesis is another field that holds
@@ -2709,25 +2697,25 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 	 */
 	type = read_token(&token);
 	*tok = token;
-	if (type != EVENT_ITEM)
+	if (type != TEP_EVENT_ITEM)
 		goto out_free;
 
 	/* Find the field */
 
-	field = pevent_find_field(event, token);
+	field = tep_find_field(event, token);
 	if (!field)
 		goto out_free;
 
 	arg->dynarray.field = field;
 	arg->dynarray.index = 0;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_free;
 
 	free_token(token);
 	type = read_token_item(&token);
 	*tok = token;
-	if (type != EVENT_OP || strcmp(token, "[") != 0)
+	if (type != TEP_EVENT_OP || strcmp(token, "[") != 0)
 		return type;
 
 	free_token(token);
@@ -2735,14 +2723,14 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 	if (!arg) {
 		do_warning_event(event, "%s: not enough memory!", __func__);
 		*tok = NULL;
-		return EVENT_ERROR;
+		return TEP_EVENT_ERROR;
 	}
 
 	type = process_arg(event, arg, &token);
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free_arg;
 
-	if (!test_type_token(type, token, EVENT_OP, "]"))
+	if (!test_type_token(type, token, TEP_EVENT_OP, "]"))
 		goto out_free_arg;
 
 	free_token(token);
@@ -2754,31 +2742,31 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
  out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_dynamic_array_len(struct event_format *event, struct print_arg *arg,
+static enum tep_event_type
+process_dynamic_array_len(struct tep_event_format *event, struct tep_print_arg *arg,
 			  char **tok)
 {
-	struct format_field *field;
-	enum event_type type;
+	struct tep_format_field *field;
+	enum tep_event_type type;
 	char *token;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 
-	arg->type = PRINT_DYNAMIC_ARRAY_LEN;
+	arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN;
 
 	/* Find the field */
-	field = pevent_find_field(event, token);
+	field = tep_find_field(event, token);
 	if (!field)
 		goto out_free;
 
 	arg->dynarray.field = field;
 	arg->dynarray.index = 0;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_err;
 
 	type = read_token(&token);
@@ -2790,28 +2778,28 @@ process_dynamic_array_len(struct event_format *event, struct print_arg *arg,
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_paren(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_paren(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *item_arg;
-	enum event_type type;
+	struct tep_print_arg *item_arg;
+	enum tep_event_type type;
 	char *token;
 
 	type = process_arg(event, arg, &token);
 
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free;
 
-	if (type == EVENT_OP)
+	if (type == TEP_EVENT_OP)
 		type = process_op(event, arg, &token);
 
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free;
 
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -2822,13 +2810,13 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok)
 	 * this was a typecast.
 	 */
 	if (event_item_type(type) ||
-	    (type == EVENT_DELIM && strcmp(token, "(") == 0)) {
+	    (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) {
 
 		/* make this a typecast and contine */
 
 		/* prevous must be an atom */
-		if (arg->type != PRINT_ATOM) {
-			do_warning_event(event, "previous needed to be PRINT_ATOM");
+		if (arg->type != TEP_PRINT_ATOM) {
+			do_warning_event(event, "previous needed to be TEP_PRINT_ATOM");
 			goto out_free;
 		}
 
@@ -2839,7 +2827,7 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok)
 			goto out_free;
 		}
 
-		arg->type = PRINT_TYPE;
+		arg->type = TEP_PRINT_TYPE;
 		arg->typecast.type = arg->atom.atom;
 		arg->typecast.item = item_arg;
 		type = process_arg_token(event, item_arg, &token, type);
@@ -2852,25 +2840,25 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok)
  out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
 
-static enum event_type
-process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
+static enum tep_event_type
+process_str(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
 	    char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 
-	arg->type = PRINT_STRING;
+	arg->type = TEP_PRINT_STRING;
 	arg->string.string = token;
 	arg->string.offset = -1;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_err;
 
 	type = read_token(&token);
@@ -2882,24 +2870,24 @@ process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg,
-	    char **tok)
+static enum tep_event_type
+process_bitmask(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
+		char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 
-	arg->type = PRINT_BITMASK;
+	arg->type = TEP_PRINT_BITMASK;
 	arg->bitmask.bitmask = token;
 	arg->bitmask.offset = -1;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_err;
 
 	type = read_token(&token);
@@ -2911,13 +2899,13 @@ process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static struct pevent_function_handler *
-find_func_handler(struct pevent *pevent, char *func_name)
+static struct tep_function_handler *
+find_func_handler(struct tep_handle *pevent, char *func_name)
 {
-	struct pevent_function_handler *func;
+	struct tep_function_handler *func;
 
 	if (!pevent)
 		return NULL;
@@ -2930,10 +2918,10 @@ find_func_handler(struct pevent *pevent, char *func_name)
 	return func;
 }
 
-static void remove_func_handler(struct pevent *pevent, char *func_name)
+static void remove_func_handler(struct tep_handle *pevent, char *func_name)
 {
-	struct pevent_function_handler *func;
-	struct pevent_function_handler **next;
+	struct tep_function_handler *func;
+	struct tep_function_handler **next;
 
 	next = &pevent->func_handlers;
 	while ((func = *next)) {
@@ -2946,17 +2934,17 @@ static void remove_func_handler(struct pevent *pevent, char *func_name)
 	}
 }
 
-static enum event_type
-process_func_handler(struct event_format *event, struct pevent_function_handler *func,
-		     struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_func_handler(struct tep_event_format *event, struct tep_function_handler *func,
+		     struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg **next_arg;
-	struct print_arg *farg;
-	enum event_type type;
+	struct tep_print_arg **next_arg;
+	struct tep_print_arg *farg;
+	enum tep_event_type type;
 	char *token;
 	int i;
 
-	arg->type = PRINT_FUNC;
+	arg->type = TEP_PRINT_FUNC;
 	arg->func.func = func;
 
 	*tok = NULL;
@@ -2967,12 +2955,12 @@ process_func_handler(struct event_format *event, struct pevent_function_handler
 		if (!farg) {
 			do_warning_event(event, "%s: not enough memory!",
 					 __func__);
-			return EVENT_ERROR;
+			return TEP_EVENT_ERROR;
 		}
 
 		type = process_arg(event, farg, &token);
 		if (i < (func->nr_args - 1)) {
-			if (type != EVENT_DELIM || strcmp(token, ",") != 0) {
+			if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) {
 				do_warning_event(event,
 					"Error: function '%s()' expects %d arguments but event %s only uses %d",
 					func->name, func->nr_args,
@@ -2980,7 +2968,7 @@ process_func_handler(struct event_format *event, struct pevent_function_handler
 				goto err;
 			}
 		} else {
-			if (type != EVENT_DELIM || strcmp(token, ")") != 0) {
+			if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) {
 				do_warning_event(event,
 					"Error: function '%s()' only expects %d arguments but event %s has more",
 					func->name, func->nr_args, event->name);
@@ -3001,14 +2989,14 @@ process_func_handler(struct event_format *event, struct pevent_function_handler
 err:
 	free_arg(farg);
 	free_token(token);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_function(struct event_format *event, struct print_arg *arg,
+static enum tep_event_type
+process_function(struct tep_event_format *event, struct tep_print_arg *arg,
 		 char *token, char **tok)
 {
-	struct pevent_function_handler *func;
+	struct tep_function_handler *func;
 
 	if (strcmp(token, "__print_flags") == 0) {
 		free_token(token);
@@ -3057,12 +3045,12 @@ process_function(struct event_format *event, struct print_arg *arg,
 
 	do_warning_event(event, "function %s not defined", token);
 	free_token(token);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_arg_token(struct event_format *event, struct print_arg *arg,
-		  char **tok, enum event_type type)
+static enum tep_event_type
+process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
+		  char **tok, enum tep_event_type type)
 {
 	char *token;
 	char *atom;
@@ -3070,7 +3058,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 	token = *tok;
 
 	switch (type) {
-	case EVENT_ITEM:
+	case TEP_EVENT_ITEM:
 		if (strcmp(token, "REC") == 0) {
 			free_token(token);
 			type = process_entry(event, arg, &token);
@@ -3084,7 +3072,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 		 * If the next token is a parenthesis, then this
 		 * is a function.
 		 */
-		if (type == EVENT_DELIM && strcmp(token, "(") == 0) {
+		if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) {
 			free_token(token);
 			token = NULL;
 			/* this will free atom. */
@@ -3092,7 +3080,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 			break;
 		}
 		/* atoms can be more than one token long */
-		while (type == EVENT_ITEM) {
+		while (type == TEP_EVENT_ITEM) {
 			char *new_atom;
 			new_atom = realloc(atom,
 					   strlen(atom) + strlen(token) + 2);
@@ -3100,7 +3088,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 				free(atom);
 				*tok = NULL;
 				free_token(token);
-				return EVENT_ERROR;
+				return TEP_EVENT_ERROR;
 			}
 			atom = new_atom;
 			strcat(atom, " ");
@@ -3109,55 +3097,55 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 			type = read_token_item(&token);
 		}
 
-		arg->type = PRINT_ATOM;
+		arg->type = TEP_PRINT_ATOM;
 		arg->atom.atom = atom;
 		break;
 
-	case EVENT_DQUOTE:
-	case EVENT_SQUOTE:
-		arg->type = PRINT_ATOM;
+	case TEP_EVENT_DQUOTE:
+	case TEP_EVENT_SQUOTE:
+		arg->type = TEP_PRINT_ATOM;
 		arg->atom.atom = token;
 		type = read_token_item(&token);
 		break;
-	case EVENT_DELIM:
+	case TEP_EVENT_DELIM:
 		if (strcmp(token, "(") == 0) {
 			free_token(token);
 			type = process_paren(event, arg, &token);
 			break;
 		}
-	case EVENT_OP:
+	case TEP_EVENT_OP:
 		/* handle single ops */
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = NULL;
 		type = process_op(event, arg, &token);
 
 		/* On error, the op is freed */
-		if (type == EVENT_ERROR)
+		if (type == TEP_EVENT_ERROR)
 			arg->op.op = NULL;
 
 		/* return error type if errored */
 		break;
 
-	case EVENT_ERROR ... EVENT_NEWLINE:
+	case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE:
 	default:
 		do_warning_event(event, "unexpected type %d", type);
-		return EVENT_ERROR;
+		return TEP_EVENT_ERROR;
 	}
 	*tok = token;
 
 	return type;
 }
 
-static int event_read_print_args(struct event_format *event, struct print_arg **list)
+static int event_read_print_args(struct tep_event_format *event, struct tep_print_arg **list)
 {
-	enum event_type type = EVENT_ERROR;
-	struct print_arg *arg;
+	enum tep_event_type type = TEP_EVENT_ERROR;
+	struct tep_print_arg *arg;
 	char *token;
 	int args = 0;
 
 	do {
-		if (type == EVENT_NEWLINE) {
+		if (type == TEP_EVENT_NEWLINE) {
 			type = read_token_item(&token);
 			continue;
 		}
@@ -3171,7 +3159,7 @@ static int event_read_print_args(struct event_format *event, struct print_arg **
 
 		type = process_arg(event, arg, &token);
 
-		if (type == EVENT_ERROR) {
+		if (type == TEP_EVENT_ERROR) {
 			free_token(token);
 			free_arg(arg);
 			return -1;
@@ -3180,10 +3168,10 @@ static int event_read_print_args(struct event_format *event, struct print_arg **
 		*list = arg;
 		args++;
 
-		if (type == EVENT_OP) {
+		if (type == TEP_EVENT_OP) {
 			type = process_op(event, arg, &token);
 			free_token(token);
-			if (type == EVENT_ERROR) {
+			if (type == TEP_EVENT_ERROR) {
 				*list = NULL;
 				free_arg(arg);
 				return -1;
@@ -3192,37 +3180,37 @@ static int event_read_print_args(struct event_format *event, struct print_arg **
 			continue;
 		}
 
-		if (type == EVENT_DELIM && strcmp(token, ",") == 0) {
+		if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) {
 			free_token(token);
 			*list = arg;
 			list = &arg->next;
 			continue;
 		}
 		break;
-	} while (type != EVENT_NONE);
+	} while (type != TEP_EVENT_NONE);
 
-	if (type != EVENT_NONE && type != EVENT_ERROR)
+	if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR)
 		free_token(token);
 
 	return args;
 }
 
-static int event_read_print(struct event_format *event)
+static int event_read_print(struct tep_event_format *event)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 	int ret;
 
-	if (read_expected_item(EVENT_ITEM, "print") < 0)
+	if (read_expected_item(TEP_EVENT_ITEM, "print") < 0)
 		return -1;
 
-	if (read_expected(EVENT_ITEM, "fmt") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "fmt") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return -1;
 
-	if (read_expect_type(EVENT_DQUOTE, &token) < 0)
+	if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0)
 		goto fail;
 
  concat:
@@ -3232,11 +3220,11 @@ static int event_read_print(struct event_format *event)
 	/* ok to have no arg */
 	type = read_token_item(&token);
 
-	if (type == EVENT_NONE)
+	if (type == TEP_EVENT_NONE)
 		return 0;
 
 	/* Handle concatenation of print lines */
-	if (type == EVENT_DQUOTE) {
+	if (type == TEP_EVENT_DQUOTE) {
 		char *cat;
 
 		if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
@@ -3248,7 +3236,7 @@ static int event_read_print(struct event_format *event)
 		goto concat;
 	}
 			     
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto fail;
 
 	free_token(token);
@@ -3265,17 +3253,17 @@ static int event_read_print(struct event_format *event)
 }
 
 /**
- * pevent_find_common_field - return a common field by event
+ * tep_find_common_field - return a common field by event
  * @event: handle for the event
  * @name: the name of the common field to return
  *
  * Returns a common field from the event by the given @name.
  * This only searchs the common fields and not all field.
  */
-struct format_field *
-pevent_find_common_field(struct event_format *event, const char *name)
+struct tep_format_field *
+tep_find_common_field(struct tep_event_format *event, const char *name)
 {
-	struct format_field *format;
+	struct tep_format_field *format;
 
 	for (format = event->format.common_fields;
 	     format; format = format->next) {
@@ -3287,17 +3275,17 @@ pevent_find_common_field(struct event_format *event, const char *name)
 }
 
 /**
- * pevent_find_field - find a non-common field
+ * tep_find_field - find a non-common field
  * @event: handle for the event
  * @name: the name of the non-common field
  *
  * Returns a non-common field by the given @name.
  * This does not search common fields.
  */
-struct format_field *
-pevent_find_field(struct event_format *event, const char *name)
+struct tep_format_field *
+tep_find_field(struct tep_event_format *event, const char *name)
 {
-	struct format_field *format;
+	struct tep_format_field *format;
 
 	for (format = event->format.fields;
 	     format; format = format->next) {
@@ -3309,7 +3297,7 @@ pevent_find_field(struct event_format *event, const char *name)
 }
 
 /**
- * pevent_find_any_field - find any field by name
+ * tep_find_any_field - find any field by name
  * @event: handle for the event
  * @name: the name of the field
  *
@@ -3317,19 +3305,19 @@ pevent_find_field(struct event_format *event, const char *name)
  * This searchs the common field names first, then
  * the non-common ones if a common one was not found.
  */
-struct format_field *
-pevent_find_any_field(struct event_format *event, const char *name)
+struct tep_format_field *
+tep_find_any_field(struct tep_event_format *event, const char *name)
 {
-	struct format_field *format;
+	struct tep_format_field *format;
 
-	format = pevent_find_common_field(event, name);
+	format = tep_find_common_field(event, name);
 	if (format)
 		return format;
-	return pevent_find_field(event, name);
+	return tep_find_field(event, name);
 }
 
 /**
- * pevent_read_number - read a number from data
+ * tep_read_number - read a number from data
  * @pevent: handle for the pevent
  * @ptr: the raw data
  * @size: the size of the data that holds the number
@@ -3337,18 +3325,18 @@ pevent_find_any_field(struct event_format *event, const char *name)
  * Returns the number (converted to host) from the
  * raw data.
  */
-unsigned long long pevent_read_number(struct pevent *pevent,
-				      const void *ptr, int size)
+unsigned long long tep_read_number(struct tep_handle *pevent,
+				   const void *ptr, int size)
 {
 	switch (size) {
 	case 1:
 		return *(unsigned char *)ptr;
 	case 2:
-		return data2host2(pevent, ptr);
+		return tep_data2host2(pevent, ptr);
 	case 4:
-		return data2host4(pevent, ptr);
+		return tep_data2host4(pevent, ptr);
 	case 8:
-		return data2host8(pevent, ptr);
+		return tep_data2host8(pevent, ptr);
 	default:
 		/* BUG! */
 		return 0;
@@ -3356,7 +3344,7 @@ unsigned long long pevent_read_number(struct pevent *pevent,
 }
 
 /**
- * pevent_read_number_field - read a number from data
+ * tep_read_number_field - read a number from data
  * @field: a handle to the field
  * @data: the raw data to read
  * @value: the value to place the number in
@@ -3366,8 +3354,8 @@ unsigned long long pevent_read_number(struct pevent *pevent,
  *
  * Returns 0 on success, -1 otherwise.
  */
-int pevent_read_number_field(struct format_field *field, const void *data,
-			     unsigned long long *value)
+int tep_read_number_field(struct tep_format_field *field, const void *data,
+			  unsigned long long *value)
 {
 	if (!field)
 		return -1;
@@ -3376,19 +3364,19 @@ int pevent_read_number_field(struct format_field *field, const void *data,
 	case 2:
 	case 4:
 	case 8:
-		*value = pevent_read_number(field->event->pevent,
-					    data + field->offset, field->size);
+		*value = tep_read_number(field->event->pevent,
+					 data + field->offset, field->size);
 		return 0;
 	default:
 		return -1;
 	}
 }
 
-static int get_common_info(struct pevent *pevent,
+static int get_common_info(struct tep_handle *pevent,
 			   const char *type, int *offset, int *size)
 {
-	struct event_format *event;
-	struct format_field *field;
+	struct tep_event_format *event;
+	struct tep_format_field *field;
 
 	/*
 	 * All events should have the same common elements.
@@ -3400,7 +3388,7 @@ static int get_common_info(struct pevent *pevent,
 	}
 
 	event = pevent->events[0];
-	field = pevent_find_common_field(event, type);
+	field = tep_find_common_field(event, type);
 	if (!field)
 		return -1;
 
@@ -3410,7 +3398,7 @@ static int get_common_info(struct pevent *pevent,
 	return 0;
 }
 
-static int __parse_common(struct pevent *pevent, void *data,
+static int __parse_common(struct tep_handle *pevent, void *data,
 			  int *size, int *offset, const char *name)
 {
 	int ret;
@@ -3420,45 +3408,45 @@ static int __parse_common(struct pevent *pevent, void *data,
 		if (ret < 0)
 			return ret;
 	}
-	return pevent_read_number(pevent, data + *offset, *size);
+	return tep_read_number(pevent, data + *offset, *size);
 }
 
-static int trace_parse_common_type(struct pevent *pevent, void *data)
+static int trace_parse_common_type(struct tep_handle *pevent, void *data)
 {
 	return __parse_common(pevent, data,
 			      &pevent->type_size, &pevent->type_offset,
 			      "common_type");
 }
 
-static int parse_common_pid(struct pevent *pevent, void *data)
+static int parse_common_pid(struct tep_handle *pevent, void *data)
 {
 	return __parse_common(pevent, data,
 			      &pevent->pid_size, &pevent->pid_offset,
 			      "common_pid");
 }
 
-static int parse_common_pc(struct pevent *pevent, void *data)
+static int parse_common_pc(struct tep_handle *pevent, void *data)
 {
 	return __parse_common(pevent, data,
 			      &pevent->pc_size, &pevent->pc_offset,
 			      "common_preempt_count");
 }
 
-static int parse_common_flags(struct pevent *pevent, void *data)
+static int parse_common_flags(struct tep_handle *pevent, void *data)
 {
 	return __parse_common(pevent, data,
 			      &pevent->flags_size, &pevent->flags_offset,
 			      "common_flags");
 }
 
-static int parse_common_lock_depth(struct pevent *pevent, void *data)
+static int parse_common_lock_depth(struct tep_handle *pevent, void *data)
 {
 	return __parse_common(pevent, data,
 			      &pevent->ld_size, &pevent->ld_offset,
 			      "common_lock_depth");
 }
 
-static int parse_common_migrate_disable(struct pevent *pevent, void *data)
+static int parse_common_migrate_disable(struct tep_handle *pevent, void *data)
 {
 	return __parse_common(pevent, data,
 			      &pevent->ld_size, &pevent->ld_offset,
@@ -3468,17 +3456,17 @@ static int parse_common_migrate_disable(struct pevent *pevent, void *data)
 static int events_id_cmp(const void *a, const void *b);
 
 /**
- * pevent_find_event - find an event by given id
+ * tep_find_event - find an event by given id
  * @pevent: a handle to the pevent
  * @id: the id of the event
  *
  * Returns an event that has a given @id.
  */
-struct event_format *pevent_find_event(struct pevent *pevent, int id)
+struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id)
 {
-	struct event_format **eventptr;
-	struct event_format key;
-	struct event_format *pkey = &key;
+	struct tep_event_format **eventptr;
+	struct tep_event_format key;
+	struct tep_event_format *pkey = &key;
 
 	/* Check cache first */
 	if (pevent->last_event && pevent->last_event->id == id)
@@ -3498,7 +3486,7 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id)
 }
 
 /**
- * pevent_find_event_by_name - find an event by given name
+ * tep_find_event_by_name - find an event by given name
  * @pevent: a handle to the pevent
  * @sys: the system name to search for
  * @name: the name of the event to search for
@@ -3506,11 +3494,11 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id)
  * This returns an event with a given @name and under the system
  * @sys. If @sys is NULL the first event with @name is returned.
  */
-struct event_format *
-pevent_find_event_by_name(struct pevent *pevent,
-			  const char *sys, const char *name)
+struct tep_event_format *
+tep_find_event_by_name(struct tep_handle *pevent,
+		       const char *sys, const char *name)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	int i;
 
 	if (pevent->last_event &&
@@ -3535,54 +3523,54 @@ pevent_find_event_by_name(struct pevent *pevent,
 }
 
 static unsigned long long
-eval_num_arg(void *data, int size, struct event_format *event, struct print_arg *arg)
+eval_num_arg(void *data, int size, struct tep_event_format *event, struct tep_print_arg *arg)
 {
-	struct pevent *pevent = event->pevent;
+	struct tep_handle *pevent = event->pevent;
 	unsigned long long val = 0;
 	unsigned long long left, right;
-	struct print_arg *typearg = NULL;
-	struct print_arg *larg;
+	struct tep_print_arg *typearg = NULL;
+	struct tep_print_arg *larg;
 	unsigned long offset;
 	unsigned int field_size;
 
 	switch (arg->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		/* ?? */
 		return 0;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		return strtoull(arg->atom.atom, NULL, 0);
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		if (!arg->field.field) {
-			arg->field.field = pevent_find_any_field(event, arg->field.name);
+			arg->field.field = tep_find_any_field(event, arg->field.name);
 			if (!arg->field.field)
 				goto out_warning_field;
 			
 		}
 		/* must be a number */
-		val = pevent_read_number(pevent, data + arg->field.field->offset,
-				arg->field.field->size);
+		val = tep_read_number(pevent, data + arg->field.field->offset,
+				      arg->field.field->size);
 		break;
-	case PRINT_FLAGS:
-	case PRINT_SYMBOL:
-	case PRINT_INT_ARRAY:
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_FLAGS:
+	case TEP_PRINT_SYMBOL:
+	case TEP_PRINT_INT_ARRAY:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		val = eval_num_arg(data, size, event, arg->typecast.item);
 		return eval_type(val, arg, 0);
-	case PRINT_STRING:
-	case PRINT_BSTRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
 		return 0;
-	case PRINT_FUNC: {
+	case TEP_PRINT_FUNC: {
 		struct trace_seq s;
 		trace_seq_init(&s);
 		val = process_defined_func(&s, data, size, event, arg);
 		trace_seq_destroy(&s);
 		return val;
 	}
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(arg->op.op, "[") == 0) {
 			/*
 			 * Arrays are special, since we don't want
@@ -3592,7 +3580,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 
 			/* handle typecasts */
 			larg = arg->op.left;
-			while (larg->type == PRINT_TYPE) {
+			while (larg->type == TEP_PRINT_TYPE) {
 				if (!typearg)
 					typearg = larg;
 				larg = larg->typecast.item;
@@ -3602,8 +3590,8 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 			field_size = pevent->long_size;
 
 			switch (larg->type) {
-			case PRINT_DYNAMIC_ARRAY:
-				offset = pevent_read_number(pevent,
+			case TEP_PRINT_DYNAMIC_ARRAY:
+				offset = tep_read_number(pevent,
 						   data + larg->dynarray.field->offset,
 						   larg->dynarray.field->size);
 				if (larg->dynarray.field->elementsize)
@@ -3616,10 +3604,10 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 				offset &= 0xffff;
 				offset += right;
 				break;
-			case PRINT_FIELD:
+			case TEP_PRINT_FIELD:
 				if (!larg->field.field) {
 					larg->field.field =
-						pevent_find_any_field(event, larg->field.name);
+						tep_find_any_field(event, larg->field.name);
 					if (!larg->field.field) {
 						arg = larg;
 						goto out_warning_field;
@@ -3632,8 +3620,8 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 			default:
 				goto default_op; /* oops, all bets off */
 			}
-			val = pevent_read_number(pevent,
-						 data + offset, field_size);
+			val = tep_read_number(pevent,
+					      data + offset, field_size);
 			if (typearg)
 				val = eval_type(val, typearg, 1);
 			break;
@@ -3732,10 +3720,10 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 			goto out_warning_op;
 		}
 		break;
-	case PRINT_DYNAMIC_ARRAY_LEN:
-		offset = pevent_read_number(pevent,
-					    data + arg->dynarray.field->offset,
-					    arg->dynarray.field->size);
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+		offset = tep_read_number(pevent,
+					 data + arg->dynarray.field->offset,
+					 arg->dynarray.field->size);
 		/*
 		 * The total allocated length of the dynamic array is
 		 * stored in the top half of the field, and the offset
@@ -3743,11 +3731,11 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 		 */
 		val = (unsigned long long)(offset >> 16);
 		break;
-	case PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY:
 		/* Without [], we pass the address to the dynamic data */
-		offset = pevent_read_number(pevent,
-					    data + arg->dynarray.field->offset,
-					    arg->dynarray.field->size);
+		offset = tep_read_number(pevent,
+					 data + arg->dynarray.field->offset,
+					 arg->dynarray.field->size);
 		/*
 		 * The total allocated length of the dynamic array is
 		 * stored in the top half of the field, and the offset
@@ -3820,7 +3808,7 @@ static void print_str_to_seq(struct trace_seq *s, const char *format,
 		trace_seq_printf(s, format, str);
 }
 
-static void print_bitmask_to_seq(struct pevent *pevent,
+static void print_bitmask_to_seq(struct tep_handle *pevent,
 				 struct trace_seq *s, const char *format,
 				 int len_arg, const void *data, int size)
 {
@@ -3875,12 +3863,12 @@ static void print_bitmask_to_seq(struct pevent *pevent,
 }
 
 static void print_str_arg(struct trace_seq *s, void *data, int size,
-			  struct event_format *event, const char *format,
-			  int len_arg, struct print_arg *arg)
+			  struct tep_event_format *event, const char *format,
+			  int len_arg, struct tep_print_arg *arg)
 {
-	struct pevent *pevent = event->pevent;
-	struct print_flag_sym *flag;
-	struct format_field *field;
+	struct tep_handle *pevent = event->pevent;
+	struct tep_print_flag_sym *flag;
+	struct tep_format_field *field;
 	struct printk_map *printk;
 	long long val, fval;
 	unsigned long long addr;
@@ -3890,16 +3878,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 	int i, len;
 
 	switch (arg->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		/* ?? */
 		return;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		print_str_to_seq(s, format, len_arg, arg->atom.atom);
 		return;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		field = arg->field.field;
 		if (!field) {
-			field = pevent_find_any_field(event, arg->field.name);
+			field = tep_find_any_field(event, arg->field.name);
 			if (!field) {
 				str = arg->field.name;
 				goto out_warning_field;
@@ -3914,7 +3902,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		 * and the size is the same as long_size, assume that it
 		 * is a pointer.
 		 */
-		if (!(field->flags & FIELD_IS_ARRAY) &&
+		if (!(field->flags & TEP_FIELD_IS_ARRAY) &&
 		    field->size == pevent->long_size) {
 
 			/* Handle heterogeneous recording and processing
@@ -3953,7 +3941,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		print_str_to_seq(s, format, len_arg, str);
 		free(str);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		val = eval_num_arg(data, size, event, arg->flags.field);
 		print = 0;
 		for (flag = arg->flags.flags; flag; flag = flag->next) {
@@ -3976,7 +3964,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			trace_seq_printf(s, "0x%llx", val);
 		}
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		val = eval_num_arg(data, size, event, arg->symbol.field);
 		for (flag = arg->symbol.symbols; flag; flag = flag->next) {
 			fval = eval_flag(flag->value);
@@ -3988,11 +3976,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		if (!flag)
 			trace_seq_printf(s, "0x%llx", val);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
-		if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) {
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
 			unsigned long offset;
-			offset = pevent_read_number(pevent,
+			offset = tep_read_number(pevent,
 				data + arg->hex.field->dynarray.field->offset,
 				arg->hex.field->dynarray.field->size);
 			hex = data + (offset & 0xffff);
@@ -4000,7 +3988,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			field = arg->hex.field->field.field;
 			if (!field) {
 				str = arg->hex.field->field.name;
-				field = pevent_find_any_field(event, str);
+				field = tep_find_any_field(event, str);
 				if (!field)
 					goto out_warning_field;
 				arg->hex.field->field.field = field;
@@ -4009,29 +3997,29 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		}
 		len = eval_num_arg(data, size, event, arg->hex.size);
 		for (i = 0; i < len; i++) {
-			if (i && arg->type == PRINT_HEX)
+			if (i && arg->type == TEP_PRINT_HEX)
 				trace_seq_putc(s, ' ');
 			trace_seq_printf(s, "%02x", hex[i]);
 		}
 		break;
 
-	case PRINT_INT_ARRAY: {
+	case TEP_PRINT_INT_ARRAY: {
 		void *num;
 		int el_size;
 
-		if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) {
+		if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
 			unsigned long offset;
-			struct format_field *field =
+			struct tep_format_field *field =
 				arg->int_array.field->dynarray.field;
-			offset = pevent_read_number(pevent,
-						    data + field->offset,
-						    field->size);
+			offset = tep_read_number(pevent,
+						 data + field->offset,
+						 field->size);
 			num = data + (offset & 0xffff);
 		} else {
 			field = arg->int_array.field->field.field;
 			if (!field) {
 				str = arg->int_array.field->field.name;
-				field = pevent_find_any_field(event, str);
+				field = tep_find_any_field(event, str);
 				if (!field)
 					goto out_warning_field;
 				arg->int_array.field->field.field = field;
@@ -4063,43 +4051,43 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		}
 		break;
 	}
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		break;
-	case PRINT_STRING: {
+	case TEP_PRINT_STRING: {
 		int str_offset;
 
 		if (arg->string.offset == -1) {
-			struct format_field *f;
+			struct tep_format_field *f;
 
-			f = pevent_find_any_field(event, arg->string.string);
+			f = tep_find_any_field(event, arg->string.string);
 			arg->string.offset = f->offset;
 		}
-		str_offset = data2host4(pevent, data + arg->string.offset);
+		str_offset = tep_data2host4(pevent, data + arg->string.offset);
 		str_offset &= 0xffff;
 		print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
 		break;
 	}
-	case PRINT_BSTRING:
+	case TEP_PRINT_BSTRING:
 		print_str_to_seq(s, format, len_arg, arg->string.string);
 		break;
-	case PRINT_BITMASK: {
+	case TEP_PRINT_BITMASK: {
 		int bitmask_offset;
 		int bitmask_size;
 
 		if (arg->bitmask.offset == -1) {
-			struct format_field *f;
+			struct tep_format_field *f;
 
-			f = pevent_find_any_field(event, arg->bitmask.bitmask);
+			f = tep_find_any_field(event, arg->bitmask.bitmask);
 			arg->bitmask.offset = f->offset;
 		}
-		bitmask_offset = data2host4(pevent, data + arg->bitmask.offset);
+		bitmask_offset = tep_data2host4(pevent, data + arg->bitmask.offset);
 		bitmask_size = bitmask_offset >> 16;
 		bitmask_offset &= 0xffff;
 		print_bitmask_to_seq(pevent, s, format, len_arg,
 				     data + bitmask_offset, bitmask_size);
 		break;
 	}
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		/*
 		 * The only op for string should be ? :
 		 */
@@ -4113,7 +4101,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			print_str_arg(s, data, size, event,
 				      format, len_arg, arg->op.right->op.right);
 		break;
-	case PRINT_FUNC:
+	case TEP_PRINT_FUNC:
 		process_defined_func(s, data, size, event, arg);
 		break;
 	default:
@@ -4130,13 +4118,13 @@ out_warning_field:
 
 static unsigned long long
 process_defined_func(struct trace_seq *s, void *data, int size,
-		     struct event_format *event, struct print_arg *arg)
+		     struct tep_event_format *event, struct tep_print_arg *arg)
 {
-	struct pevent_function_handler *func_handle = arg->func.func;
-	struct pevent_func_params *param;
+	struct tep_function_handler *func_handle = arg->func.func;
+	struct func_params *param;
 	unsigned long long *args;
 	unsigned long long ret;
-	struct print_arg *farg;
+	struct tep_print_arg *farg;
 	struct trace_seq str;
 	struct save_str {
 		struct save_str *next;
@@ -4159,12 +4147,12 @@ process_defined_func(struct trace_seq *s, void *data, int size,
 
 	for (i = 0; i < func_handle->nr_args; i++) {
 		switch (param->type) {
-		case PEVENT_FUNC_ARG_INT:
-		case PEVENT_FUNC_ARG_LONG:
-		case PEVENT_FUNC_ARG_PTR:
+		case TEP_FUNC_ARG_INT:
+		case TEP_FUNC_ARG_LONG:
+		case TEP_FUNC_ARG_PTR:
 			args[i] = eval_num_arg(data, size, event, farg);
 			break;
-		case PEVENT_FUNC_ARG_STRING:
+		case TEP_FUNC_ARG_STRING:
 			trace_seq_init(&str);
 			print_str_arg(&str, data, size, event, "%s", -1, farg);
 			trace_seq_terminate(&str);
@@ -4213,9 +4201,9 @@ out_free:
 	return ret;
 }
 
-static void free_args(struct print_arg *args)
+static void free_args(struct tep_print_arg *args)
 {
-	struct print_arg *next;
+	struct tep_print_arg *next;
 
 	while (args) {
 		next = args->next;
@@ -4225,11 +4213,11 @@ static void free_args(struct print_arg *args)
 	}
 }
 
-static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event)
+static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event_format *event)
 {
-	struct pevent *pevent = event->pevent;
-	struct format_field *field, *ip_field;
-	struct print_arg *args, *arg, **next;
+	struct tep_handle *pevent = event->pevent;
+	struct tep_format_field *field, *ip_field;
+	struct tep_print_arg *args, *arg, **next;
 	unsigned long long ip, val;
 	char *ptr;
 	void *bptr;
@@ -4239,12 +4227,12 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	ip_field = pevent->bprint_ip_field;
 
 	if (!field) {
-		field = pevent_find_field(event, "buf");
+		field = tep_find_field(event, "buf");
 		if (!field) {
 			do_warning_event(event, "can't find buffer field for binary printk");
 			return NULL;
 		}
-		ip_field = pevent_find_field(event, "ip");
+		ip_field = tep_find_field(event, "ip");
 		if (!ip_field) {
 			do_warning_event(event, "can't find ip field for binary printk");
 			return NULL;
@@ -4253,7 +4241,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 		pevent->bprint_ip_field = ip_field;
 	}
 
-	ip = pevent_read_number(pevent, data + ip_field->offset, ip_field->size);
+	ip = tep_read_number(pevent, data + ip_field->offset, ip_field->size);
 
 	/*
 	 * The first arg is the IP pointer.
@@ -4268,7 +4256,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	arg->next = NULL;
 	next = &arg->next;
 
-	arg->type = PRINT_ATOM;
+	arg->type = TEP_PRINT_ATOM;
 		
 	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
 		goto out_free;
@@ -4347,7 +4335,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 				/* the pointers are always 4 bytes aligned */
 				bptr = (void *)(((unsigned long)bptr + 3) &
 						~3);
-				val = pevent_read_number(pevent, bptr, vsize);
+				val = tep_read_number(pevent, bptr, vsize);
 				bptr += vsize;
 				arg = alloc_arg();
 				if (!arg) {
@@ -4356,7 +4344,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 					goto out_free;
 				}
 				arg->next = NULL;
-				arg->type = PRINT_ATOM;
+				arg->type = TEP_PRINT_ATOM;
 				if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
 					free(arg);
 					goto out_free;
@@ -4380,7 +4368,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 					goto out_free;
 				}
 				arg->next = NULL;
-				arg->type = PRINT_BSTRING;
+				arg->type = TEP_PRINT_BSTRING;
 				arg->string.string = strdup(bptr);
 				if (!arg->string.string)
 					goto out_free;
@@ -4402,18 +4390,18 @@ out_free:
 
 static char *
 get_bprint_format(void *data, int size __maybe_unused,
-		  struct event_format *event)
+		  struct tep_event_format *event)
 {
-	struct pevent *pevent = event->pevent;
+	struct tep_handle *pevent = event->pevent;
 	unsigned long long addr;
-	struct format_field *field;
+	struct tep_format_field *field;
 	struct printk_map *printk;
 	char *format;
 
 	field = pevent->bprint_fmt_field;
 
 	if (!field) {
-		field = pevent_find_field(event, "fmt");
+		field = tep_find_field(event, "fmt");
 		if (!field) {
 			do_warning_event(event, "can't find format field for binary printk");
 			return NULL;
@@ -4421,7 +4409,7 @@ get_bprint_format(void *data, int size __maybe_unused,
 		pevent->bprint_fmt_field = field;
 	}
 
-	addr = pevent_read_number(pevent, data + field->offset, field->size);
+	addr = tep_read_number(pevent, data + field->offset, field->size);
 
 	printk = find_printk(pevent, addr);
 	if (!printk) {
@@ -4437,17 +4425,17 @@ get_bprint_format(void *data, int size __maybe_unused,
 }
 
 static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
-			  struct event_format *event, struct print_arg *arg)
+			  struct tep_event_format *event, struct tep_print_arg *arg)
 {
 	unsigned char *buf;
 	const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
 				 arg->type);
 		return;
@@ -4457,7 +4445,7 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
 		fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
 	if (!arg->field.field) {
 		arg->field.field =
-			pevent_find_any_field(event, arg->field.name);
+			tep_find_any_field(event, arg->field.name);
 		if (!arg->field.field) {
 			do_warning_event(event, "%s: field %s not found",
 					 __func__, arg->field.name);
@@ -4590,24 +4578,24 @@ static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
  * %pISpc print an IP address based on sockaddr; p adds port.
  */
 static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct event_format *event,
-			  struct print_arg *arg)
+			  void *data, int size, struct tep_event_format *event,
+			  struct tep_print_arg *arg)
 {
 	unsigned char *buf;
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return 0;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
 		return 0;
 	}
 
 	if (!arg->field.field) {
 		arg->field.field =
-			pevent_find_any_field(event, arg->field.name);
+			tep_find_any_field(event, arg->field.name);
 		if (!arg->field.field) {
 			do_warning("%s: field %s not found",
 				   __func__, arg->field.name);
@@ -4627,8 +4615,8 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
 }
 
 static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct event_format *event,
-			  struct print_arg *arg)
+			  void *data, int size, struct tep_event_format *event,
+			  struct tep_print_arg *arg)
 {
 	char have_c = 0;
 	unsigned char *buf;
@@ -4641,19 +4629,19 @@ static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
 		rc++;
 	}
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return rc;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
 		return rc;
 	}
 
 	if (!arg->field.field) {
 		arg->field.field =
-			pevent_find_any_field(event, arg->field.name);
+			tep_find_any_field(event, arg->field.name);
 		if (!arg->field.field) {
 			do_warning("%s: field %s not found",
 				   __func__, arg->field.name);
@@ -4677,8 +4665,8 @@ static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
 }
 
 static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct event_format *event,
-			  struct print_arg *arg)
+			  void *data, int size, struct tep_event_format *event,
+			  struct tep_print_arg *arg)
 {
 	char have_c = 0, have_p = 0;
 	unsigned char *buf;
@@ -4699,19 +4687,19 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 		}
 	}
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return rc;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
 		return rc;
 	}
 
 	if (!arg->field.field) {
 		arg->field.field =
-			pevent_find_any_field(event, arg->field.name);
+			tep_find_any_field(event, arg->field.name);
 		if (!arg->field.field) {
 			do_warning("%s: field %s not found",
 				   __func__, arg->field.name);
@@ -4759,8 +4747,8 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 }
 
 static int print_ip_arg(struct trace_seq *s, const char *ptr,
-			void *data, int size, struct event_format *event,
-			struct print_arg *arg)
+			void *data, int size, struct tep_event_format *event,
+			struct tep_print_arg *arg)
 {
 	char i = *ptr;  /* 'i' or 'I' */
 	char ver;
@@ -4800,23 +4788,23 @@ static int is_printable_array(char *p, unsigned int len)
 	return 1;
 }
 
-void pevent_print_field(struct trace_seq *s, void *data,
-			struct format_field *field)
+void tep_print_field(struct trace_seq *s, void *data,
+		     struct tep_format_field *field)
 {
 	unsigned long long val;
 	unsigned int offset, len, i;
-	struct pevent *pevent = field->event->pevent;
+	struct tep_handle *pevent = field->event->pevent;
 
-	if (field->flags & FIELD_IS_ARRAY) {
+	if (field->flags & TEP_FIELD_IS_ARRAY) {
 		offset = field->offset;
 		len = field->size;
-		if (field->flags & FIELD_IS_DYNAMIC) {
-			val = pevent_read_number(pevent, data + offset, len);
+		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+			val = tep_read_number(pevent, data + offset, len);
 			offset = val;
 			len = offset >> 16;
 			offset &= 0xffff;
 		}
-		if (field->flags & FIELD_IS_STRING &&
+		if (field->flags & TEP_FIELD_IS_STRING &&
 		    is_printable_array(data + offset, len)) {
 			trace_seq_printf(s, "%s", (char *)data + offset);
 		} else {
@@ -4828,21 +4816,21 @@ void pevent_print_field(struct trace_seq *s, void *data,
 						 *((unsigned char *)data + offset + i));
 			}
 			trace_seq_putc(s, ']');
-			field->flags &= ~FIELD_IS_STRING;
+			field->flags &= ~TEP_FIELD_IS_STRING;
 		}
 	} else {
-		val = pevent_read_number(pevent, data + field->offset,
-					 field->size);
-		if (field->flags & FIELD_IS_POINTER) {
+		val = tep_read_number(pevent, data + field->offset,
+				      field->size);
+		if (field->flags & TEP_FIELD_IS_POINTER) {
 			trace_seq_printf(s, "0x%llx", val);
-		} else if (field->flags & FIELD_IS_SIGNED) {
+		} else if (field->flags & TEP_FIELD_IS_SIGNED) {
 			switch (field->size) {
 			case 4:
 				/*
 				 * If field is long then print it in hex.
 				 * A long usually stores pointers.
 				 */
-				if (field->flags & FIELD_IS_LONG)
+				if (field->flags & TEP_FIELD_IS_LONG)
 					trace_seq_printf(s, "0x%x", (int)val);
 				else
 					trace_seq_printf(s, "%d", (int)val);
@@ -4857,7 +4845,7 @@ void pevent_print_field(struct trace_seq *s, void *data,
 				trace_seq_printf(s, "%lld", val);
 			}
 		} else {
-			if (field->flags & FIELD_IS_LONG)
+			if (field->flags & TEP_FIELD_IS_LONG)
 				trace_seq_printf(s, "0x%llx", val);
 			else
 				trace_seq_printf(s, "%llu", val);
@@ -4865,25 +4853,25 @@ void pevent_print_field(struct trace_seq *s, void *data,
 	}
 }
 
-void pevent_print_fields(struct trace_seq *s, void *data,
-			 int size __maybe_unused, struct event_format *event)
+void tep_print_fields(struct trace_seq *s, void *data,
+		      int size __maybe_unused, struct tep_event_format *event)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	field = event->format.fields;
 	while (field) {
 		trace_seq_printf(s, " %s=", field->name);
-		pevent_print_field(s, data, field);
+		tep_print_field(s, data, field);
 		field = field->next;
 	}
 }
 
-static void pretty_print(struct trace_seq *s, void *data, int size, struct event_format *event)
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event_format *event)
 {
-	struct pevent *pevent = event->pevent;
-	struct print_fmt *print_fmt = &event->print_fmt;
-	struct print_arg *arg = print_fmt->args;
-	struct print_arg *args = NULL;
+	struct tep_handle *pevent = event->pevent;
+	struct tep_print_fmt *print_fmt = &event->print_fmt;
+	struct tep_print_arg *arg = print_fmt->args;
+	struct tep_print_arg *args = NULL;
 	const char *ptr = print_fmt->format;
 	unsigned long long val;
 	struct func_map *func;
@@ -4897,13 +4885,13 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 	int len;
 	int ls;
 
-	if (event->flags & EVENT_FL_FAILED) {
+	if (event->flags & TEP_EVENT_FL_FAILED) {
 		trace_seq_printf(s, "[FAILED TO PARSE]");
-		pevent_print_fields(s, data, size, event);
+		tep_print_fields(s, data, size, event);
 		return;
 	}
 
-	if (event->flags & EVENT_FL_ISBPRINT) {
+	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
 		bprint_fmt = get_bprint_format(data, size, event);
 		args = make_bprint_args(bprint_fmt, data, size, event);
 		arg = args;
@@ -4958,7 +4946,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				/* The argument is the length. */
 				if (!arg) {
 					do_warning_event(event, "no argument match");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					goto out_failed;
 				}
 				len_arg = eval_num_arg(data, size, event, arg);
@@ -4980,7 +4968,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				if (isalnum(ptr[1]))
 					ptr++;
 
-				if (arg->type == PRINT_BSTRING) {
+				if (arg->type == TEP_PRINT_BSTRING) {
 					trace_seq_puts(s, arg->string.string);
 					break;
 				}
@@ -5011,7 +4999,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 			case 'u':
 				if (!arg) {
 					do_warning_event(event, "no argument match");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					goto out_failed;
 				}
 
@@ -5021,7 +5009,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				/* should never happen */
 				if (len > 31) {
 					do_warning_event(event, "bad format!");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					len = 31;
 				}
 
@@ -5087,13 +5075,13 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 					break;
 				default:
 					do_warning_event(event, "bad count (%d)", ls);
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 				}
 				break;
 			case 's':
 				if (!arg) {
 					do_warning_event(event, "no matching argument");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					goto out_failed;
 				}
 
@@ -5103,7 +5091,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				/* should never happen */
 				if (len > 31) {
 					do_warning_event(event, "bad format!");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					len = 31;
 				}
 
@@ -5128,7 +5116,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 			trace_seq_putc(s, *ptr);
 	}
 
-	if (event->flags & EVENT_FL_FAILED) {
+	if (event->flags & TEP_EVENT_FL_FAILED) {
 out_failed:
 		trace_seq_printf(s, "[FAILED TO PARSE]");
 	}
@@ -5140,7 +5128,7 @@ out_failed:
 }
 
 /**
- * pevent_data_lat_fmt - parse the data for the latency format
+ * tep_data_lat_fmt - parse the data for the latency format
  * @pevent: a handle to the pevent
  * @s: the trace_seq to write to
  * @record: the record to read from
@@ -5149,8 +5137,8 @@ out_failed:
  * need rescheduling, in hard/soft interrupt, preempt count
  * and lock depth) and places it into the trace_seq.
  */
-void pevent_data_lat_fmt(struct pevent *pevent,
-			 struct trace_seq *s, struct pevent_record *record)
+void tep_data_lat_fmt(struct tep_handle *pevent,
+		      struct trace_seq *s, struct tep_record *record)
 {
 	static int check_lock_depth = 1;
 	static int check_migrate_disable = 1;
@@ -5223,55 +5211,55 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 }
 
 /**
- * pevent_data_type - parse out the given event type
+ * tep_data_type - parse out the given event type
  * @pevent: a handle to the pevent
  * @rec: the record to read from
  *
  * This returns the event id from the @rec.
  */
-int pevent_data_type(struct pevent *pevent, struct pevent_record *rec)
+int tep_data_type(struct tep_handle *pevent, struct tep_record *rec)
 {
 	return trace_parse_common_type(pevent, rec->data);
 }
 
 /**
- * pevent_data_event_from_type - find the event by a given type
+ * tep_data_event_from_type - find the event by a given type
  * @pevent: a handle to the pevent
  * @type: the type of the event.
  *
  * This returns the event form a given @type;
  */
-struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type)
+struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type)
 {
-	return pevent_find_event(pevent, type);
+	return tep_find_event(pevent, type);
 }
 
 /**
- * pevent_data_pid - parse the PID from record
+ * tep_data_pid - parse the PID from record
  * @pevent: a handle to the pevent
  * @rec: the record to parse
  *
  * This returns the PID from a record.
  */
-int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec)
+int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec)
 {
 	return parse_common_pid(pevent, rec->data);
 }
 
 /**
- * pevent_data_preempt_count - parse the preempt count from the record
+ * tep_data_preempt_count - parse the preempt count from the record
  * @pevent: a handle to the pevent
  * @rec: the record to parse
  *
  * This returns the preempt count from a record.
  */
-int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec)
+int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec)
 {
 	return parse_common_pc(pevent, rec->data);
 }
 
 /**
- * pevent_data_flags - parse the latency flags from the record
+ * tep_data_flags - parse the latency flags from the record
  * @pevent: a handle to the pevent
  * @rec: the record to parse
  *
@@ -5279,20 +5267,20 @@ int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec)
  *
  *  Use trace_flag_type enum for the flags (see event-parse.h).
  */
-int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec)
+int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec)
 {
 	return parse_common_flags(pevent, rec->data);
 }
 
 /**
- * pevent_data_comm_from_pid - return the command line from PID
+ * tep_data_comm_from_pid - return the command line from PID
  * @pevent: a handle to the pevent
  * @pid: the PID of the task to search for
  *
  * This returns a pointer to the command line that has the given
  * @pid.
  */
-const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid)
+const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid)
 {
 	const char *comm;
 
@@ -5301,7 +5289,7 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid)
 }
 
 static struct cmdline *
-pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next)
+pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next)
 {
 	struct cmdline_list *cmdlist = (struct cmdline_list *)next;
 
@@ -5317,7 +5305,7 @@ pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next)
 }
 
 /**
- * pevent_data_pid_from_comm - return the pid from a given comm
+ * tep_data_pid_from_comm - return the pid from a given comm
  * @pevent: a handle to the pevent
  * @comm: the cmdline to find the pid from
  * @next: the cmdline structure to find the next comm
@@ -5329,8 +5317,8 @@ pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next)
  * next pid.
  * Also, it does a linear seach, so it may be slow.
  */
-struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm,
-					  struct cmdline *next)
+struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
+				       struct cmdline *next)
 {
 	struct cmdline *cmdline;
 
@@ -5365,13 +5353,13 @@ struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *com
 }
 
 /**
- * pevent_cmdline_pid - return the pid associated to a given cmdline
+ * tep_cmdline_pid - return the pid associated to a given cmdline
  * @cmdline: The cmdline structure to get the pid from
  *
  * Returns the pid for a give cmdline. If @cmdline is NULL, then
  * -1 is returned.
  */
-int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline)
+int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline)
 {
 	struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
 
@@ -5391,7 +5379,7 @@ int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline)
 }
 
 /**
- * pevent_data_comm_from_pid - parse the data into the print format
+ * tep_event_info - parse the data into the print format
  * @s: the trace_seq to write to
  * @event: the handle to the event
  * @record: the record to read from
@@ -5399,16 +5387,16 @@ int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline)
  * This parses the raw @data using the given @event information and
  * writes the print format into the trace_seq.
  */
-void pevent_event_info(struct trace_seq *s, struct event_format *event,
-		       struct pevent_record *record)
+void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
+		    struct tep_record *record)
 {
 	int print_pretty = 1;
 
-	if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
-		pevent_print_fields(s, record->data, record->size, event);
+	if (event->pevent->print_raw || (event->flags & TEP_EVENT_FL_PRINTRAW))
+		tep_print_fields(s, record->data, record->size, event);
 	else {
 
-		if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
+		if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE))
 			print_pretty = event->handler(s, record, event,
 						      event->context);
 
@@ -5433,15 +5421,15 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
 }
 
 /**
- * pevent_find_event_by_record - return the event from a given record
+ * tep_find_event_by_record - return the event from a given record
  * @pevent: a handle to the pevent
  * @record: The record to get the event from
  *
  * Returns the associated event for a given record, or NULL if non is
  * is found.
  */
-struct event_format *
-pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
+struct tep_event_format *
+tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record)
 {
 	int type;
 
@@ -5452,11 +5440,11 @@ pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
 
 	type = trace_parse_common_type(pevent, record->data);
 
-	return pevent_find_event(pevent, type);
+	return tep_find_event(pevent, type);
 }
 
 /**
- * pevent_print_event_task - Write the event task comm, pid and CPU
+ * tep_print_event_task - Write the event task comm, pid and CPU
  * @pevent: a handle to the pevent
  * @s: the trace_seq to write to
  * @event: the handle to the record's event
@@ -5464,9 +5452,9 @@ pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
  *
  * Writes the tasks comm, pid and CPU to @s.
  */
-void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
-			     struct event_format *event,
-			     struct pevent_record *record)
+void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
+			  struct tep_event_format *event,
+			  struct tep_record *record)
 {
 	void *data = record->data;
 	const char *comm;
@@ -5483,7 +5471,7 @@ void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
 }
 
 /**
- * pevent_print_event_time - Write the event timestamp
+ * tep_print_event_time - Write the event timestamp
  * @pevent: a handle to the pevent
  * @s: the trace_seq to write to
  * @event: the handle to the record's event
@@ -5492,10 +5480,10 @@ void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
  *
  * Writes the timestamp of the record into @s.
  */
-void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
-			     struct event_format *event,
-			     struct pevent_record *record,
-			     bool use_trace_clock)
+void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
+			  struct tep_event_format *event,
+			  struct tep_record *record,
+			  bool use_trace_clock)
 {
 	unsigned long secs;
 	unsigned long usecs;
@@ -5511,11 +5499,11 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
 	}
 
 	if (pevent->latency_format) {
-		pevent_data_lat_fmt(pevent, s, record);
+		tep_data_lat_fmt(pevent, s, record);
 	}
 
 	if (use_usec_format) {
-		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
+		if (pevent->flags & TEP_NSEC_OUTPUT) {
 			usecs = nsecs;
 			p = 9;
 		} else {
@@ -5534,7 +5522,7 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
 }
 
 /**
- * pevent_print_event_data - Write the event data section
+ * tep_print_event_data - Write the event data section
  * @pevent: a handle to the pevent
  * @s: the trace_seq to write to
  * @event: the handle to the record's event
@@ -5542,9 +5530,9 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
  *
  * Writes the parsing of the record's data to @s.
  */
-void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
-			     struct event_format *event,
-			     struct pevent_record *record)
+void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
+			  struct tep_event_format *event,
+			  struct tep_record *record)
 {
 	static const char *spaces = "                    "; /* 20 spaces */
 	int len;
@@ -5556,15 +5544,15 @@ void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
 	if (len < 20)
 		trace_seq_printf(s, "%.*s", 20 - len, spaces);
 
-	pevent_event_info(s, event, record);
+	tep_event_info(s, event, record);
 }
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record, bool use_trace_clock)
+void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
+		     struct tep_record *record, bool use_trace_clock)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 
-	event = pevent_find_event_by_record(pevent, record);
+	event = tep_find_event_by_record(pevent, record);
 	if (!event) {
 		int i;
 		int type = trace_parse_common_type(pevent, record->data);
@@ -5577,15 +5565,15 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 		return;
 	}
 
-	pevent_print_event_task(pevent, s, event, record);
-	pevent_print_event_time(pevent, s, event, record, use_trace_clock);
-	pevent_print_event_data(pevent, s, event, record);
+	tep_print_event_task(pevent, s, event, record);
+	tep_print_event_time(pevent, s, event, record, use_trace_clock);
+	tep_print_event_data(pevent, s, event, record);
 }
 
 static int events_id_cmp(const void *a, const void *b)
 {
-	struct event_format * const * ea = a;
-	struct event_format * const * eb = b;
+	struct tep_event_format * const * ea = a;
+	struct tep_event_format * const * eb = b;
 
 	if ((*ea)->id < (*eb)->id)
 		return -1;
@@ -5598,8 +5586,8 @@ static int events_id_cmp(const void *a, const void *b)
 
 static int events_name_cmp(const void *a, const void *b)
 {
-	struct event_format * const * ea = a;
-	struct event_format * const * eb = b;
+	struct tep_event_format * const * ea = a;
+	struct tep_event_format * const * eb = b;
 	int res;
 
 	res = strcmp((*ea)->name, (*eb)->name);
@@ -5615,8 +5603,8 @@ static int events_name_cmp(const void *a, const void *b)
 
 static int events_system_cmp(const void *a, const void *b)
 {
-	struct event_format * const * ea = a;
-	struct event_format * const * eb = b;
+	struct tep_event_format * const * ea = a;
+	struct tep_event_format * const * eb = b;
 	int res;
 
 	res = strcmp((*ea)->system, (*eb)->system);
@@ -5630,9 +5618,9 @@ static int events_system_cmp(const void *a, const void *b)
 	return events_id_cmp(a, b);
 }
 
-struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type sort_type)
+struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type)
 {
-	struct event_format **events;
+	struct tep_event_format **events;
 	int (*sort)(const void *a, const void *b);
 
 	events = pevent->sort_events;
@@ -5651,20 +5639,20 @@ struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_
 		pevent->sort_events = events;
 
 		/* the internal events are sorted by id */
-		if (sort_type == EVENT_SORT_ID) {
+		if (sort_type == TEP_EVENT_SORT_ID) {
 			pevent->last_type = sort_type;
 			return events;
 		}
 	}
 
 	switch (sort_type) {
-	case EVENT_SORT_ID:
+	case TEP_EVENT_SORT_ID:
 		sort = events_id_cmp;
 		break;
-	case EVENT_SORT_NAME:
+	case TEP_EVENT_SORT_NAME:
 		sort = events_name_cmp;
 		break;
-	case EVENT_SORT_SYSTEM:
+	case TEP_EVENT_SORT_SYSTEM:
 		sort = events_system_cmp;
 		break;
 	default:
@@ -5677,12 +5665,12 @@ struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_
 	return events;
 }
 
-static struct format_field **
+static struct tep_format_field **
 get_event_fields(const char *type, const char *name,
-		 int count, struct format_field *list)
+		 int count, struct tep_format_field *list)
 {
-	struct format_field **fields;
-	struct format_field *field;
+	struct tep_format_field **fields;
+	struct tep_format_field *field;
 	int i = 0;
 
 	fields = malloc(sizeof(*fields) * (count + 1));
@@ -5709,13 +5697,13 @@ get_event_fields(const char *type, const char *name,
 }
 
 /**
- * pevent_event_common_fields - return a list of common fields for an event
+ * tep_event_common_fields - return a list of common fields for an event
  * @event: the event to return the common fields of.
  *
  * Returns an allocated array of fields. The last item in the array is NULL.
  * The array must be freed with free().
  */
-struct format_field **pevent_event_common_fields(struct event_format *event)
+struct tep_format_field **tep_event_common_fields(struct tep_event_format *event)
 {
 	return get_event_fields("common", event->name,
 				event->format.nr_common,
@@ -5723,20 +5711,20 @@ struct format_field **pevent_event_common_fields(struct event_format *event)
 }
 
 /**
- * pevent_event_fields - return a list of event specific fields for an event
+ * tep_event_fields - return a list of event specific fields for an event
  * @event: the event to return the fields of.
  *
  * Returns an allocated array of fields. The last item in the array is NULL.
  * The array must be freed with free().
  */
-struct format_field **pevent_event_fields(struct event_format *event)
+struct tep_format_field **tep_event_fields(struct tep_event_format *event)
 {
 	return get_event_fields("event", event->name,
 				event->format.nr_fields,
 				event->format.fields);
 }
 
-static void print_fields(struct trace_seq *s, struct print_flag_sym *field)
+static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field)
 {
 	trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
 	if (field->next) {
@@ -5746,22 +5734,22 @@ static void print_fields(struct trace_seq *s, struct print_flag_sym *field)
 }
 
 /* for debugging */
-static void print_args(struct print_arg *args)
+static void print_args(struct tep_print_arg *args)
 {
 	int print_paren = 1;
 	struct trace_seq s;
 
 	switch (args->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		printf("null");
 		break;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		printf("%s", args->atom.atom);
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		printf("REC->%s", args->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		printf("__print_flags(");
 		print_args(args->flags.field);
 		printf(", %s, ", args->flags.delim);
@@ -5771,7 +5759,7 @@ static void print_args(struct print_arg *args)
 		trace_seq_destroy(&s);
 		printf(")");
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		printf("__print_symbolic(");
 		print_args(args->symbol.field);
 		printf(", ");
@@ -5781,21 +5769,21 @@ static void print_args(struct print_arg *args)
 		trace_seq_destroy(&s);
 		printf(")");
 		break;
-	case PRINT_HEX:
+	case TEP_PRINT_HEX:
 		printf("__print_hex(");
 		print_args(args->hex.field);
 		printf(", ");
 		print_args(args->hex.size);
 		printf(")");
 		break;
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX_STR:
 		printf("__print_hex_str(");
 		print_args(args->hex.field);
 		printf(", ");
 		print_args(args->hex.size);
 		printf(")");
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		printf("__print_array(");
 		print_args(args->int_array.field);
 		printf(", ");
@@ -5804,18 +5792,18 @@ static void print_args(struct print_arg *args)
 		print_args(args->int_array.el_size);
 		printf(")");
 		break;
-	case PRINT_STRING:
-	case PRINT_BSTRING:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
 		printf("__get_str(%s)", args->string.string);
 		break;
-	case PRINT_BITMASK:
+	case TEP_PRINT_BITMASK:
 		printf("__get_bitmask(%s)", args->bitmask.bitmask);
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		printf("(%s)", args->typecast.type);
 		print_args(args->typecast.item);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(args->op.op, ":") == 0)
 			print_paren = 0;
 		if (print_paren)
@@ -5847,13 +5835,13 @@ static void parse_header_field(const char *field,
 	save_input_buf_ptr = input_buf_ptr;
 	save_input_buf_siz = input_buf_siz;
 
-	if (read_expected(EVENT_ITEM, "field") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "field") < 0)
 		return;
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return;
 
 	/* type */
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 	free_token(token);
 
@@ -5861,42 +5849,42 @@ static void parse_header_field(const char *field,
 	 * If this is not a mandatory field, then test it first.
 	 */
 	if (mandatory) {
-		if (read_expected(EVENT_ITEM, field) < 0)
+		if (read_expected(TEP_EVENT_ITEM, field) < 0)
 			return;
 	} else {
-		if (read_expect_type(EVENT_ITEM, &token) < 0)
+		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 			goto fail;
 		if (strcmp(token, field) != 0)
 			goto discard;
 		free_token(token);
 	}
 
-	if (read_expected(EVENT_OP, ";") < 0)
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
 		return;
-	if (read_expected(EVENT_ITEM, "offset") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
 		return;
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 	*offset = atoi(token);
 	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
 		return;
-	if (read_expected(EVENT_ITEM, "size") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "size") < 0)
 		return;
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 	*size = atoi(token);
 	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
 		return;
 	type = read_token(&token);
-	if (type != EVENT_NEWLINE) {
+	if (type != TEP_EVENT_NEWLINE) {
 		/* newer versions of the kernel have a "signed" type */
-		if (type != EVENT_ITEM)
+		if (type != TEP_EVENT_ITEM)
 			goto fail;
 
 		if (strcmp(token, "signed") != 0)
@@ -5904,17 +5892,17 @@ static void parse_header_field(const char *field,
 
 		free_token(token);
 
-		if (read_expected(EVENT_OP, ":") < 0)
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
 			return;
 
-		if (read_expect_type(EVENT_ITEM, &token))
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
 			goto fail;
 
 		free_token(token);
-		if (read_expected(EVENT_OP, ";") < 0)
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
 			return;
 
-		if (read_expect_type(EVENT_NEWLINE, &token))
+		if (read_expect_type(TEP_EVENT_NEWLINE, &token))
 			goto fail;
 	}
  fail:
@@ -5930,7 +5918,7 @@ static void parse_header_field(const char *field,
 }
 
 /**
- * pevent_parse_header_page - parse the data stored in the header page
+ * tep_parse_header_page - parse the data stored in the header page
  * @pevent: the handle to the pevent
  * @buf: the buffer storing the header page format string
  * @size: the size of @buf
@@ -5941,8 +5929,8 @@ static void parse_header_field(const char *field,
  *
  * /sys/kernel/debug/tracing/events/header_page
  */
-int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size,
-			     int long_size)
+int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size,
+			  int long_size)
 {
 	int ignore;
 
@@ -5971,7 +5959,7 @@ int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long siz
 	return 0;
 }
 
-static int event_matches(struct event_format *event,
+static int event_matches(struct tep_event_format *event,
 			 int id, const char *sys_name,
 			 const char *event_name)
 {
@@ -5994,7 +5982,7 @@ static void free_handler(struct event_handler *handle)
 	free(handle);
 }
 
-static int find_event_handle(struct pevent *pevent, struct event_format *event)
+static int find_event_handle(struct tep_handle *pevent, struct tep_event_format *event)
 {
 	struct event_handler *handle, **next;
 
@@ -6023,7 +6011,7 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event)
 }
 
 /**
- * __pevent_parse_format - parse the event format
+ * __tep_parse_format - parse the event format
  * @buf: the buffer storing the event format string
  * @size: the size of @buf
  * @sys: the system the event belongs to
@@ -6035,36 +6023,36 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event)
  *
  * /sys/kernel/debug/tracing/events/.../.../format
  */
-enum pevent_errno __pevent_parse_format(struct event_format **eventp,
-					struct pevent *pevent, const char *buf,
-					unsigned long size, const char *sys)
+enum tep_errno __tep_parse_format(struct tep_event_format **eventp,
+				  struct tep_handle *pevent, const char *buf,
+				  unsigned long size, const char *sys)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	int ret;
 
 	init_input_buf(buf, size);
 
 	*eventp = event = alloc_event();
 	if (!event)
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 
 	event->name = event_read_name();
 	if (!event->name) {
 		/* Bad event? */
-		ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
 		goto event_alloc_failed;
 	}
 
 	if (strcmp(sys, "ftrace") == 0) {
-		event->flags |= EVENT_FL_ISFTRACE;
+		event->flags |= TEP_EVENT_FL_ISFTRACE;
 
 		if (strcmp(event->name, "bprint") == 0)
-			event->flags |= EVENT_FL_ISBPRINT;
+			event->flags |= TEP_EVENT_FL_ISBPRINT;
 	}
 		
 	event->id = event_read_id();
 	if (event->id < 0) {
-		ret = PEVENT_ERRNO__READ_ID_FAILED;
+		ret = TEP_ERRNO__READ_ID_FAILED;
 		/*
 		 * This isn't an allocation error actually.
 		 * But as the ID is critical, just bail out.
@@ -6074,7 +6062,7 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp,
 
 	event->system = strdup(sys);
 	if (!event->system) {
-		ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
 		goto event_alloc_failed;
 	}
 
@@ -6083,7 +6071,7 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp,
 
 	ret = event_read_format(event);
 	if (ret < 0) {
-		ret = PEVENT_ERRNO__READ_FORMAT_FAILED;
+		ret = TEP_ERRNO__READ_FORMAT_FAILED;
 		goto event_parse_failed;
 	}
 
@@ -6098,28 +6086,28 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp,
 	show_warning = 1;
 
 	if (ret < 0) {
-		ret = PEVENT_ERRNO__READ_PRINT_FAILED;
+		ret = TEP_ERRNO__READ_PRINT_FAILED;
 		goto event_parse_failed;
 	}
 
-	if (!ret && (event->flags & EVENT_FL_ISFTRACE)) {
-		struct format_field *field;
-		struct print_arg *arg, **list;
+	if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) {
+		struct tep_format_field *field;
+		struct tep_print_arg *arg, **list;
 
 		/* old ftrace had no args */
 		list = &event->print_fmt.args;
 		for (field = event->format.fields; field; field = field->next) {
 			arg = alloc_arg();
 			if (!arg) {
-				event->flags |= EVENT_FL_FAILED;
-				return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED;
+				event->flags |= TEP_EVENT_FL_FAILED;
+				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
 			}
-			arg->type = PRINT_FIELD;
+			arg->type = TEP_PRINT_FIELD;
 			arg->field.name = strdup(field->name);
 			if (!arg->field.name) {
-				event->flags |= EVENT_FL_FAILED;
+				event->flags |= TEP_EVENT_FL_FAILED;
 				free_arg(arg);
-				return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED;
+				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
 			}
 			arg->field.field = field;
 			*list = arg;
@@ -6131,7 +6119,7 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp,
 	return 0;
 
  event_parse_failed:
-	event->flags |= EVENT_FL_FAILED;
+	event->flags |= TEP_EVENT_FL_FAILED;
 	return ret;
 
  event_alloc_failed:
@@ -6142,20 +6130,20 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp,
 	return ret;
 }
 
-static enum pevent_errno
-__pevent_parse_event(struct pevent *pevent,
-		     struct event_format **eventp,
-		     const char *buf, unsigned long size,
-		     const char *sys)
+static enum tep_errno
+__parse_event(struct tep_handle *pevent,
+	      struct tep_event_format **eventp,
+	      const char *buf, unsigned long size,
+	      const char *sys)
 {
-	int ret = __pevent_parse_format(eventp, pevent, buf, size, sys);
-	struct event_format *event = *eventp;
+	int ret = __tep_parse_format(eventp, pevent, buf, size, sys);
+	struct tep_event_format *event = *eventp;
 
 	if (event == NULL)
 		return ret;
 
 	if (pevent && add_event(pevent, event)) {
-		ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
 		goto event_add_failed;
 	}
 
@@ -6166,12 +6154,12 @@ __pevent_parse_event(struct pevent *pevent,
 	return 0;
 
 event_add_failed:
-	pevent_free_format(event);
+	tep_free_format(event);
 	return ret;
 }
 
 /**
- * pevent_parse_format - parse the event format
+ * tep_parse_format - parse the event format
  * @pevent: the handle to the pevent
  * @eventp: returned format
  * @buf: the buffer storing the event format string
@@ -6185,16 +6173,16 @@ event_add_failed:
  *
  * /sys/kernel/debug/tracing/events/.../.../format
  */
-enum pevent_errno pevent_parse_format(struct pevent *pevent,
-				      struct event_format **eventp,
-				      const char *buf,
-				      unsigned long size, const char *sys)
+enum tep_errno tep_parse_format(struct tep_handle *pevent,
+				struct tep_event_format **eventp,
+				const char *buf,
+				unsigned long size, const char *sys)
 {
-	return __pevent_parse_event(pevent, eventp, buf, size, sys);
+	return __parse_event(pevent, eventp, buf, size, sys);
 }
 
 /**
- * pevent_parse_event - parse the event format
+ * tep_parse_event - parse the event format
  * @pevent: the handle to the pevent
  * @buf: the buffer storing the event format string
  * @size: the size of @buf
@@ -6207,44 +6195,15 @@ enum pevent_errno pevent_parse_format(struct pevent *pevent,
  *
  * /sys/kernel/debug/tracing/events/.../.../format
  */
-enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf,
-				     unsigned long size, const char *sys)
+enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
+			       unsigned long size, const char *sys)
 {
-	struct event_format *event = NULL;
-	return __pevent_parse_event(pevent, &event, buf, size, sys);
+	struct tep_event_format *event = NULL;
+	return __parse_event(pevent, &event, buf, size, sys);
 }
 
-#undef _PE
-#define _PE(code, str) str
-static const char * const pevent_error_str[] = {
-	PEVENT_ERRORS
-};
-#undef _PE
-
-int pevent_strerror(struct pevent *pevent __maybe_unused,
-		    enum pevent_errno errnum, char *buf, size_t buflen)
-{
-	int idx;
-	const char *msg;
-
-	if (errnum >= 0) {
-		str_error_r(errnum, buf, buflen);
-		return 0;
-	}
-
-	if (errnum <= __PEVENT_ERRNO__START ||
-	    errnum >= __PEVENT_ERRNO__END)
-		return -1;
-
-	idx = errnum - __PEVENT_ERRNO__START - 1;
-	msg = pevent_error_str[idx];
-	snprintf(buf, buflen, "%s", msg);
-
-	return 0;
-}
-
-int get_field_val(struct trace_seq *s, struct format_field *field,
-		  const char *name, struct pevent_record *record,
+int get_field_val(struct trace_seq *s, struct tep_format_field *field,
+		  const char *name, struct tep_record *record,
 		  unsigned long long *val, int err)
 {
 	if (!field) {
@@ -6253,7 +6212,7 @@ int get_field_val(struct trace_seq *s, struct format_field *field,
 		return -1;
 	}
 
-	if (pevent_read_number_field(field, record->data, val)) {
+	if (tep_read_number_field(field, record->data, val)) {
 		if (err)
 			trace_seq_printf(s, " %s=INVALID", name);
 		return -1;
@@ -6263,7 +6222,7 @@ int get_field_val(struct trace_seq *s, struct format_field *field,
 }
 
 /**
- * pevent_get_field_raw - return the raw pointer into the data field
+ * tep_get_field_raw - return the raw pointer into the data field
  * @s: The seq to print to on error
  * @event: the event that the field is for
  * @name: The name of the field
@@ -6276,11 +6235,11 @@ int get_field_val(struct trace_seq *s, struct format_field *field,
  *
  * On failure, it returns NULL.
  */
-void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
-			   const char *name, struct pevent_record *record,
-			   int *len, int err)
+void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
+			const char *name, struct tep_record *record,
+			int *len, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	void *data = record->data;
 	unsigned offset;
 	int dummy;
@@ -6288,7 +6247,7 @@ void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
 	if (!event)
 		return NULL;
 
-	field = pevent_find_field(event, name);
+	field = tep_find_field(event, name);
 
 	if (!field) {
 		if (err)
@@ -6301,8 +6260,8 @@ void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
 		len = &dummy;
 
 	offset = field->offset;
-	if (field->flags & FIELD_IS_DYNAMIC) {
-		offset = pevent_read_number(event->pevent,
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+		offset = tep_read_number(event->pevent,
 					    data + offset, field->size);
 		*len = offset >> 16;
 		offset &= 0xffff;
@@ -6313,7 +6272,7 @@ void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
 }
 
 /**
- * pevent_get_field_val - find a field and return its value
+ * tep_get_field_val - find a field and return its value
  * @s: The seq to print to on error
  * @event: the event that the field is for
  * @name: The name of the field
@@ -6323,22 +6282,22 @@ void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
  *
  * Returns 0 on success -1 on field not found.
  */
-int pevent_get_field_val(struct trace_seq *s, struct event_format *event,
-			 const char *name, struct pevent_record *record,
-			 unsigned long long *val, int err)
+int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
+		      const char *name, struct tep_record *record,
+		      unsigned long long *val, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!event)
 		return -1;
 
-	field = pevent_find_field(event, name);
+	field = tep_find_field(event, name);
 
 	return get_field_val(s, field, name, record, val, err);
 }
 
 /**
- * pevent_get_common_field_val - find a common field and return its value
+ * tep_get_common_field_val - find a common field and return its value
  * @s: The seq to print to on error
  * @event: the event that the field is for
  * @name: The name of the field
@@ -6348,22 +6307,22 @@ int pevent_get_field_val(struct trace_seq *s, struct event_format *event,
  *
  * Returns 0 on success -1 on field not found.
  */
-int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event,
-				const char *name, struct pevent_record *record,
-				unsigned long long *val, int err)
+int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event,
+			     const char *name, struct tep_record *record,
+			     unsigned long long *val, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!event)
 		return -1;
 
-	field = pevent_find_common_field(event, name);
+	field = tep_find_common_field(event, name);
 
 	return get_field_val(s, field, name, record, val, err);
 }
 
 /**
- * pevent_get_any_field_val - find a any field and return its value
+ * tep_get_any_field_val - find a any field and return its value
  * @s: The seq to print to on error
  * @event: the event that the field is for
  * @name: The name of the field
@@ -6373,22 +6332,22 @@ int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event,
  *
  * Returns 0 on success -1 on field not found.
  */
-int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,
-			     const char *name, struct pevent_record *record,
-			     unsigned long long *val, int err)
+int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
+			  const char *name, struct tep_record *record,
+			  unsigned long long *val, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!event)
 		return -1;
 
-	field = pevent_find_any_field(event, name);
+	field = tep_find_any_field(event, name);
 
 	return get_field_val(s, field, name, record, val, err);
 }
 
 /**
- * pevent_print_num_field - print a field and a format
+ * tep_print_num_field - print a field and a format
  * @s: The seq to print to
  * @fmt: The printf format to print the field with.
  * @event: the event that the field is for
@@ -6398,17 +6357,17 @@ int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,
  *
  * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
  */
-int pevent_print_num_field(struct trace_seq *s, const char *fmt,
-			   struct event_format *event, const char *name,
-			   struct pevent_record *record, int err)
+int tep_print_num_field(struct trace_seq *s, const char *fmt,
+			struct tep_event_format *event, const char *name,
+			struct tep_record *record, int err)
 {
-	struct format_field *field = pevent_find_field(event, name);
+	struct tep_format_field *field = tep_find_field(event, name);
 	unsigned long long val;
 
 	if (!field)
 		goto failed;
 
-	if (pevent_read_number_field(field, record->data, &val))
+	if (tep_read_number_field(field, record->data, &val))
 		goto failed;
 
 	return trace_seq_printf(s, fmt, val);
@@ -6420,7 +6379,7 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt,
 }
 
 /**
- * pevent_print_func_field - print a field and a format for function pointers
+ * tep_print_func_field - print a field and a format for function pointers
  * @s: The seq to print to
  * @fmt: The printf format to print the field with.
  * @event: the event that the field is for
@@ -6430,12 +6389,12 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt,
  *
  * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
  */
-int pevent_print_func_field(struct trace_seq *s, const char *fmt,
-			    struct event_format *event, const char *name,
-			    struct pevent_record *record, int err)
+int tep_print_func_field(struct trace_seq *s, const char *fmt,
+			 struct tep_event_format *event, const char *name,
+			 struct tep_record *record, int err)
 {
-	struct format_field *field = pevent_find_field(event, name);
-	struct pevent *pevent = event->pevent;
+	struct tep_format_field *field = tep_find_field(event, name);
+	struct tep_handle *pevent = event->pevent;
 	unsigned long long val;
 	struct func_map *func;
 	char tmp[128];
@@ -6443,7 +6402,7 @@ int pevent_print_func_field(struct trace_seq *s, const char *fmt,
 	if (!field)
 		goto failed;
 
-	if (pevent_read_number_field(field, record->data, &val))
+	if (tep_read_number_field(field, record->data, &val))
 		goto failed;
 
 	func = find_func(pevent, val);
@@ -6461,9 +6420,9 @@ int pevent_print_func_field(struct trace_seq *s, const char *fmt,
 	return -1;
 }
 
-static void free_func_handle(struct pevent_function_handler *func)
+static void free_func_handle(struct tep_function_handler *func)
 {
-	struct pevent_func_params *params;
+	struct func_params *params;
 
 	free(func->name);
 
@@ -6477,29 +6436,29 @@ static void free_func_handle(struct pevent_function_handler *func)
 }
 
 /**
- * pevent_register_print_function - register a helper function
+ * tep_register_print_function - register a helper function
  * @pevent: the handle to the pevent
  * @func: the function to process the helper function
  * @ret_type: the return type of the helper function
  * @name: the name of the helper function
- * @parameters: A list of enum pevent_func_arg_type
+ * @parameters: A list of enum tep_func_arg_type
  *
  * Some events may have helper functions in the print format arguments.
  * This allows a plugin to dynamically create a way to process one
  * of these functions.
  *
- * The @parameters is a variable list of pevent_func_arg_type enums that
- * must end with PEVENT_FUNC_ARG_VOID.
+ * The @parameters is a variable list of tep_func_arg_type enums that
+ * must end with TEP_FUNC_ARG_VOID.
  */
-int pevent_register_print_function(struct pevent *pevent,
-				   pevent_func_handler func,
-				   enum pevent_func_arg_type ret_type,
-				   char *name, ...)
-{
-	struct pevent_function_handler *func_handle;
-	struct pevent_func_params **next_param;
-	struct pevent_func_params *param;
-	enum pevent_func_arg_type type;
+int tep_register_print_function(struct tep_handle *pevent,
+				tep_func_handler func,
+				enum tep_func_arg_type ret_type,
+				char *name, ...)
+{
+	struct tep_function_handler *func_handle;
+	struct func_params **next_param;
+	struct func_params *param;
+	enum tep_func_arg_type type;
 	va_list ap;
 	int ret;
 
@@ -6517,7 +6476,7 @@ int pevent_register_print_function(struct pevent *pevent,
 	func_handle = calloc(1, sizeof(*func_handle));
 	if (!func_handle) {
 		do_warning("Failed to allocate function handler");
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 	}
 
 	func_handle->ret_type = ret_type;
@@ -6526,26 +6485,26 @@ int pevent_register_print_function(struct pevent *pevent,
 	if (!func_handle->name) {
 		do_warning("Failed to allocate function name");
 		free(func_handle);
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 	}
 
 	next_param = &(func_handle->params);
 	va_start(ap, name);
 	for (;;) {
-		type = va_arg(ap, enum pevent_func_arg_type);
-		if (type == PEVENT_FUNC_ARG_VOID)
+		type = va_arg(ap, enum tep_func_arg_type);
+		if (type == TEP_FUNC_ARG_VOID)
 			break;
 
-		if (type >= PEVENT_FUNC_ARG_MAX_TYPES) {
+		if (type >= TEP_FUNC_ARG_MAX_TYPES) {
 			do_warning("Invalid argument type %d", type);
-			ret = PEVENT_ERRNO__INVALID_ARG_TYPE;
+			ret = TEP_ERRNO__INVALID_ARG_TYPE;
 			goto out_free;
 		}
 
 		param = malloc(sizeof(*param));
 		if (!param) {
 			do_warning("Failed to allocate function param");
-			ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			ret = TEP_ERRNO__MEM_ALLOC_FAILED;
 			goto out_free;
 		}
 		param->type = type;
@@ -6569,7 +6528,7 @@ int pevent_register_print_function(struct pevent *pevent,
 }
 
 /**
- * pevent_unregister_print_function - unregister a helper function
+ * tep_unregister_print_function - unregister a helper function
  * @pevent: the handle to the pevent
  * @func: the function to process the helper function
  * @name: the name of the helper function
@@ -6578,10 +6537,10 @@ int pevent_register_print_function(struct pevent *pevent,
  *
  * Returns 0 if the handler was removed successully, -1 otherwise.
  */
-int pevent_unregister_print_function(struct pevent *pevent,
-				     pevent_func_handler func, char *name)
+int tep_unregister_print_function(struct tep_handle *pevent,
+				  tep_func_handler func, char *name)
 {
-	struct pevent_function_handler *func_handle;
+	struct tep_function_handler *func_handle;
 
 	func_handle = find_func_handler(pevent, name);
 	if (func_handle && func_handle->func == func) {
@@ -6591,15 +6550,15 @@ int pevent_unregister_print_function(struct pevent *pevent,
 	return -1;
 }
 
-static struct event_format *pevent_search_event(struct pevent *pevent, int id,
-						const char *sys_name,
-						const char *event_name)
+static struct tep_event_format *search_event(struct tep_handle *pevent, int id,
+					 const char *sys_name,
+					 const char *event_name)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 
 	if (id >= 0) {
 		/* search by id */
-		event = pevent_find_event(pevent, id);
+		event = tep_find_event(pevent, id);
 		if (!event)
 			return NULL;
 		if (event_name && (strcmp(event_name, event->name) != 0))
@@ -6607,7 +6566,7 @@ static struct event_format *pevent_search_event(struct pevent *pevent, int id,
 		if (sys_name && (strcmp(sys_name, event->system) != 0))
 			return NULL;
 	} else {
-		event = pevent_find_event_by_name(pevent, sys_name, event_name);
+		event = tep_find_event_by_name(pevent, sys_name, event_name);
 		if (!event)
 			return NULL;
 	}
@@ -6615,7 +6574,7 @@ static struct event_format *pevent_search_event(struct pevent *pevent, int id,
 }
 
 /**
- * pevent_register_event_handler - register a way to parse an event
+ * tep_register_event_handler - register a way to parse an event
  * @pevent: the handle to the pevent
  * @id: the id of the event to register
  * @sys_name: the system name the event belongs to
@@ -6631,14 +6590,14 @@ static struct event_format *pevent_search_event(struct pevent *pevent, int id,
  * If @id is >= 0, then it is used to find the event.
  * else @sys_name and @event_name are used.
  */
-int pevent_register_event_handler(struct pevent *pevent, int id,
-				  const char *sys_name, const char *event_name,
-				  pevent_event_handler_func func, void *context)
+int tep_register_event_handler(struct tep_handle *pevent, int id,
+			       const char *sys_name, const char *event_name,
+			       tep_event_handler_func func, void *context)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	struct event_handler *handle;
 
-	event = pevent_search_event(pevent, id, sys_name, event_name);
+	event = search_event(pevent, id, sys_name, event_name);
 	if (event == NULL)
 		goto not_found;
 
@@ -6654,7 +6613,7 @@ int pevent_register_event_handler(struct pevent *pevent, int id,
 	handle = calloc(1, sizeof(*handle));
 	if (!handle) {
 		do_warning("Failed to allocate event handler");
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 	}
 
 	handle->id = id;
@@ -6669,7 +6628,7 @@ int pevent_register_event_handler(struct pevent *pevent, int id,
 		free((void *)handle->event_name);
 		free((void *)handle->sys_name);
 		free(handle);
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 	}
 
 	handle->func = func;
@@ -6682,7 +6641,7 @@ int pevent_register_event_handler(struct pevent *pevent, int id,
 
 static int handle_matches(struct event_handler *handler, int id,
 			  const char *sys_name, const char *event_name,
-			  pevent_event_handler_func func, void *context)
+			  tep_event_handler_func func, void *context)
 {
 	if (id >= 0 && id != handler->id)
 		return 0;
@@ -6700,7 +6659,7 @@ static int handle_matches(struct event_handler *handler, int id,
 }
 
 /**
- * pevent_unregister_event_handler - unregister an existing event handler
+ * tep_unregister_event_handler - unregister an existing event handler
  * @pevent: the handle to the pevent
  * @id: the id of the event to unregister
  * @sys_name: the system name the handler belongs to
@@ -6715,15 +6674,15 @@ static int handle_matches(struct event_handler *handler, int id,
  *
  * Returns 0 if handler was removed successfully, -1 if event was not found.
  */
-int pevent_unregister_event_handler(struct pevent *pevent, int id,
-				    const char *sys_name, const char *event_name,
-				    pevent_event_handler_func func, void *context)
+int tep_unregister_event_handler(struct tep_handle *pevent, int id,
+				 const char *sys_name, const char *event_name,
+				 tep_event_handler_func func, void *context)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	struct event_handler *handle;
 	struct event_handler **next;
 
-	event = pevent_search_event(pevent, id, sys_name, event_name);
+	event = search_event(pevent, id, sys_name, event_name);
 	if (event == NULL)
 		goto not_found;
 
@@ -6754,11 +6713,11 @@ not_found:
 }
 
 /**
- * pevent_alloc - create a pevent handle
+ * tep_alloc - create a pevent handle
  */
-struct pevent *pevent_alloc(void)
+struct tep_handle *tep_alloc(void)
 {
-	struct pevent *pevent = calloc(1, sizeof(*pevent));
+	struct tep_handle *pevent = calloc(1, sizeof(*pevent));
 
 	if (pevent)
 		pevent->ref_count = 1;
@@ -6766,12 +6725,12 @@ struct pevent *pevent_alloc(void)
 	return pevent;
 }
 
-void pevent_ref(struct pevent *pevent)
+void tep_ref(struct tep_handle *pevent)
 {
 	pevent->ref_count++;
 }
 
-void pevent_free_format_field(struct format_field *field)
+void tep_free_format_field(struct tep_format_field *field)
 {
 	free(field->type);
 	if (field->alias != field->name)
@@ -6780,24 +6739,24 @@ void pevent_free_format_field(struct format_field *field)
 	free(field);
 }
 
-static void free_format_fields(struct format_field *field)
+static void free_format_fields(struct tep_format_field *field)
 {
-	struct format_field *next;
+	struct tep_format_field *next;
 
 	while (field) {
 		next = field->next;
-		pevent_free_format_field(field);
+		tep_free_format_field(field);
 		field = next;
 	}
 }
 
-static void free_formats(struct format *format)
+static void free_formats(struct tep_format *format)
 {
 	free_format_fields(format->common_fields);
 	free_format_fields(format->fields);
 }
 
-void pevent_free_format(struct event_format *event)
+void tep_free_format(struct tep_event_format *event)
 {
 	free(event->name);
 	free(event->system);
@@ -6811,15 +6770,15 @@ void pevent_free_format(struct event_format *event)
 }
 
 /**
- * pevent_free - free a pevent handle
+ * tep_free - free a pevent handle
  * @pevent: the pevent handle to free
  */
-void pevent_free(struct pevent *pevent)
+void tep_free(struct tep_handle *pevent)
 {
 	struct cmdline_list *cmdlist, *cmdnext;
 	struct func_list *funclist, *funcnext;
 	struct printk_list *printklist, *printknext;
-	struct pevent_function_handler *func_handler;
+	struct tep_function_handler *func_handler;
 	struct event_handler *handle;
 	int i;
 
@@ -6883,7 +6842,7 @@ void pevent_free(struct pevent *pevent)
 	}
 
 	for (i = 0; i < pevent->nr_events; i++)
-		pevent_free_format(pevent->events[i]);
+		tep_free_format(pevent->events[i]);
 
 	while (pevent->handlers) {
 		handle = pevent->handlers;
@@ -6899,7 +6858,7 @@ void pevent_free(struct pevent *pevent)
 	free(pevent);
 }
 
-void pevent_unref(struct pevent *pevent)
+void tep_unref(struct tep_handle *pevent)
 {
-	pevent_free(pevent);
+	tep_free(pevent);
 }
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 0c03538df74c..16bf4c890b6f 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -26,22 +26,17 @@
 #include <regex.h>
 #include <string.h>
 
+#include "trace-seq.h"
+
 #ifndef __maybe_unused
 #define __maybe_unused __attribute__((unused))
 #endif
 
-/* ----------------------- trace_seq ----------------------- */
-
-
-#ifndef TRACE_SEQ_BUF_SIZE
-#define TRACE_SEQ_BUF_SIZE 4096
-#endif
-
 #ifndef DEBUG_RECORD
 #define DEBUG_RECORD 0
 #endif
 
-struct pevent_record {
+struct tep_record {
 	unsigned long long	ts;
 	unsigned long long	offset;
 	long long		missed_events;	/* buffer dropped events before */
@@ -53,64 +48,27 @@ struct pevent_record {
 	int			locked;		/* Do not free, even if ref_count is zero */
 	void			*priv;
 #if DEBUG_RECORD
-	struct pevent_record	*prev;
-	struct pevent_record	*next;
+	struct tep_record	*prev;
+	struct tep_record	*next;
 	long			alloc_addr;
 #endif
 };
 
-enum trace_seq_fail {
-	TRACE_SEQ__GOOD,
-	TRACE_SEQ__BUFFER_POISONED,
-	TRACE_SEQ__MEM_ALLOC_FAILED,
-};
-
-/*
- * Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE).
- */
-
-struct trace_seq {
-	char			*buffer;
-	unsigned int		buffer_size;
-	unsigned int		len;
-	unsigned int		readpos;
-	enum trace_seq_fail	state;
-};
-
-void trace_seq_init(struct trace_seq *s);
-void trace_seq_reset(struct trace_seq *s);
-void trace_seq_destroy(struct trace_seq *s);
-
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
-	__attribute__ ((format (printf, 2, 0)));
-
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+/* ----------------------- tep ----------------------- */
 
-extern void trace_seq_terminate(struct trace_seq *s);
+struct tep_handle;
+struct tep_event_format;
 
-extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
-extern int trace_seq_do_printf(struct trace_seq *s);
+typedef int (*tep_event_handler_func)(struct trace_seq *s,
+				      struct tep_record *record,
+				      struct tep_event_format *event,
+				      void *context);
 
+typedef int (*tep_plugin_load_func)(struct tep_handle *pevent);
+typedef int (*tep_plugin_unload_func)(struct tep_handle *pevent);
 
-/* ----------------------- pevent ----------------------- */
-
-struct pevent;
-struct event_format;
-
-typedef int (*pevent_event_handler_func)(struct trace_seq *s,
-					 struct pevent_record *record,
-					 struct event_format *event,
-					 void *context);
-
-typedef int (*pevent_plugin_load_func)(struct pevent *pevent);
-typedef int (*pevent_plugin_unload_func)(struct pevent *pevent);
-
-struct pevent_plugin_option {
-	struct pevent_plugin_option	*next;
+struct tep_plugin_option {
+	struct tep_plugin_option	*next;
 	void				*handle;
 	char				*file;
 	char				*name;
@@ -124,20 +82,20 @@ struct pevent_plugin_option {
 /*
  * Plugin hooks that can be called:
  *
- * PEVENT_PLUGIN_LOADER:  (required)
+ * TEP_PLUGIN_LOADER:  (required)
  *   The function name to initialized the plugin.
  *
- *   int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+ *   int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
  *
- * PEVENT_PLUGIN_UNLOADER:  (optional)
+ * TEP_PLUGIN_UNLOADER:  (optional)
  *   The function called just before unloading
  *
- *   int PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+ *   int TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
  *
- * PEVENT_PLUGIN_OPTIONS:  (optional)
+ * TEP_PLUGIN_OPTIONS:  (optional)
  *   Plugin options that can be set before loading
  *
- *   struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = {
+ *   struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = {
  *	{
  *		.name = "option-name",
  *		.plugin_alias = "override-file-name", (optional)
@@ -158,34 +116,34 @@ struct pevent_plugin_option {
  *   .set will be processed. If .value is defined, then it is considered
  *   a string option and .set will be ignored.
  *
- * PEVENT_PLUGIN_ALIAS: (optional)
+ * TEP_PLUGIN_ALIAS: (optional)
  *   The name to use for finding options (uses filename if not defined)
  */
-#define PEVENT_PLUGIN_LOADER pevent_plugin_loader
-#define PEVENT_PLUGIN_UNLOADER pevent_plugin_unloader
-#define PEVENT_PLUGIN_OPTIONS pevent_plugin_options
-#define PEVENT_PLUGIN_ALIAS pevent_plugin_alias
+#define TEP_PLUGIN_LOADER tep_plugin_loader
+#define TEP_PLUGIN_UNLOADER tep_plugin_unloader
+#define TEP_PLUGIN_OPTIONS tep_plugin_options
+#define TEP_PLUGIN_ALIAS tep_plugin_alias
 #define _MAKE_STR(x)	#x
 #define MAKE_STR(x)	_MAKE_STR(x)
-#define PEVENT_PLUGIN_LOADER_NAME MAKE_STR(PEVENT_PLUGIN_LOADER)
-#define PEVENT_PLUGIN_UNLOADER_NAME MAKE_STR(PEVENT_PLUGIN_UNLOADER)
-#define PEVENT_PLUGIN_OPTIONS_NAME MAKE_STR(PEVENT_PLUGIN_OPTIONS)
-#define PEVENT_PLUGIN_ALIAS_NAME MAKE_STR(PEVENT_PLUGIN_ALIAS)
-
-enum format_flags {
-	FIELD_IS_ARRAY		= 1,
-	FIELD_IS_POINTER	= 2,
-	FIELD_IS_SIGNED		= 4,
-	FIELD_IS_STRING		= 8,
-	FIELD_IS_DYNAMIC	= 16,
-	FIELD_IS_LONG		= 32,
-	FIELD_IS_FLAG		= 64,
-	FIELD_IS_SYMBOLIC	= 128,
-};
-
-struct format_field {
-	struct format_field	*next;
-	struct event_format	*event;
+#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER)
+#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER)
+#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS)
+#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS)
+
+enum tep_format_flags {
+	TEP_FIELD_IS_ARRAY	= 1,
+	TEP_FIELD_IS_POINTER	= 2,
+	TEP_FIELD_IS_SIGNED	= 4,
+	TEP_FIELD_IS_STRING	= 8,
+	TEP_FIELD_IS_DYNAMIC	= 16,
+	TEP_FIELD_IS_LONG	= 32,
+	TEP_FIELD_IS_FLAG	= 64,
+	TEP_FIELD_IS_SYMBOLIC	= 128,
+};
+
+struct tep_format_field {
+	struct tep_format_field	*next;
+	struct tep_event_format	*event;
 	char			*type;
 	char			*name;
 	char			*alias;
@@ -196,190 +154,190 @@ struct format_field {
 	unsigned long		flags;
 };
 
-struct format {
+struct tep_format {
 	int			nr_common;
 	int			nr_fields;
-	struct format_field	*common_fields;
-	struct format_field	*fields;
+	struct tep_format_field	*common_fields;
+	struct tep_format_field	*fields;
 };
 
-struct print_arg_atom {
+struct tep_print_arg_atom {
 	char			*atom;
 };
 
-struct print_arg_string {
+struct tep_print_arg_string {
 	char			*string;
 	int			offset;
 };
 
-struct print_arg_bitmask {
+struct tep_print_arg_bitmask {
 	char			*bitmask;
 	int			offset;
 };
 
-struct print_arg_field {
+struct tep_print_arg_field {
 	char			*name;
-	struct format_field	*field;
+	struct tep_format_field	*field;
 };
 
-struct print_flag_sym {
-	struct print_flag_sym	*next;
-	char			*value;
-	char			*str;
+struct tep_print_flag_sym {
+	struct tep_print_flag_sym	*next;
+	char				*value;
+	char				*str;
 };
 
-struct print_arg_typecast {
+struct tep_print_arg_typecast {
 	char 			*type;
-	struct print_arg	*item;
+	struct tep_print_arg	*item;
 };
 
-struct print_arg_flags {
-	struct print_arg	*field;
-	char			*delim;
-	struct print_flag_sym	*flags;
+struct tep_print_arg_flags {
+	struct tep_print_arg		*field;
+	char				*delim;
+	struct tep_print_flag_sym	*flags;
 };
 
-struct print_arg_symbol {
-	struct print_arg	*field;
-	struct print_flag_sym	*symbols;
+struct tep_print_arg_symbol {
+	struct tep_print_arg		*field;
+	struct tep_print_flag_sym	*symbols;
 };
 
-struct print_arg_hex {
-	struct print_arg	*field;
-	struct print_arg	*size;
+struct tep_print_arg_hex {
+	struct tep_print_arg	*field;
+	struct tep_print_arg	*size;
 };
 
-struct print_arg_int_array {
-	struct print_arg	*field;
-	struct print_arg	*count;
-	struct print_arg	*el_size;
+struct tep_print_arg_int_array {
+	struct tep_print_arg	*field;
+	struct tep_print_arg	*count;
+	struct tep_print_arg	*el_size;
 };
 
-struct print_arg_dynarray {
-	struct format_field	*field;
-	struct print_arg	*index;
+struct tep_print_arg_dynarray {
+	struct tep_format_field	*field;
+	struct tep_print_arg	*index;
 };
 
-struct print_arg;
+struct tep_print_arg;
 
-struct print_arg_op {
+struct tep_print_arg_op {
 	char			*op;
 	int			prio;
-	struct print_arg	*left;
-	struct print_arg	*right;
-};
-
-struct pevent_function_handler;
-
-struct print_arg_func {
-	struct pevent_function_handler	*func;
-	struct print_arg		*args;
-};
-
-enum print_arg_type {
-	PRINT_NULL,
-	PRINT_ATOM,
-	PRINT_FIELD,
-	PRINT_FLAGS,
-	PRINT_SYMBOL,
-	PRINT_HEX,
-	PRINT_INT_ARRAY,
-	PRINT_TYPE,
-	PRINT_STRING,
-	PRINT_BSTRING,
-	PRINT_DYNAMIC_ARRAY,
-	PRINT_OP,
-	PRINT_FUNC,
-	PRINT_BITMASK,
-	PRINT_DYNAMIC_ARRAY_LEN,
-	PRINT_HEX_STR,
-};
-
-struct print_arg {
-	struct print_arg		*next;
-	enum print_arg_type		type;
+	struct tep_print_arg	*left;
+	struct tep_print_arg	*right;
+};
+
+struct tep_function_handler;
+
+struct tep_print_arg_func {
+	struct tep_function_handler	*func;
+	struct tep_print_arg		*args;
+};
+
+enum tep_print_arg_type {
+	TEP_PRINT_NULL,
+	TEP_PRINT_ATOM,
+	TEP_PRINT_FIELD,
+	TEP_PRINT_FLAGS,
+	TEP_PRINT_SYMBOL,
+	TEP_PRINT_HEX,
+	TEP_PRINT_INT_ARRAY,
+	TEP_PRINT_TYPE,
+	TEP_PRINT_STRING,
+	TEP_PRINT_BSTRING,
+	TEP_PRINT_DYNAMIC_ARRAY,
+	TEP_PRINT_OP,
+	TEP_PRINT_FUNC,
+	TEP_PRINT_BITMASK,
+	TEP_PRINT_DYNAMIC_ARRAY_LEN,
+	TEP_PRINT_HEX_STR,
+};
+
+struct tep_print_arg {
+	struct tep_print_arg		*next;
+	enum tep_print_arg_type		type;
 	union {
-		struct print_arg_atom		atom;
-		struct print_arg_field		field;
-		struct print_arg_typecast	typecast;
-		struct print_arg_flags		flags;
-		struct print_arg_symbol		symbol;
-		struct print_arg_hex		hex;
-		struct print_arg_int_array	int_array;
-		struct print_arg_func		func;
-		struct print_arg_string		string;
-		struct print_arg_bitmask	bitmask;
-		struct print_arg_op		op;
-		struct print_arg_dynarray	dynarray;
+		struct tep_print_arg_atom	atom;
+		struct tep_print_arg_field	field;
+		struct tep_print_arg_typecast	typecast;
+		struct tep_print_arg_flags	flags;
+		struct tep_print_arg_symbol	symbol;
+		struct tep_print_arg_hex	hex;
+		struct tep_print_arg_int_array	int_array;
+		struct tep_print_arg_func	func;
+		struct tep_print_arg_string	string;
+		struct tep_print_arg_bitmask	bitmask;
+		struct tep_print_arg_op		op;
+		struct tep_print_arg_dynarray	dynarray;
 	};
 };
 
-struct print_fmt {
+struct tep_print_fmt {
 	char			*format;
-	struct print_arg	*args;
+	struct tep_print_arg	*args;
 };
 
-struct event_format {
-	struct pevent		*pevent;
+struct tep_event_format {
+	struct tep_handle	*pevent;
 	char			*name;
 	int			id;
 	int			flags;
-	struct format		format;
-	struct print_fmt	print_fmt;
+	struct tep_format	format;
+	struct tep_print_fmt	print_fmt;
 	char			*system;
-	pevent_event_handler_func handler;
+	tep_event_handler_func	handler;
 	void			*context;
 };
 
 enum {
-	EVENT_FL_ISFTRACE	= 0x01,
-	EVENT_FL_ISPRINT	= 0x02,
-	EVENT_FL_ISBPRINT	= 0x04,
-	EVENT_FL_ISFUNCENT	= 0x10,
-	EVENT_FL_ISFUNCRET	= 0x20,
-	EVENT_FL_NOHANDLE	= 0x40,
-	EVENT_FL_PRINTRAW	= 0x80,
+	TEP_EVENT_FL_ISFTRACE	= 0x01,
+	TEP_EVENT_FL_ISPRINT	= 0x02,
+	TEP_EVENT_FL_ISBPRINT	= 0x04,
+	TEP_EVENT_FL_ISFUNCENT	= 0x10,
+	TEP_EVENT_FL_ISFUNCRET	= 0x20,
+	TEP_EVENT_FL_NOHANDLE	= 0x40,
+	TEP_EVENT_FL_PRINTRAW	= 0x80,
 
-	EVENT_FL_FAILED		= 0x80000000
+	TEP_EVENT_FL_FAILED	= 0x80000000
 };
 
-enum event_sort_type {
-	EVENT_SORT_ID,
-	EVENT_SORT_NAME,
-	EVENT_SORT_SYSTEM,
+enum tep_event_sort_type {
+	TEP_EVENT_SORT_ID,
+	TEP_EVENT_SORT_NAME,
+	TEP_EVENT_SORT_SYSTEM,
 };
 
-enum event_type {
-	EVENT_ERROR,
-	EVENT_NONE,
-	EVENT_SPACE,
-	EVENT_NEWLINE,
-	EVENT_OP,
-	EVENT_DELIM,
-	EVENT_ITEM,
-	EVENT_DQUOTE,
-	EVENT_SQUOTE,
+enum tep_event_type {
+	TEP_EVENT_ERROR,
+	TEP_EVENT_NONE,
+	TEP_EVENT_SPACE,
+	TEP_EVENT_NEWLINE,
+	TEP_EVENT_OP,
+	TEP_EVENT_DELIM,
+	TEP_EVENT_ITEM,
+	TEP_EVENT_DQUOTE,
+	TEP_EVENT_SQUOTE,
 };
 
-typedef unsigned long long (*pevent_func_handler)(struct trace_seq *s,
-					     unsigned long long *args);
+typedef unsigned long long (*tep_func_handler)(struct trace_seq *s,
+					       unsigned long long *args);
 
-enum pevent_func_arg_type {
-	PEVENT_FUNC_ARG_VOID,
-	PEVENT_FUNC_ARG_INT,
-	PEVENT_FUNC_ARG_LONG,
-	PEVENT_FUNC_ARG_STRING,
-	PEVENT_FUNC_ARG_PTR,
-	PEVENT_FUNC_ARG_MAX_TYPES
+enum tep_func_arg_type {
+	TEP_FUNC_ARG_VOID,
+	TEP_FUNC_ARG_INT,
+	TEP_FUNC_ARG_LONG,
+	TEP_FUNC_ARG_STRING,
+	TEP_FUNC_ARG_PTR,
+	TEP_FUNC_ARG_MAX_TYPES
 };
 
-enum pevent_flag {
-	PEVENT_NSEC_OUTPUT		= 1,	/* output in NSECS */
-	PEVENT_DISABLE_SYS_PLUGINS	= 1 << 1,
-	PEVENT_DISABLE_PLUGINS		= 1 << 2,
+enum tep_flag {
+	TEP_NSEC_OUTPUT		= 1,	/* output in NSECS */
+	TEP_DISABLE_SYS_PLUGINS	= 1 << 1,
+	TEP_DISABLE_PLUGINS	= 1 << 2,
 };
 
-#define PEVENT_ERRORS 							      \
+#define TEP_ERRORS 							      \
 	_PE(MEM_ALLOC_FAILED,	"failed to allocate memory"),		      \
 	_PE(PARSE_EVENT_FAILED,	"failed to parse event"),		      \
 	_PE(READ_ID_FAILED,	"failed to read event id"),		      \
@@ -411,10 +369,10 @@ enum pevent_flag {
 	_PE(FILTER_MISS,	"record does not match to filter")
 
 #undef _PE
-#define _PE(__code, __str) PEVENT_ERRNO__ ## __code
-enum pevent_errno {
-	PEVENT_ERRNO__SUCCESS			= 0,
-	PEVENT_ERRNO__FILTER_MATCH		= PEVENT_ERRNO__SUCCESS,
+#define _PE(__code, __str) TEP_ERRNO__ ## __code
+enum tep_errno {
+	TEP_ERRNO__SUCCESS			= 0,
+	TEP_ERRNO__FILTER_MATCH			= TEP_ERRNO__SUCCESS,
 
 	/*
 	 * Choose an arbitrary negative big number not to clash with standard
@@ -423,181 +381,50 @@ enum pevent_errno {
 	 *
 	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
 	 */
-	__PEVENT_ERRNO__START			= -100000,
+	__TEP_ERRNO__START			= -100000,
 
-	PEVENT_ERRORS,
+	TEP_ERRORS,
 
-	__PEVENT_ERRNO__END,
+	__TEP_ERRNO__END,
 };
 #undef _PE
 
-struct plugin_list;
+struct tep_plugin_list;
 
 #define INVALID_PLUGIN_LIST_OPTION	((char **)((unsigned long)-1))
 
-struct plugin_list *traceevent_load_plugins(struct pevent *pevent);
-void traceevent_unload_plugins(struct plugin_list *plugin_list,
-			       struct pevent *pevent);
-char **traceevent_plugin_list_options(void);
-void traceevent_plugin_free_options_list(char **list);
-int traceevent_plugin_add_options(const char *name,
-				  struct pevent_plugin_option *options);
-void traceevent_plugin_remove_options(struct pevent_plugin_option *options);
-void traceevent_print_plugins(struct trace_seq *s,
-			      const char *prefix, const char *suffix,
-			      const struct plugin_list *list);
-
-struct cmdline;
-struct cmdline_list;
-struct func_map;
-struct func_list;
-struct event_handler;
-struct func_resolver;
-
-typedef char *(pevent_func_resolver_t)(void *priv,
-				       unsigned long long *addrp, char **modp);
-
-struct pevent {
-	int ref_count;
-
-	int header_page_ts_offset;
-	int header_page_ts_size;
-	int header_page_size_offset;
-	int header_page_size_size;
-	int header_page_data_offset;
-	int header_page_data_size;
-	int header_page_overwrite;
-
-	int file_bigendian;
-	int host_bigendian;
-
-	int latency_format;
-
-	int old_format;
-
-	int cpus;
-	int long_size;
-	int page_size;
-
-	struct cmdline *cmdlines;
-	struct cmdline_list *cmdlist;
-	int cmdline_count;
-
-	struct func_map *func_map;
-	struct func_resolver *func_resolver;
-	struct func_list *funclist;
-	unsigned int func_count;
-
-	struct printk_map *printk_map;
-	struct printk_list *printklist;
-	unsigned int printk_count;
-
-
-	struct event_format **events;
-	int nr_events;
-	struct event_format **sort_events;
-	enum event_sort_type last_type;
-
-	int type_offset;
-	int type_size;
-
-	int pid_offset;
-	int pid_size;
-
- 	int pc_offset;
-	int pc_size;
-
-	int flags_offset;
-	int flags_size;
-
-	int ld_offset;
-	int ld_size;
-
-	int print_raw;
-
-	int test_filters;
-
-	int flags;
-
-	struct format_field *bprint_ip_field;
-	struct format_field *bprint_fmt_field;
-	struct format_field *bprint_buf_field;
-
-	struct event_handler *handlers;
-	struct pevent_function_handler *func_handlers;
-
-	/* cache */
-	struct event_format *last_event;
-
-	char *trace_clock;
-};
-
-static inline void pevent_set_flag(struct pevent *pevent, int flag)
-{
-	pevent->flags |= flag;
-}
-
-static inline unsigned short
-__data2host2(struct pevent *pevent, unsigned short data)
-{
-	unsigned short swap;
-
-	if (pevent->host_bigendian == pevent->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 8) |
-		((data & (0xffULL << 8)) >> 8);
-
-	return swap;
-}
-
-static inline unsigned int
-__data2host4(struct pevent *pevent, unsigned int data)
-{
-	unsigned int swap;
-
-	if (pevent->host_bigendian == pevent->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 24) |
-		((data & (0xffULL << 8)) << 8) |
-		((data & (0xffULL << 16)) >> 8) |
-		((data & (0xffULL << 24)) >> 24);
-
-	return swap;
-}
-
-static inline unsigned long long
-__data2host8(struct pevent *pevent, unsigned long long data)
-{
-	unsigned long long swap;
-
-	if (pevent->host_bigendian == pevent->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 56) |
-		((data & (0xffULL << 8)) << 40) |
-		((data & (0xffULL << 16)) << 24) |
-		((data & (0xffULL << 24)) << 8) |
-		((data & (0xffULL << 32)) >> 8) |
-		((data & (0xffULL << 40)) >> 24) |
-		((data & (0xffULL << 48)) >> 40) |
-		((data & (0xffULL << 56)) >> 56);
-
-	return swap;
-}
+struct tep_plugin_list *tep_load_plugins(struct tep_handle *pevent);
+void tep_unload_plugins(struct tep_plugin_list *plugin_list,
+			struct tep_handle *pevent);
+char **tep_plugin_list_options(void);
+void tep_plugin_free_options_list(char **list);
+int tep_plugin_add_options(const char *name,
+			   struct tep_plugin_option *options);
+void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_print_plugins(struct trace_seq *s,
+			const char *prefix, const char *suffix,
+			const struct tep_plugin_list *list);
+
+/* tep_handle */
+typedef char *(tep_func_resolver_t)(void *priv,
+				    unsigned long long *addrp, char **modp);
+void tep_set_flag(struct tep_handle *tep, int flag);
+unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data);
+unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data);
+unsigned long long
+__tep_data2host8(struct tep_handle *pevent, unsigned long long data);
 
-#define data2host2(pevent, ptr)		__data2host2(pevent, *(unsigned short *)(ptr))
-#define data2host4(pevent, ptr)		__data2host4(pevent, *(unsigned int *)(ptr))
-#define data2host8(pevent, ptr)					\
+#define tep_data2host2(pevent, ptr)	__tep_data2host2(pevent, *(unsigned short *)(ptr))
+#define tep_data2host4(pevent, ptr)	__tep_data2host4(pevent, *(unsigned int *)(ptr))
+#define tep_data2host8(pevent, ptr)	\
 ({								\
 	unsigned long long __val;				\
 								\
 	memcpy(&__val, (ptr), sizeof(unsigned long long));	\
-	__data2host8(pevent, __val);				\
+	__tep_data2host8(pevent, __val);				\
 })
 
-static inline int traceevent_host_bigendian(void)
+static inline int tep_host_bigendian(void)
 {
 	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
 	unsigned int val;
@@ -615,377 +442,336 @@ enum trace_flag_type {
 	TRACE_FLAG_SOFTIRQ		= 0x10,
 };
 
-int pevent_set_function_resolver(struct pevent *pevent,
-				 pevent_func_resolver_t *func, void *priv);
-void pevent_reset_function_resolver(struct pevent *pevent);
-int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
-int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock);
-int pevent_register_function(struct pevent *pevent, char *name,
-			     unsigned long long addr, char *mod);
-int pevent_register_print_string(struct pevent *pevent, const char *fmt,
-				 unsigned long long addr);
-int pevent_pid_is_registered(struct pevent *pevent, int pid);
-
-void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
-			     struct event_format *event,
-			     struct pevent_record *record);
-void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
-			     struct event_format *event,
-			     struct pevent_record *record,
-			     bool use_trace_clock);
-void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
-			     struct event_format *event,
-			     struct pevent_record *record);
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record, bool use_trace_clock);
-
-int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size,
-			     int long_size);
-
-enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf,
-				     unsigned long size, const char *sys);
-enum pevent_errno pevent_parse_format(struct pevent *pevent,
-				      struct event_format **eventp,
-				      const char *buf,
-				      unsigned long size, const char *sys);
-void pevent_free_format(struct event_format *event);
-void pevent_free_format_field(struct format_field *field);
-
-void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
-			   const char *name, struct pevent_record *record,
-			   int *len, int err);
-
-int pevent_get_field_val(struct trace_seq *s, struct event_format *event,
-			 const char *name, struct pevent_record *record,
-			 unsigned long long *val, int err);
-int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event,
-				const char *name, struct pevent_record *record,
-				unsigned long long *val, int err);
-int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,
-			     const char *name, struct pevent_record *record,
+int tep_set_function_resolver(struct tep_handle *pevent,
+			      tep_func_resolver_t *func, void *priv);
+void tep_reset_function_resolver(struct tep_handle *pevent);
+int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid);
+int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock);
+int tep_register_function(struct tep_handle *pevent, char *name,
+			  unsigned long long addr, char *mod);
+int tep_register_print_string(struct tep_handle *pevent, const char *fmt,
+			      unsigned long long addr);
+int tep_pid_is_registered(struct tep_handle *pevent, int pid);
+
+void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
+			  struct tep_event_format *event,
+			  struct tep_record *record);
+void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
+			  struct tep_event_format *event,
+			  struct tep_record *record,
+			  bool use_trace_clock);
+void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
+			  struct tep_event_format *event,
+			  struct tep_record *record);
+void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
+		     struct tep_record *record, bool use_trace_clock);
+
+int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size,
+			  int long_size);
+
+enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
+			       unsigned long size, const char *sys);
+enum tep_errno tep_parse_format(struct tep_handle *pevent,
+				struct tep_event_format **eventp,
+				const char *buf,
+				unsigned long size, const char *sys);
+void tep_free_format(struct tep_event_format *event);
+void tep_free_format_field(struct tep_format_field *field);
+
+void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
+			const char *name, struct tep_record *record,
+			int *len, int err);
+
+int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
+		      const char *name, struct tep_record *record,
+		      unsigned long long *val, int err);
+int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event,
+			     const char *name, struct tep_record *record,
 			     unsigned long long *val, int err);
-
-int pevent_print_num_field(struct trace_seq *s, const char *fmt,
-			   struct event_format *event, const char *name,
-			   struct pevent_record *record, int err);
-
-int pevent_print_func_field(struct trace_seq *s, const char *fmt,
-			   struct event_format *event, const char *name,
-			   struct pevent_record *record, int err);
-
-int pevent_register_event_handler(struct pevent *pevent, int id,
-				  const char *sys_name, const char *event_name,
-				  pevent_event_handler_func func, void *context);
-int pevent_unregister_event_handler(struct pevent *pevent, int id,
-				    const char *sys_name, const char *event_name,
-				    pevent_event_handler_func func, void *context);
-int pevent_register_print_function(struct pevent *pevent,
-				   pevent_func_handler func,
-				   enum pevent_func_arg_type ret_type,
-				   char *name, ...);
-int pevent_unregister_print_function(struct pevent *pevent,
-				     pevent_func_handler func, char *name);
-
-struct format_field *pevent_find_common_field(struct event_format *event, const char *name);
-struct format_field *pevent_find_field(struct event_format *event, const char *name);
-struct format_field *pevent_find_any_field(struct event_format *event, const char *name);
-
-const char *pevent_find_function(struct pevent *pevent, unsigned long long addr);
+int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
+			  const char *name, struct tep_record *record,
+			  unsigned long long *val, int err);
+
+int tep_print_num_field(struct trace_seq *s, const char *fmt,
+			struct tep_event_format *event, const char *name,
+			struct tep_record *record, int err);
+
+int tep_print_func_field(struct trace_seq *s, const char *fmt,
+			 struct tep_event_format *event, const char *name,
+			 struct tep_record *record, int err);
+
+int tep_register_event_handler(struct tep_handle *pevent, int id,
+			       const char *sys_name, const char *event_name,
+			       tep_event_handler_func func, void *context);
+int tep_unregister_event_handler(struct tep_handle *pevent, int id,
+				 const char *sys_name, const char *event_name,
+				 tep_event_handler_func func, void *context);
+int tep_register_print_function(struct tep_handle *pevent,
+				tep_func_handler func,
+				enum tep_func_arg_type ret_type,
+				char *name, ...);
+int tep_unregister_print_function(struct tep_handle *pevent,
+				  tep_func_handler func, char *name);
+
+struct tep_format_field *tep_find_common_field(struct tep_event_format *event, const char *name);
+struct tep_format_field *tep_find_field(struct tep_event_format *event, const char *name);
+struct tep_format_field *tep_find_any_field(struct tep_event_format *event, const char *name);
+
+const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr);
 unsigned long long
-pevent_find_function_address(struct pevent *pevent, unsigned long long addr);
-unsigned long long pevent_read_number(struct pevent *pevent, const void *ptr, int size);
-int pevent_read_number_field(struct format_field *field, const void *data,
-			     unsigned long long *value);
-
-struct event_format *pevent_find_event(struct pevent *pevent, int id);
-
-struct event_format *
-pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
-
-struct event_format *
-pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
-
-void pevent_data_lat_fmt(struct pevent *pevent,
-			 struct trace_seq *s, struct pevent_record *record);
-int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
-struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
-int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
-int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec);
-int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec);
-const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
+tep_find_function_address(struct tep_handle *pevent, unsigned long long addr);
+unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size);
+int tep_read_number_field(struct tep_format_field *field, const void *data,
+			  unsigned long long *value);
+
+struct tep_event_format *tep_get_first_event(struct tep_handle *tep);
+int tep_get_events_count(struct tep_handle *tep);
+struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id);
+
+struct tep_event_format *
+tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name);
+struct tep_event_format *
+tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record);
+
+void tep_data_lat_fmt(struct tep_handle *pevent,
+		      struct trace_seq *s, struct tep_record *record);
+int tep_data_type(struct tep_handle *pevent, struct tep_record *rec);
+struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type);
+int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec);
+int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec);
+int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec);
+const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid);
 struct cmdline;
-struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm,
-					  struct cmdline *next);
-int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline);
-
-void pevent_print_field(struct trace_seq *s, void *data,
-			struct format_field *field);
-void pevent_print_fields(struct trace_seq *s, void *data,
-			 int size __maybe_unused, struct event_format *event);
-void pevent_event_info(struct trace_seq *s, struct event_format *event,
-		       struct pevent_record *record);
-int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
+struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
+				       struct cmdline *next);
+int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline);
+
+void tep_print_field(struct trace_seq *s, void *data,
+		     struct tep_format_field *field);
+void tep_print_fields(struct trace_seq *s, void *data,
+		      int size __maybe_unused, struct tep_event_format *event);
+void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
+		       struct tep_record *record);
+int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum,
 		    char *buf, size_t buflen);
 
-struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type);
-struct format_field **pevent_event_common_fields(struct event_format *event);
-struct format_field **pevent_event_fields(struct event_format *event);
-
-static inline int pevent_get_cpus(struct pevent *pevent)
-{
-	return pevent->cpus;
-}
-
-static inline void pevent_set_cpus(struct pevent *pevent, int cpus)
-{
-	pevent->cpus = cpus;
-}
-
-static inline int pevent_get_long_size(struct pevent *pevent)
-{
-	return pevent->long_size;
-}
-
-static inline void pevent_set_long_size(struct pevent *pevent, int long_size)
-{
-	pevent->long_size = long_size;
-}
-
-static inline int pevent_get_page_size(struct pevent *pevent)
-{
-	return pevent->page_size;
-}
-
-static inline void pevent_set_page_size(struct pevent *pevent, int _page_size)
-{
-	pevent->page_size = _page_size;
-}
-
-static inline int pevent_is_file_bigendian(struct pevent *pevent)
-{
-	return pevent->file_bigendian;
-}
-
-static inline void pevent_set_file_bigendian(struct pevent *pevent, int endian)
-{
-	pevent->file_bigendian = endian;
-}
-
-static inline int pevent_is_host_bigendian(struct pevent *pevent)
-{
-	return pevent->host_bigendian;
-}
-
-static inline void pevent_set_host_bigendian(struct pevent *pevent, int endian)
-{
-	pevent->host_bigendian = endian;
-}
-
-static inline int pevent_is_latency_format(struct pevent *pevent)
-{
-	return pevent->latency_format;
-}
-
-static inline void pevent_set_latency_format(struct pevent *pevent, int lat)
-{
-	pevent->latency_format = lat;
-}
-
-struct pevent *pevent_alloc(void);
-void pevent_free(struct pevent *pevent);
-void pevent_ref(struct pevent *pevent);
-void pevent_unref(struct pevent *pevent);
+struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type);
+struct tep_format_field **tep_event_common_fields(struct tep_event_format *event);
+struct tep_format_field **tep_event_fields(struct tep_event_format *event);
+
+enum tep_endian {
+        TEP_LITTLE_ENDIAN = 0,
+        TEP_BIG_ENDIAN
+};
+int tep_get_cpus(struct tep_handle *pevent);
+void tep_set_cpus(struct tep_handle *pevent, int cpus);
+int tep_get_long_size(struct tep_handle *pevent);
+void tep_set_long_size(struct tep_handle *pevent, int long_size);
+int tep_get_page_size(struct tep_handle *pevent);
+void tep_set_page_size(struct tep_handle *pevent, int _page_size);
+int tep_is_file_bigendian(struct tep_handle *pevent);
+void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian);
+int tep_is_host_bigendian(struct tep_handle *pevent);
+void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian);
+int tep_is_latency_format(struct tep_handle *pevent);
+void tep_set_latency_format(struct tep_handle *pevent, int lat);
+int tep_get_header_page_size(struct tep_handle *pevent);
+
+struct tep_handle *tep_alloc(void);
+void tep_free(struct tep_handle *pevent);
+void tep_ref(struct tep_handle *pevent);
+void tep_unref(struct tep_handle *pevent);
 
 /* access to the internal parser */
-void pevent_buffer_init(const char *buf, unsigned long long size);
-enum event_type pevent_read_token(char **tok);
-void pevent_free_token(char *token);
-int pevent_peek_char(void);
-const char *pevent_get_input_buf(void);
-unsigned long long pevent_get_input_buf_ptr(void);
+void tep_buffer_init(const char *buf, unsigned long long size);
+enum tep_event_type tep_read_token(char **tok);
+void tep_free_token(char *token);
+int tep_peek_char(void);
+const char *tep_get_input_buf(void);
+unsigned long long tep_get_input_buf_ptr(void);
 
 /* for debugging */
-void pevent_print_funcs(struct pevent *pevent);
-void pevent_print_printk(struct pevent *pevent);
+void tep_print_funcs(struct tep_handle *pevent);
+void tep_print_printk(struct tep_handle *pevent);
 
 /* ----------------------- filtering ----------------------- */
 
-enum filter_boolean_type {
-	FILTER_FALSE,
-	FILTER_TRUE,
+enum tep_filter_boolean_type {
+	TEP_FILTER_FALSE,
+	TEP_FILTER_TRUE,
 };
 
-enum filter_op_type {
-	FILTER_OP_AND = 1,
-	FILTER_OP_OR,
-	FILTER_OP_NOT,
+enum tep_filter_op_type {
+	TEP_FILTER_OP_AND = 1,
+	TEP_FILTER_OP_OR,
+	TEP_FILTER_OP_NOT,
 };
 
-enum filter_cmp_type {
-	FILTER_CMP_NONE,
-	FILTER_CMP_EQ,
-	FILTER_CMP_NE,
-	FILTER_CMP_GT,
-	FILTER_CMP_LT,
-	FILTER_CMP_GE,
-	FILTER_CMP_LE,
-	FILTER_CMP_MATCH,
-	FILTER_CMP_NOT_MATCH,
-	FILTER_CMP_REGEX,
-	FILTER_CMP_NOT_REGEX,
+enum tep_filter_cmp_type {
+	TEP_FILTER_CMP_NONE,
+	TEP_FILTER_CMP_EQ,
+	TEP_FILTER_CMP_NE,
+	TEP_FILTER_CMP_GT,
+	TEP_FILTER_CMP_LT,
+	TEP_FILTER_CMP_GE,
+	TEP_FILTER_CMP_LE,
+	TEP_FILTER_CMP_MATCH,
+	TEP_FILTER_CMP_NOT_MATCH,
+	TEP_FILTER_CMP_REGEX,
+	TEP_FILTER_CMP_NOT_REGEX,
 };
 
-enum filter_exp_type {
-	FILTER_EXP_NONE,
-	FILTER_EXP_ADD,
-	FILTER_EXP_SUB,
-	FILTER_EXP_MUL,
-	FILTER_EXP_DIV,
-	FILTER_EXP_MOD,
-	FILTER_EXP_RSHIFT,
-	FILTER_EXP_LSHIFT,
-	FILTER_EXP_AND,
-	FILTER_EXP_OR,
-	FILTER_EXP_XOR,
-	FILTER_EXP_NOT,
+enum tep_filter_exp_type {
+	TEP_FILTER_EXP_NONE,
+	TEP_FILTER_EXP_ADD,
+	TEP_FILTER_EXP_SUB,
+	TEP_FILTER_EXP_MUL,
+	TEP_FILTER_EXP_DIV,
+	TEP_FILTER_EXP_MOD,
+	TEP_FILTER_EXP_RSHIFT,
+	TEP_FILTER_EXP_LSHIFT,
+	TEP_FILTER_EXP_AND,
+	TEP_FILTER_EXP_OR,
+	TEP_FILTER_EXP_XOR,
+	TEP_FILTER_EXP_NOT,
 };
 
-enum filter_arg_type {
-	FILTER_ARG_NONE,
-	FILTER_ARG_BOOLEAN,
-	FILTER_ARG_VALUE,
-	FILTER_ARG_FIELD,
-	FILTER_ARG_EXP,
-	FILTER_ARG_OP,
-	FILTER_ARG_NUM,
-	FILTER_ARG_STR,
+enum tep_filter_arg_type {
+	TEP_FILTER_ARG_NONE,
+	TEP_FILTER_ARG_BOOLEAN,
+	TEP_FILTER_ARG_VALUE,
+	TEP_FILTER_ARG_FIELD,
+	TEP_FILTER_ARG_EXP,
+	TEP_FILTER_ARG_OP,
+	TEP_FILTER_ARG_NUM,
+	TEP_FILTER_ARG_STR,
 };
 
-enum filter_value_type {
-	FILTER_NUMBER,
-	FILTER_STRING,
-	FILTER_CHAR
+enum tep_filter_value_type {
+	TEP_FILTER_NUMBER,
+	TEP_FILTER_STRING,
+	TEP_FILTER_CHAR
 };
 
-struct fliter_arg;
+struct tep_filter_arg;
 
-struct filter_arg_boolean {
-	enum filter_boolean_type	value;
+struct tep_filter_arg_boolean {
+	enum tep_filter_boolean_type	value;
 };
 
-struct filter_arg_field {
-	struct format_field	*field;
+struct tep_filter_arg_field {
+	struct tep_format_field		*field;
 };
 
-struct filter_arg_value {
-	enum filter_value_type	type;
+struct tep_filter_arg_value {
+	enum tep_filter_value_type	type;
 	union {
 		char			*str;
 		unsigned long long	val;
 	};
 };
 
-struct filter_arg_op {
-	enum filter_op_type	type;
-	struct filter_arg	*left;
-	struct filter_arg	*right;
+struct tep_filter_arg_op {
+	enum tep_filter_op_type		type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
 };
 
-struct filter_arg_exp {
-	enum filter_exp_type	type;
-	struct filter_arg	*left;
-	struct filter_arg	*right;
+struct tep_filter_arg_exp {
+	enum tep_filter_exp_type	type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
 };
 
-struct filter_arg_num {
-	enum filter_cmp_type	type;
-	struct filter_arg	*left;
-	struct filter_arg	*right;
+struct tep_filter_arg_num {
+	enum tep_filter_cmp_type	type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
 };
 
-struct filter_arg_str {
-	enum filter_cmp_type	type;
-	struct format_field	*field;
-	char			*val;
-	char			*buffer;
-	regex_t			reg;
+struct tep_filter_arg_str {
+	enum tep_filter_cmp_type	type;
+	struct tep_format_field		*field;
+	char				*val;
+	char				*buffer;
+	regex_t				reg;
 };
 
-struct filter_arg {
-	enum filter_arg_type	type;
+struct tep_filter_arg {
+	enum tep_filter_arg_type		type;
 	union {
-		struct filter_arg_boolean	boolean;
-		struct filter_arg_field		field;
-		struct filter_arg_value		value;
-		struct filter_arg_op		op;
-		struct filter_arg_exp		exp;
-		struct filter_arg_num		num;
-		struct filter_arg_str		str;
+		struct tep_filter_arg_boolean	boolean;
+		struct tep_filter_arg_field	field;
+		struct tep_filter_arg_value	value;
+		struct tep_filter_arg_op	op;
+		struct tep_filter_arg_exp	exp;
+		struct tep_filter_arg_num	num;
+		struct tep_filter_arg_str	str;
 	};
 };
 
-struct filter_type {
+struct tep_filter_type {
 	int			event_id;
-	struct event_format	*event;
-	struct filter_arg	*filter;
+	struct tep_event_format	*event;
+	struct tep_filter_arg	*filter;
 };
 
-#define PEVENT_FILTER_ERROR_BUFSZ  1024
+#define TEP_FILTER_ERROR_BUFSZ  1024
 
-struct event_filter {
-	struct pevent		*pevent;
+struct tep_event_filter {
+	struct tep_handle	*pevent;
 	int			filters;
-	struct filter_type	*event_filters;
-	char			error_buffer[PEVENT_FILTER_ERROR_BUFSZ];
+	struct tep_filter_type	*event_filters;
+	char			error_buffer[TEP_FILTER_ERROR_BUFSZ];
 };
 
-struct event_filter *pevent_filter_alloc(struct pevent *pevent);
+struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent);
 
 /* for backward compatibility */
-#define FILTER_NONE		PEVENT_ERRNO__NO_FILTER
-#define FILTER_NOEXIST		PEVENT_ERRNO__FILTER_NOT_FOUND
-#define FILTER_MISS		PEVENT_ERRNO__FILTER_MISS
-#define FILTER_MATCH		PEVENT_ERRNO__FILTER_MATCH
+#define FILTER_NONE		TEP_ERRNO__NO_FILTER
+#define FILTER_NOEXIST		TEP_ERRNO__FILTER_NOT_FOUND
+#define FILTER_MISS		TEP_ERRNO__FILTER_MISS
+#define FILTER_MATCH		TEP_ERRNO__FILTER_MATCH
 
-enum filter_trivial_type {
-	FILTER_TRIVIAL_FALSE,
-	FILTER_TRIVIAL_TRUE,
-	FILTER_TRIVIAL_BOTH,
+enum tep_filter_trivial_type {
+	TEP_FILTER_TRIVIAL_FALSE,
+	TEP_FILTER_TRIVIAL_TRUE,
+	TEP_FILTER_TRIVIAL_BOTH,
 };
 
-enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
-					       const char *filter_str);
+enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
+					 const char *filter_str);
 
-enum pevent_errno pevent_filter_match(struct event_filter *filter,
-				      struct pevent_record *record);
+enum tep_errno tep_filter_match(struct tep_event_filter *filter,
+				struct tep_record *record);
 
-int pevent_filter_strerror(struct event_filter *filter, enum pevent_errno err,
-			   char *buf, size_t buflen);
+int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
+			char *buf, size_t buflen);
 
-int pevent_event_filtered(struct event_filter *filter,
-			  int event_id);
+int tep_event_filtered(struct tep_event_filter *filter,
+		       int event_id);
 
-void pevent_filter_reset(struct event_filter *filter);
+void tep_filter_reset(struct tep_event_filter *filter);
 
-int pevent_filter_clear_trivial(struct event_filter *filter,
-				 enum filter_trivial_type type);
+int tep_filter_clear_trivial(struct tep_event_filter *filter,
+			     enum tep_filter_trivial_type type);
 
-void pevent_filter_free(struct event_filter *filter);
+void tep_filter_free(struct tep_event_filter *filter);
 
-char *pevent_filter_make_string(struct event_filter *filter, int event_id);
+char *tep_filter_make_string(struct tep_event_filter *filter, int event_id);
 
-int pevent_filter_remove_event(struct event_filter *filter,
-			       int event_id);
+int tep_filter_remove_event(struct tep_event_filter *filter,
+			    int event_id);
 
-int pevent_filter_event_has_trivial(struct event_filter *filter,
-				    int event_id,
-				    enum filter_trivial_type type);
+int tep_filter_event_has_trivial(struct tep_event_filter *filter,
+				 int event_id,
+				 enum tep_filter_trivial_type type);
 
-int pevent_filter_copy(struct event_filter *dest, struct event_filter *source);
+int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source);
 
-int pevent_update_trivial(struct event_filter *dest, struct event_filter *source,
-			  enum filter_trivial_type type);
+int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source,
+			enum tep_filter_trivial_type type);
 
-int pevent_filter_compare(struct event_filter *filter1, struct event_filter *filter2);
+int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2);
 
 #endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index d542cb60ca1a..e74f16c88398 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -1,21 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
 #include <ctype.h>
@@ -28,13 +14,15 @@
 #include <unistd.h>
 #include <dirent.h>
 #include "event-parse.h"
+#include "event-parse-local.h"
 #include "event-utils.h"
+#include "trace-seq.h"
 
 #define LOCAL_PLUGIN_DIR ".traceevent/plugins"
 
 static struct registered_plugin_options {
 	struct registered_plugin_options	*next;
-	struct pevent_plugin_option		*options;
+	struct tep_plugin_option		*options;
 } *registered_options;
 
 static struct trace_plugin_options {
@@ -44,8 +32,8 @@ static struct trace_plugin_options {
 	char				*value;
 } *trace_plugin_options;
 
-struct plugin_list {
-	struct plugin_list	*next;
+struct tep_plugin_list {
+	struct tep_plugin_list	*next;
 	char			*name;
 	void			*handle;
 };
@@ -58,7 +46,7 @@ static void lower_case(char *str)
 		*str = tolower(*str);
 }
 
-static int update_option_value(struct pevent_plugin_option *op, const char *val)
+static int update_option_value(struct tep_plugin_option *op, const char *val)
 {
 	char *op_val;
 
@@ -97,7 +85,7 @@ static int update_option_value(struct pevent_plugin_option *op, const char *val)
 }
 
 /**
- * traceevent_plugin_list_options - get list of plugin options
+ * tep_plugin_list_options - get list of plugin options
  *
  * Returns an array of char strings that list the currently registered
  * plugin options in the format of <plugin>:<option>. This list can be
@@ -106,12 +94,12 @@ static int update_option_value(struct pevent_plugin_option *op, const char *val)
  * Returns NULL if there's no options registered. On error it returns
  * INVALID_PLUGIN_LIST_OPTION
  *
- * Must be freed with traceevent_plugin_free_options_list().
+ * Must be freed with tep_plugin_free_options_list().
  */
-char **traceevent_plugin_list_options(void)
+char **tep_plugin_list_options(void)
 {
 	struct registered_plugin_options *reg;
-	struct pevent_plugin_option *op;
+	struct tep_plugin_option *op;
 	char **list = NULL;
 	char *name;
 	int count = 0;
@@ -146,7 +134,7 @@ char **traceevent_plugin_list_options(void)
 	return INVALID_PLUGIN_LIST_OPTION;
 }
 
-void traceevent_plugin_free_options_list(char **list)
+void tep_plugin_free_options_list(char **list)
 {
 	int i;
 
@@ -163,7 +151,7 @@ void traceevent_plugin_free_options_list(char **list)
 }
 
 static int
-update_option(const char *file, struct pevent_plugin_option *option)
+update_option(const char *file, struct tep_plugin_option *option)
 {
 	struct trace_plugin_options *op;
 	char *plugin;
@@ -215,14 +203,14 @@ update_option(const char *file, struct pevent_plugin_option *option)
 }
 
 /**
- * traceevent_plugin_add_options - Add a set of options by a plugin
+ * tep_plugin_add_options - Add a set of options by a plugin
  * @name: The name of the plugin adding the options
  * @options: The set of options being loaded
  *
  * Sets the options with the values that have been added by user.
  */
-int traceevent_plugin_add_options(const char *name,
-				  struct pevent_plugin_option *options)
+int tep_plugin_add_options(const char *name,
+			   struct tep_plugin_option *options)
 {
 	struct registered_plugin_options *reg;
 
@@ -241,10 +229,10 @@ int traceevent_plugin_add_options(const char *name,
 }
 
 /**
- * traceevent_plugin_remove_options - remove plugin options that were registered
- * @options: Options to removed that were registered with traceevent_plugin_add_options
+ * tep_plugin_remove_options - remove plugin options that were registered
+ * @options: Options to removed that were registered with tep_plugin_add_options
  */
-void traceevent_plugin_remove_options(struct pevent_plugin_option *options)
+void tep_plugin_remove_options(struct tep_plugin_option *options)
 {
 	struct registered_plugin_options **last;
 	struct registered_plugin_options *reg;
@@ -260,19 +248,19 @@ void traceevent_plugin_remove_options(struct pevent_plugin_option *options)
 }
 
 /**
- * traceevent_print_plugins - print out the list of plugins loaded
+ * tep_print_plugins - print out the list of plugins loaded
  * @s: the trace_seq descripter to write to
  * @prefix: The prefix string to add before listing the option name
  * @suffix: The suffix string ot append after the option name
- * @list: The list of plugins (usually returned by traceevent_load_plugins()
+ * @list: The list of plugins (usually returned by tep_load_plugins()
  *
  * Writes to the trace_seq @s the list of plugins (files) that is
- * returned by traceevent_load_plugins(). Use @prefix and @suffix for formating:
+ * returned by tep_load_plugins(). Use @prefix and @suffix for formating:
  * @prefix = "  ", @suffix = "\n".
  */
-void traceevent_print_plugins(struct trace_seq *s,
-			      const char *prefix, const char *suffix,
-			      const struct plugin_list *list)
+void tep_print_plugins(struct trace_seq *s,
+		       const char *prefix, const char *suffix,
+		       const struct tep_plugin_list *list)
 {
 	while (list) {
 		trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
@@ -281,12 +269,12 @@ void traceevent_print_plugins(struct trace_seq *s,
 }
 
 static void
-load_plugin(struct pevent *pevent, const char *path,
+load_plugin(struct tep_handle *pevent, const char *path,
 	    const char *file, void *data)
 {
-	struct plugin_list **plugin_list = data;
-	pevent_plugin_load_func func;
-	struct plugin_list *list;
+	struct tep_plugin_list **plugin_list = data;
+	tep_plugin_load_func func;
+	struct tep_plugin_list *list;
 	const char *alias;
 	char *plugin;
 	void *handle;
@@ -305,14 +293,14 @@ load_plugin(struct pevent *pevent, const char *path,
 		goto out_free;
 	}
 
-	alias = dlsym(handle, PEVENT_PLUGIN_ALIAS_NAME);
+	alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME);
 	if (!alias)
 		alias = file;
 
-	func = dlsym(handle, PEVENT_PLUGIN_LOADER_NAME);
+	func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
 	if (!func) {
 		warning("could not find func '%s' in plugin '%s'\n%s\n",
-			PEVENT_PLUGIN_LOADER_NAME, plugin, dlerror());
+			TEP_PLUGIN_LOADER_NAME, plugin, dlerror());
 		goto out_free;
 	}
 
@@ -336,9 +324,9 @@ load_plugin(struct pevent *pevent, const char *path,
 }
 
 static void
-load_plugins_dir(struct pevent *pevent, const char *suffix,
+load_plugins_dir(struct tep_handle *pevent, const char *suffix,
 		 const char *path,
-		 void (*load_plugin)(struct pevent *pevent,
+		 void (*load_plugin)(struct tep_handle *pevent,
 				     const char *path,
 				     const char *name,
 				     void *data),
@@ -378,8 +366,8 @@ load_plugins_dir(struct pevent *pevent, const char *suffix,
 }
 
 static void
-load_plugins(struct pevent *pevent, const char *suffix,
-	     void (*load_plugin)(struct pevent *pevent,
+load_plugins(struct tep_handle *pevent, const char *suffix,
+	     void (*load_plugin)(struct tep_handle *pevent,
 				 const char *path,
 				 const char *name,
 				 void *data),
@@ -390,7 +378,7 @@ load_plugins(struct pevent *pevent, const char *suffix,
 	char *envdir;
 	int ret;
 
-	if (pevent->flags & PEVENT_DISABLE_PLUGINS)
+	if (pevent->flags & TEP_DISABLE_PLUGINS)
 		return;
 
 	/*
@@ -398,7 +386,7 @@ load_plugins(struct pevent *pevent, const char *suffix,
 	 * check that first.
 	 */
 #ifdef PLUGIN_DIR
-	if (!(pevent->flags & PEVENT_DISABLE_SYS_PLUGINS))
+	if (!(pevent->flags & TEP_DISABLE_SYS_PLUGINS))
 		load_plugins_dir(pevent, suffix, PLUGIN_DIR,
 				 load_plugin, data);
 #endif
@@ -430,25 +418,25 @@ load_plugins(struct pevent *pevent, const char *suffix,
 	free(path);
 }
 
-struct plugin_list*
-traceevent_load_plugins(struct pevent *pevent)
+struct tep_plugin_list*
+tep_load_plugins(struct tep_handle *pevent)
 {
-	struct plugin_list *list = NULL;
+	struct tep_plugin_list *list = NULL;
 
 	load_plugins(pevent, ".so", load_plugin, &list);
 	return list;
 }
 
 void
-traceevent_unload_plugins(struct plugin_list *plugin_list, struct pevent *pevent)
+tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *pevent)
 {
-	pevent_plugin_unload_func func;
-	struct plugin_list *list;
+	tep_plugin_unload_func func;
+	struct tep_plugin_list *list;
 
 	while (plugin_list) {
 		list = plugin_list;
 		plugin_list = list->next;
-		func = dlsym(list->handle, PEVENT_PLUGIN_UNLOADER_NAME);
+		func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME);
 		if (func)
 			func(pevent);
 		dlclose(list->handle);
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
index d1dc2170e402..0560b96a31d1 100644
--- a/tools/lib/traceevent/event-utils.h
+++ b/tools/lib/traceevent/event-utils.h
@@ -1,21 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #ifndef __UTIL_H
 #define __UTIL_H
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index ca424b157e46..af2a1f3b7424 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 431e8b309f6e..ed87cb56713d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1,21 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
@@ -25,22 +11,23 @@
 #include <sys/types.h>
 
 #include "event-parse.h"
+#include "event-parse-local.h"
 #include "event-utils.h"
 
 #define COMM "COMM"
 #define CPU "CPU"
 
-static struct format_field comm = {
+static struct tep_format_field comm = {
 	.name = "COMM",
 };
 
-static struct format_field cpu = {
+static struct tep_format_field cpu = {
 	.name = "CPU",
 };
 
 struct event_list {
 	struct event_list	*next;
-	struct event_format	*event;
+	struct tep_event_format	*event;
 };
 
 static void show_error(char *error_buf, const char *fmt, ...)
@@ -51,8 +38,8 @@ static void show_error(char *error_buf, const char *fmt, ...)
 	int len;
 	int i;
 
-	input = pevent_get_input_buf();
-	index = pevent_get_input_buf_ptr();
+	input = tep_get_input_buf();
+	index = tep_get_input_buf_ptr();
 	len = input ? strlen(input) : 0;
 
 	if (len) {
@@ -66,39 +53,39 @@ static void show_error(char *error_buf, const char *fmt, ...)
 	}
 
 	va_start(ap, fmt);
-	vsnprintf(error_buf + len, PEVENT_FILTER_ERROR_BUFSZ - len, fmt, ap);
+	vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap);
 	va_end(ap);
 }
 
 static void free_token(char *token)
 {
-	pevent_free_token(token);
+	tep_free_token(token);
 }
 
-static enum event_type read_token(char **tok)
+static enum tep_event_type read_token(char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	do {
 		free_token(token);
-		type = pevent_read_token(&token);
-	} while (type == EVENT_NEWLINE || type == EVENT_SPACE);
+		type = tep_read_token(&token);
+	} while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
 
 	/* If token is = or ! check to see if the next char is ~ */
 	if (token &&
 	    (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
-	    pevent_peek_char() == '~') {
+	    tep_peek_char() == '~') {
 		/* append it */
 		*tok = malloc(3);
 		if (*tok == NULL) {
 			free_token(token);
-			return EVENT_ERROR;
+			return TEP_EVENT_ERROR;
 		}
 		sprintf(*tok, "%c%c", *token, '~');
 		free_token(token);
 		/* Now remove the '~' from the buffer */
-		pevent_read_token(&token);
+		tep_read_token(&token);
 		free_token(token);
 	} else
 		*tok = token;
@@ -108,8 +95,8 @@ static enum event_type read_token(char **tok)
 
 static int filter_cmp(const void *a, const void *b)
 {
-	const struct filter_type *ea = a;
-	const struct filter_type *eb = b;
+	const struct tep_filter_type *ea = a;
+	const struct tep_filter_type *eb = b;
 
 	if (ea->event_id < eb->event_id)
 		return -1;
@@ -120,11 +107,11 @@ static int filter_cmp(const void *a, const void *b)
 	return 0;
 }
 
-static struct filter_type *
-find_filter_type(struct event_filter *filter, int id)
+static struct tep_filter_type *
+find_filter_type(struct tep_event_filter *filter, int id)
 {
-	struct filter_type *filter_type;
-	struct filter_type key;
+	struct tep_filter_type *filter_type;
+	struct tep_filter_type key;
 
 	key.event_id = id;
 
@@ -136,10 +123,10 @@ find_filter_type(struct event_filter *filter, int id)
 	return filter_type;
 }
 
-static struct filter_type *
-add_filter_type(struct event_filter *filter, int id)
+static struct tep_filter_type *
+add_filter_type(struct tep_event_filter *filter, int id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	int i;
 
 	filter_type = find_filter_type(filter, id);
@@ -167,7 +154,7 @@ add_filter_type(struct event_filter *filter, int id)
 
 	filter_type = &filter->event_filters[i];
 	filter_type->event_id = id;
-	filter_type->event = pevent_find_event(filter->pevent, id);
+	filter_type->event = tep_find_event(filter->pevent, id);
 	filter_type->filter = NULL;
 
 	filter->filters++;
@@ -176,12 +163,12 @@ add_filter_type(struct event_filter *filter, int id)
 }
 
 /**
- * pevent_filter_alloc - create a new event filter
+ * tep_filter_alloc - create a new event filter
  * @pevent: The pevent that this filter is associated with
  */
-struct event_filter *pevent_filter_alloc(struct pevent *pevent)
+struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent)
 {
-	struct event_filter *filter;
+	struct tep_event_filter *filter;
 
 	filter = malloc(sizeof(*filter));
 	if (filter == NULL)
@@ -189,49 +176,49 @@ struct event_filter *pevent_filter_alloc(struct pevent *pevent)
 
 	memset(filter, 0, sizeof(*filter));
 	filter->pevent = pevent;
-	pevent_ref(pevent);
+	tep_ref(pevent);
 
 	return filter;
 }
 
-static struct filter_arg *allocate_arg(void)
+static struct tep_filter_arg *allocate_arg(void)
 {
-	return calloc(1, sizeof(struct filter_arg));
+	return calloc(1, sizeof(struct tep_filter_arg));
 }
 
-static void free_arg(struct filter_arg *arg)
+static void free_arg(struct tep_filter_arg *arg)
 {
 	if (!arg)
 		return;
 
 	switch (arg->type) {
-	case FILTER_ARG_NONE:
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_NONE:
+	case TEP_FILTER_ARG_BOOLEAN:
 		break;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		free_arg(arg->num.left);
 		free_arg(arg->num.right);
 		break;
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		free_arg(arg->exp.left);
 		free_arg(arg->exp.right);
 		break;
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		free(arg->str.val);
 		regfree(&arg->str.reg);
 		free(arg->str.buffer);
 		break;
 
-	case FILTER_ARG_VALUE:
-		if (arg->value.type == FILTER_STRING ||
-		    arg->value.type == FILTER_CHAR)
+	case TEP_FILTER_ARG_VALUE:
+		if (arg->value.type == TEP_FILTER_STRING ||
+		    arg->value.type == TEP_FILTER_CHAR)
 			free(arg->value.str);
 		break;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		free_arg(arg->op.left);
 		free_arg(arg->op.right);
 	default:
@@ -242,7 +229,7 @@ static void free_arg(struct filter_arg *arg)
 }
 
 static int add_event(struct event_list **events,
-		      struct event_format *event)
+		     struct tep_event_format *event)
 {
 	struct event_list *list;
 
@@ -256,7 +243,7 @@ static int add_event(struct event_list **events,
 	return 0;
 }
 
-static int event_match(struct event_format *event,
+static int event_match(struct tep_event_format *event,
 		       regex_t *sreg, regex_t *ereg)
 {
 	if (sreg) {
@@ -268,11 +255,11 @@ static int event_match(struct event_format *event,
 		!regexec(ereg, event->name, 0, NULL, 0);
 }
 
-static enum pevent_errno
-find_event(struct pevent *pevent, struct event_list **events,
+static enum tep_errno
+find_event(struct tep_handle *pevent, struct event_list **events,
 	   char *sys_name, char *event_name)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	regex_t ereg;
 	regex_t sreg;
 	int match = 0;
@@ -289,26 +276,26 @@ find_event(struct pevent *pevent, struct event_list **events,
 
 	ret = asprintf(&reg, "^%s$", event_name);
 	if (ret < 0)
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 
 	ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
 	free(reg);
 
 	if (ret)
-		return PEVENT_ERRNO__INVALID_EVENT_NAME;
+		return TEP_ERRNO__INVALID_EVENT_NAME;
 
 	if (sys_name) {
 		ret = asprintf(&reg, "^%s$", sys_name);
 		if (ret < 0) {
 			regfree(&ereg);
-			return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
 		}
 
 		ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
 		free(reg);
 		if (ret) {
 			regfree(&ereg);
-			return PEVENT_ERRNO__INVALID_EVENT_NAME;
+			return TEP_ERRNO__INVALID_EVENT_NAME;
 		}
 	}
 
@@ -328,9 +315,9 @@ find_event(struct pevent *pevent, struct event_list **events,
 		regfree(&sreg);
 
 	if (!match)
-		return PEVENT_ERRNO__EVENT_NOT_FOUND;
+		return TEP_ERRNO__EVENT_NOT_FOUND;
 	if (fail)
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 
 	return 0;
 }
@@ -346,43 +333,43 @@ static void free_events(struct event_list *events)
 	}
 }
 
-static enum pevent_errno
-create_arg_item(struct event_format *event, const char *token,
-		enum event_type type, struct filter_arg **parg, char *error_str)
+static enum tep_errno
+create_arg_item(struct tep_event_format *event, const char *token,
+		enum tep_event_type type, struct tep_filter_arg **parg, char *error_str)
 {
-	struct format_field *field;
-	struct filter_arg *arg;
+	struct tep_format_field *field;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (arg == NULL) {
 		show_error(error_str, "failed to allocate filter arg");
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 	}
 
 	switch (type) {
 
-	case EVENT_SQUOTE:
-	case EVENT_DQUOTE:
-		arg->type = FILTER_ARG_VALUE;
+	case TEP_EVENT_SQUOTE:
+	case TEP_EVENT_DQUOTE:
+		arg->type = TEP_FILTER_ARG_VALUE;
 		arg->value.type =
-			type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR;
+			type == TEP_EVENT_DQUOTE ? TEP_FILTER_STRING : TEP_FILTER_CHAR;
 		arg->value.str = strdup(token);
 		if (!arg->value.str) {
 			free_arg(arg);
 			show_error(error_str, "failed to allocate string filter arg");
-			return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
 		}
 		break;
-	case EVENT_ITEM:
+	case TEP_EVENT_ITEM:
 		/* if it is a number, then convert it */
 		if (isdigit(token[0])) {
-			arg->type = FILTER_ARG_VALUE;
-			arg->value.type = FILTER_NUMBER;
+			arg->type = TEP_FILTER_ARG_VALUE;
+			arg->value.type = TEP_FILTER_NUMBER;
 			arg->value.val = strtoull(token, NULL, 0);
 			break;
 		}
 		/* Consider this a field */
-		field = pevent_find_any_field(event, token);
+		field = tep_find_any_field(event, token);
 		if (!field) {
 			/* If token is 'COMM' or 'CPU' then it is special */
 			if (strcmp(token, COMM) == 0) {
@@ -391,103 +378,103 @@ create_arg_item(struct event_format *event, const char *token,
 				field = &cpu;
 			} else {
 				/* not a field, Make it false */
-				arg->type = FILTER_ARG_BOOLEAN;
-				arg->boolean.value = FILTER_FALSE;
+				arg->type = TEP_FILTER_ARG_BOOLEAN;
+				arg->boolean.value = TEP_FILTER_FALSE;
 				break;
 			}
 		}
-		arg->type = FILTER_ARG_FIELD;
+		arg->type = TEP_FILTER_ARG_FIELD;
 		arg->field.field = field;
 		break;
 	default:
 		free_arg(arg);
 		show_error(error_str, "expected a value but found %s", token);
-		return PEVENT_ERRNO__UNEXPECTED_TYPE;
+		return TEP_ERRNO__UNEXPECTED_TYPE;
 	}
 	*parg = arg;
 	return 0;
 }
 
-static struct filter_arg *
-create_arg_op(enum filter_op_type btype)
+static struct tep_filter_arg *
+create_arg_op(enum tep_filter_op_type btype)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (!arg)
 		return NULL;
 
-	arg->type = FILTER_ARG_OP;
+	arg->type = TEP_FILTER_ARG_OP;
 	arg->op.type = btype;
 
 	return arg;
 }
 
-static struct filter_arg *
-create_arg_exp(enum filter_exp_type etype)
+static struct tep_filter_arg *
+create_arg_exp(enum tep_filter_exp_type etype)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (!arg)
 		return NULL;
 
-	arg->type = FILTER_ARG_EXP;
+	arg->type = TEP_FILTER_ARG_EXP;
 	arg->exp.type = etype;
 
 	return arg;
 }
 
-static struct filter_arg *
-create_arg_cmp(enum filter_cmp_type ctype)
+static struct tep_filter_arg *
+create_arg_cmp(enum tep_filter_cmp_type ctype)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (!arg)
 		return NULL;
 
 	/* Use NUM and change if necessary */
-	arg->type = FILTER_ARG_NUM;
+	arg->type = TEP_FILTER_ARG_NUM;
 	arg->num.type = ctype;
 
 	return arg;
 }
 
-static enum pevent_errno
-add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
+static enum tep_errno
+add_right(struct tep_filter_arg *op, struct tep_filter_arg *arg, char *error_str)
 {
-	struct filter_arg *left;
+	struct tep_filter_arg *left;
 	char *str;
 	int op_type;
 	int ret;
 
 	switch (op->type) {
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		if (op->exp.right)
 			goto out_fail;
 		op->exp.right = arg;
 		break;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		if (op->op.right)
 			goto out_fail;
 		op->op.right = arg;
 		break;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		if (op->op.right)
 			goto out_fail;
 		/*
 		 * The arg must be num, str, or field
 		 */
 		switch (arg->type) {
-		case FILTER_ARG_VALUE:
-		case FILTER_ARG_FIELD:
+		case TEP_FILTER_ARG_VALUE:
+		case TEP_FILTER_ARG_FIELD:
 			break;
 		default:
 			show_error(error_str, "Illegal rvalue");
-			return PEVENT_ERRNO__ILLEGAL_RVALUE;
+			return TEP_ERRNO__ILLEGAL_RVALUE;
 		}
 
 		/*
@@ -495,20 +482,20 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 		 * convert this to a string or regex.
 		 */
 		switch (arg->value.type) {
-		case FILTER_CHAR:
+		case TEP_FILTER_CHAR:
 			/*
 			 * A char should be converted to number if
 			 * the string is 1 byte, and the compare
 			 * is not a REGEX.
 			 */
 			if (strlen(arg->value.str) == 1 &&
-			    op->num.type != FILTER_CMP_REGEX &&
-			    op->num.type != FILTER_CMP_NOT_REGEX) {
-				arg->value.type = FILTER_NUMBER;
+			    op->num.type != TEP_FILTER_CMP_REGEX &&
+			    op->num.type != TEP_FILTER_CMP_NOT_REGEX) {
+				arg->value.type = TEP_FILTER_NUMBER;
 				goto do_int;
 			}
 			/* fall through */
-		case FILTER_STRING:
+		case TEP_FILTER_STRING:
 
 			/* convert op to a string arg */
 			op_type = op->num.type;
@@ -522,53 +509,53 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 			 * If left arg was a field not found then
 			 * NULL the entire op.
 			 */
-			if (left->type == FILTER_ARG_BOOLEAN) {
+			if (left->type == TEP_FILTER_ARG_BOOLEAN) {
 				free_arg(left);
 				free_arg(arg);
-				op->type = FILTER_ARG_BOOLEAN;
-				op->boolean.value = FILTER_FALSE;
+				op->type = TEP_FILTER_ARG_BOOLEAN;
+				op->boolean.value = TEP_FILTER_FALSE;
 				break;
 			}
 
 			/* Left arg must be a field */
-			if (left->type != FILTER_ARG_FIELD) {
+			if (left->type != TEP_FILTER_ARG_FIELD) {
 				show_error(error_str,
 					   "Illegal lvalue for string comparison");
-				return PEVENT_ERRNO__ILLEGAL_LVALUE;
+				return TEP_ERRNO__ILLEGAL_LVALUE;
 			}
 
 			/* Make sure this is a valid string compare */
 			switch (op_type) {
-			case FILTER_CMP_EQ:
-				op_type = FILTER_CMP_MATCH;
+			case TEP_FILTER_CMP_EQ:
+				op_type = TEP_FILTER_CMP_MATCH;
 				break;
-			case FILTER_CMP_NE:
-				op_type = FILTER_CMP_NOT_MATCH;
+			case TEP_FILTER_CMP_NE:
+				op_type = TEP_FILTER_CMP_NOT_MATCH;
 				break;
 
-			case FILTER_CMP_REGEX:
-			case FILTER_CMP_NOT_REGEX:
+			case TEP_FILTER_CMP_REGEX:
+			case TEP_FILTER_CMP_NOT_REGEX:
 				ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
 				if (ret) {
 					show_error(error_str,
 						   "RegEx '%s' did not compute",
 						   str);
-					return PEVENT_ERRNO__INVALID_REGEX;
+					return TEP_ERRNO__INVALID_REGEX;
 				}
 				break;
 			default:
 				show_error(error_str,
 					   "Illegal comparison for string");
-				return PEVENT_ERRNO__ILLEGAL_STRING_CMP;
+				return TEP_ERRNO__ILLEGAL_STRING_CMP;
 			}
 
-			op->type = FILTER_ARG_STR;
+			op->type = TEP_FILTER_ARG_STR;
 			op->str.type = op_type;
 			op->str.field = left->field.field;
 			op->str.val = strdup(str);
 			if (!op->str.val) {
 				show_error(error_str, "Failed to allocate string filter");
-				return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+				return TEP_ERRNO__MEM_ALLOC_FAILED;
 			}
 			/*
 			 * Need a buffer to copy data for tests
@@ -576,7 +563,7 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 			op->str.buffer = malloc(op->str.field->size + 1);
 			if (!op->str.buffer) {
 				show_error(error_str, "Failed to allocate string filter");
-				return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+				return TEP_ERRNO__MEM_ALLOC_FAILED;
 			}
 			/* Null terminate this buffer */
 			op->str.buffer[op->str.field->size] = 0;
@@ -587,15 +574,15 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 
 			break;
 
-		case FILTER_NUMBER:
+		case TEP_FILTER_NUMBER:
 
  do_int:
 			switch (op->num.type) {
-			case FILTER_CMP_REGEX:
-			case FILTER_CMP_NOT_REGEX:
+			case TEP_FILTER_CMP_REGEX:
+			case TEP_FILTER_CMP_NOT_REGEX:
 				show_error(error_str,
 					   "Op not allowed with integers");
-				return PEVENT_ERRNO__ILLEGAL_INTEGER_CMP;
+				return TEP_ERRNO__ILLEGAL_INTEGER_CMP;
 
 			default:
 				break;
@@ -616,43 +603,43 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 
  out_fail:
 	show_error(error_str, "Syntax error");
-	return PEVENT_ERRNO__SYNTAX_ERROR;
+	return TEP_ERRNO__SYNTAX_ERROR;
 }
 
-static struct filter_arg *
-rotate_op_right(struct filter_arg *a, struct filter_arg *b)
+static struct tep_filter_arg *
+rotate_op_right(struct tep_filter_arg *a, struct tep_filter_arg *b)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = a->op.right;
 	a->op.right = b;
 	return arg;
 }
 
-static enum pevent_errno add_left(struct filter_arg *op, struct filter_arg *arg)
+static enum tep_errno add_left(struct tep_filter_arg *op, struct tep_filter_arg *arg)
 {
 	switch (op->type) {
-	case FILTER_ARG_EXP:
-		if (arg->type == FILTER_ARG_OP)
+	case TEP_FILTER_ARG_EXP:
+		if (arg->type == TEP_FILTER_ARG_OP)
 			arg = rotate_op_right(arg, op);
 		op->exp.left = arg;
 		break;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		op->op.left = arg;
 		break;
-	case FILTER_ARG_NUM:
-		if (arg->type == FILTER_ARG_OP)
+	case TEP_FILTER_ARG_NUM:
+		if (arg->type == TEP_FILTER_ARG_OP)
 			arg = rotate_op_right(arg, op);
 
 		/* left arg of compares must be a field */
-		if (arg->type != FILTER_ARG_FIELD &&
-		    arg->type != FILTER_ARG_BOOLEAN)
-			return PEVENT_ERRNO__INVALID_ARG_TYPE;
+		if (arg->type != TEP_FILTER_ARG_FIELD &&
+		    arg->type != TEP_FILTER_ARG_BOOLEAN)
+			return TEP_ERRNO__INVALID_ARG_TYPE;
 		op->num.left = arg;
 		break;
 	default:
-		return PEVENT_ERRNO__INVALID_ARG_TYPE;
+		return TEP_ERRNO__INVALID_ARG_TYPE;
 	}
 	return 0;
 }
@@ -666,91 +653,91 @@ enum op_type {
 };
 
 static enum op_type process_op(const char *token,
-			       enum filter_op_type *btype,
-			       enum filter_cmp_type *ctype,
-			       enum filter_exp_type *etype)
+			       enum tep_filter_op_type *btype,
+			       enum tep_filter_cmp_type *ctype,
+			       enum tep_filter_exp_type *etype)
 {
-	*btype = FILTER_OP_NOT;
-	*etype = FILTER_EXP_NONE;
-	*ctype = FILTER_CMP_NONE;
+	*btype = TEP_FILTER_OP_NOT;
+	*etype = TEP_FILTER_EXP_NONE;
+	*ctype = TEP_FILTER_CMP_NONE;
 
 	if (strcmp(token, "&&") == 0)
-		*btype = FILTER_OP_AND;
+		*btype = TEP_FILTER_OP_AND;
 	else if (strcmp(token, "||") == 0)
-		*btype = FILTER_OP_OR;
+		*btype = TEP_FILTER_OP_OR;
 	else if (strcmp(token, "!") == 0)
 		return OP_NOT;
 
-	if (*btype != FILTER_OP_NOT)
+	if (*btype != TEP_FILTER_OP_NOT)
 		return OP_BOOL;
 
 	/* Check for value expressions */
 	if (strcmp(token, "+") == 0) {
-		*etype = FILTER_EXP_ADD;
+		*etype = TEP_FILTER_EXP_ADD;
 	} else if (strcmp(token, "-") == 0) {
-		*etype = FILTER_EXP_SUB;
+		*etype = TEP_FILTER_EXP_SUB;
 	} else if (strcmp(token, "*") == 0) {
-		*etype = FILTER_EXP_MUL;
+		*etype = TEP_FILTER_EXP_MUL;
 	} else if (strcmp(token, "/") == 0) {
-		*etype = FILTER_EXP_DIV;
+		*etype = TEP_FILTER_EXP_DIV;
 	} else if (strcmp(token, "%") == 0) {
-		*etype = FILTER_EXP_MOD;
+		*etype = TEP_FILTER_EXP_MOD;
 	} else if (strcmp(token, ">>") == 0) {
-		*etype = FILTER_EXP_RSHIFT;
+		*etype = TEP_FILTER_EXP_RSHIFT;
 	} else if (strcmp(token, "<<") == 0) {
-		*etype = FILTER_EXP_LSHIFT;
+		*etype = TEP_FILTER_EXP_LSHIFT;
 	} else if (strcmp(token, "&") == 0) {
-		*etype = FILTER_EXP_AND;
+		*etype = TEP_FILTER_EXP_AND;
 	} else if (strcmp(token, "|") == 0) {
-		*etype = FILTER_EXP_OR;
+		*etype = TEP_FILTER_EXP_OR;
 	} else if (strcmp(token, "^") == 0) {
-		*etype = FILTER_EXP_XOR;
+		*etype = TEP_FILTER_EXP_XOR;
 	} else if (strcmp(token, "~") == 0)
-		*etype = FILTER_EXP_NOT;
+		*etype = TEP_FILTER_EXP_NOT;
 
-	if (*etype != FILTER_EXP_NONE)
+	if (*etype != TEP_FILTER_EXP_NONE)
 		return OP_EXP;
 
 	/* Check for compares */
 	if (strcmp(token, "==") == 0)
-		*ctype = FILTER_CMP_EQ;
+		*ctype = TEP_FILTER_CMP_EQ;
 	else if (strcmp(token, "!=") == 0)
-		*ctype = FILTER_CMP_NE;
+		*ctype = TEP_FILTER_CMP_NE;
 	else if (strcmp(token, "<") == 0)
-		*ctype = FILTER_CMP_LT;
+		*ctype = TEP_FILTER_CMP_LT;
 	else if (strcmp(token, ">") == 0)
-		*ctype = FILTER_CMP_GT;
+		*ctype = TEP_FILTER_CMP_GT;
 	else if (strcmp(token, "<=") == 0)
-		*ctype = FILTER_CMP_LE;
+		*ctype = TEP_FILTER_CMP_LE;
 	else if (strcmp(token, ">=") == 0)
-		*ctype = FILTER_CMP_GE;
+		*ctype = TEP_FILTER_CMP_GE;
 	else if (strcmp(token, "=~") == 0)
-		*ctype = FILTER_CMP_REGEX;
+		*ctype = TEP_FILTER_CMP_REGEX;
 	else if (strcmp(token, "!~") == 0)
-		*ctype = FILTER_CMP_NOT_REGEX;
+		*ctype = TEP_FILTER_CMP_NOT_REGEX;
 	else
 		return OP_NONE;
 
 	return OP_CMP;
 }
 
-static int check_op_done(struct filter_arg *arg)
+static int check_op_done(struct tep_filter_arg *arg)
 {
 	switch (arg->type) {
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		return arg->exp.right != NULL;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		return arg->op.right != NULL;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		return arg->num.right != NULL;
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		/* A string conversion is always done */
 		return 1;
 
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		/* field not found, is ok */
 		return 1;
 
@@ -765,17 +752,17 @@ enum filter_vals {
 	FILTER_VAL_TRUE,
 };
 
-static enum pevent_errno
-reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
-		struct filter_arg *arg, char *error_str)
+static enum tep_errno
+reparent_op_arg(struct tep_filter_arg *parent, struct tep_filter_arg *old_child,
+		struct tep_filter_arg *arg, char *error_str)
 {
-	struct filter_arg *other_child;
-	struct filter_arg **ptr;
+	struct tep_filter_arg *other_child;
+	struct tep_filter_arg **ptr;
 
-	if (parent->type != FILTER_ARG_OP &&
-	    arg->type != FILTER_ARG_OP) {
+	if (parent->type != TEP_FILTER_ARG_OP &&
+	    arg->type != TEP_FILTER_ARG_OP) {
 		show_error(error_str, "can not reparent other than OP");
-		return PEVENT_ERRNO__REPARENT_NOT_OP;
+		return TEP_ERRNO__REPARENT_NOT_OP;
 	}
 
 	/* Get the sibling */
@@ -787,7 +774,7 @@ reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
 		other_child = old_child->op.right;
 	} else {
 		show_error(error_str, "Error in reparent op, find other child");
-		return PEVENT_ERRNO__REPARENT_FAILED;
+		return TEP_ERRNO__REPARENT_FAILED;
 	}
 
 	/* Detach arg from old_child */
@@ -808,7 +795,7 @@ reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
 		ptr = &parent->op.left;
 	else {
 		show_error(error_str, "Error in reparent op");
-		return PEVENT_ERRNO__REPARENT_FAILED;
+		return TEP_ERRNO__REPARENT_FAILED;
 	}
 
 	*ptr = arg;
@@ -817,8 +804,8 @@ reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
 	return 0;
 }
 
-/* Returns either filter_vals (success) or pevent_errno (failfure) */
-static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
+/* Returns either filter_vals (success) or tep_errno (failfure) */
+static int test_arg(struct tep_filter_arg *parent, struct tep_filter_arg *arg,
 		    char *error_str)
 {
 	int lval, rval;
@@ -826,16 +813,16 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 	switch (arg->type) {
 
 		/* bad case */
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		return FILTER_VAL_FALSE + arg->boolean.value;
 
 		/* good cases: */
-	case FILTER_ARG_STR:
-	case FILTER_ARG_VALUE:
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_STR:
+	case TEP_FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_FIELD:
 		return FILTER_VAL_NORM;
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		lval = test_arg(arg, arg->exp.left, error_str);
 		if (lval != FILTER_VAL_NORM)
 			return lval;
@@ -844,7 +831,7 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 			return rval;
 		return FILTER_VAL_NORM;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		lval = test_arg(arg, arg->num.left, error_str);
 		if (lval != FILTER_VAL_NORM)
 			return lval;
@@ -853,14 +840,14 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 			return rval;
 		return FILTER_VAL_NORM;
 
-	case FILTER_ARG_OP:
-		if (arg->op.type != FILTER_OP_NOT) {
+	case TEP_FILTER_ARG_OP:
+		if (arg->op.type != TEP_FILTER_OP_NOT) {
 			lval = test_arg(arg, arg->op.left, error_str);
 			switch (lval) {
 			case FILTER_VAL_NORM:
 				break;
 			case FILTER_VAL_TRUE:
-				if (arg->op.type == FILTER_OP_OR)
+				if (arg->op.type == TEP_FILTER_OP_OR)
 					return FILTER_VAL_TRUE;
 				rval = test_arg(arg, arg->op.right, error_str);
 				if (rval != FILTER_VAL_NORM)
@@ -870,7 +857,7 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 						       error_str);
 
 			case FILTER_VAL_FALSE:
-				if (arg->op.type == FILTER_OP_AND)
+				if (arg->op.type == TEP_FILTER_OP_AND)
 					return FILTER_VAL_FALSE;
 				rval = test_arg(arg, arg->op.right, error_str);
 				if (rval != FILTER_VAL_NORM)
@@ -891,18 +878,18 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 			break;
 
 		case FILTER_VAL_TRUE:
-			if (arg->op.type == FILTER_OP_OR)
+			if (arg->op.type == TEP_FILTER_OP_OR)
 				return FILTER_VAL_TRUE;
-			if (arg->op.type == FILTER_OP_NOT)
+			if (arg->op.type == TEP_FILTER_OP_NOT)
 				return FILTER_VAL_FALSE;
 
 			return reparent_op_arg(parent, arg, arg->op.left,
 					       error_str);
 
 		case FILTER_VAL_FALSE:
-			if (arg->op.type == FILTER_OP_AND)
+			if (arg->op.type == TEP_FILTER_OP_AND)
 				return FILTER_VAL_FALSE;
-			if (arg->op.type == FILTER_OP_NOT)
+			if (arg->op.type == TEP_FILTER_OP_NOT)
 				return FILTER_VAL_TRUE;
 
 			return reparent_op_arg(parent, arg, arg->op.left,
@@ -912,14 +899,14 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 		return rval;
 	default:
 		show_error(error_str, "bad arg in filter tree");
-		return PEVENT_ERRNO__BAD_FILTER_ARG;
+		return TEP_ERRNO__BAD_FILTER_ARG;
 	}
 	return FILTER_VAL_NORM;
 }
 
 /* Remove any unknown event fields */
-static int collapse_tree(struct filter_arg *arg,
-			 struct filter_arg **arg_collapsed, char *error_str)
+static int collapse_tree(struct tep_filter_arg *arg,
+			 struct tep_filter_arg **arg_collapsed, char *error_str)
 {
 	int ret;
 
@@ -933,11 +920,11 @@ static int collapse_tree(struct filter_arg *arg,
 		free_arg(arg);
 		arg = allocate_arg();
 		if (arg) {
-			arg->type = FILTER_ARG_BOOLEAN;
+			arg->type = TEP_FILTER_ARG_BOOLEAN;
 			arg->boolean.value = ret == FILTER_VAL_TRUE;
 		} else {
 			show_error(error_str, "Failed to allocate filter arg");
-			ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			ret = TEP_ERRNO__MEM_ALLOC_FAILED;
 		}
 		break;
 
@@ -952,21 +939,21 @@ static int collapse_tree(struct filter_arg *arg,
 	return ret;
 }
 
-static enum pevent_errno
-process_filter(struct event_format *event, struct filter_arg **parg,
+static enum tep_errno
+process_filter(struct tep_event_format *event, struct tep_filter_arg **parg,
 	       char *error_str, int not)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token = NULL;
-	struct filter_arg *current_op = NULL;
-	struct filter_arg *current_exp = NULL;
-	struct filter_arg *left_item = NULL;
-	struct filter_arg *arg = NULL;
+	struct tep_filter_arg *current_op = NULL;
+	struct tep_filter_arg *current_exp = NULL;
+	struct tep_filter_arg *left_item = NULL;
+	struct tep_filter_arg *arg = NULL;
 	enum op_type op_type;
-	enum filter_op_type btype;
-	enum filter_exp_type etype;
-	enum filter_cmp_type ctype;
-	enum pevent_errno ret;
+	enum tep_filter_op_type btype;
+	enum tep_filter_exp_type etype;
+	enum tep_filter_cmp_type ctype;
+	enum tep_errno ret;
 
 	*parg = NULL;
 
@@ -974,9 +961,9 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 		free(token);
 		type = read_token(&token);
 		switch (type) {
-		case EVENT_SQUOTE:
-		case EVENT_DQUOTE:
-		case EVENT_ITEM:
+		case TEP_EVENT_SQUOTE:
+		case TEP_EVENT_DQUOTE:
+		case TEP_EVENT_ITEM:
 			ret = create_arg_item(event, token, type, &arg, error_str);
 			if (ret < 0)
 				goto fail;
@@ -1001,10 +988,10 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 			arg = NULL;
 			break;
 
-		case EVENT_DELIM:
+		case TEP_EVENT_DELIM:
 			if (*token == ',') {
 				show_error(error_str, "Illegal token ','");
-				ret = PEVENT_ERRNO__ILLEGAL_TOKEN;
+				ret = TEP_ERRNO__ILLEGAL_TOKEN;
 				goto fail;
 			}
 
@@ -1012,22 +999,22 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 				if (left_item) {
 					show_error(error_str,
 						   "Open paren can not come after item");
-					ret = PEVENT_ERRNO__INVALID_PAREN;
+					ret = TEP_ERRNO__INVALID_PAREN;
 					goto fail;
 				}
 				if (current_exp) {
 					show_error(error_str,
 						   "Open paren can not come after expression");
-					ret = PEVENT_ERRNO__INVALID_PAREN;
+					ret = TEP_ERRNO__INVALID_PAREN;
 					goto fail;
 				}
 
 				ret = process_filter(event, &arg, error_str, 0);
-				if (ret != PEVENT_ERRNO__UNBALANCED_PAREN) {
+				if (ret != TEP_ERRNO__UNBALANCED_PAREN) {
 					if (ret == 0) {
 						show_error(error_str,
 							   "Unbalanced number of '('");
-						ret = PEVENT_ERRNO__UNBALANCED_PAREN;
+						ret = TEP_ERRNO__UNBALANCED_PAREN;
 					}
 					goto fail;
 				}
@@ -1064,11 +1051,11 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 				else
 					*parg = current_exp;
 				free(token);
-				return PEVENT_ERRNO__UNBALANCED_PAREN;
+				return TEP_ERRNO__UNBALANCED_PAREN;
 			}
 			break;
 
-		case EVENT_OP:
+		case TEP_EVENT_OP:
 			op_type = process_op(token, &btype, &ctype, &etype);
 
 			/* All expect a left arg except for NOT */
@@ -1091,7 +1078,7 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 			case OP_NONE:
 				show_error(error_str,
 					   "Unknown op token %s", token);
-				ret = PEVENT_ERRNO__UNKNOWN_TOKEN;
+				ret = TEP_ERRNO__UNKNOWN_TOKEN;
 				goto fail;
 			}
 
@@ -1153,14 +1140,14 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 			if (ret < 0)
 				goto fail_syntax;
 			break;
-		case EVENT_NONE:
+		case TEP_EVENT_NONE:
 			break;
-		case EVENT_ERROR:
+		case TEP_EVENT_ERROR:
 			goto fail_alloc;
 		default:
 			goto fail_syntax;
 		}
-	} while (type != EVENT_NONE);
+	} while (type != TEP_EVENT_NONE);
 
 	if (!current_op && !current_exp)
 		goto fail_syntax;
@@ -1179,11 +1166,11 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 
  fail_alloc:
 	show_error(error_str, "failed to allocate filter arg");
-	ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+	ret = TEP_ERRNO__MEM_ALLOC_FAILED;
 	goto fail;
  fail_syntax:
 	show_error(error_str, "Syntax error");
-	ret = PEVENT_ERRNO__SYNTAX_ERROR;
+	ret = TEP_ERRNO__SYNTAX_ERROR;
  fail:
 	free_arg(current_op);
 	free_arg(current_exp);
@@ -1192,13 +1179,13 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 	return ret;
 }
 
-static enum pevent_errno
-process_event(struct event_format *event, const char *filter_str,
-	      struct filter_arg **parg, char *error_str)
+static enum tep_errno
+process_event(struct tep_event_format *event, const char *filter_str,
+	      struct tep_filter_arg **parg, char *error_str)
 {
 	int ret;
 
-	pevent_buffer_init(filter_str, strlen(filter_str));
+	tep_buffer_init(filter_str, strlen(filter_str));
 
 	ret = process_filter(event, parg, error_str, 0);
 	if (ret < 0)
@@ -1208,22 +1195,22 @@ process_event(struct event_format *event, const char *filter_str,
 	if (!*parg) {
 		*parg = allocate_arg();
 		if (*parg == NULL)
-			return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
 
-		(*parg)->type = FILTER_ARG_BOOLEAN;
-		(*parg)->boolean.value = FILTER_FALSE;
+		(*parg)->type = TEP_FILTER_ARG_BOOLEAN;
+		(*parg)->boolean.value = TEP_FILTER_FALSE;
 	}
 
 	return 0;
 }
 
-static enum pevent_errno
-filter_event(struct event_filter *filter, struct event_format *event,
+static enum tep_errno
+filter_event(struct tep_event_filter *filter, struct tep_event_format *event,
 	     const char *filter_str, char *error_str)
 {
-	struct filter_type *filter_type;
-	struct filter_arg *arg;
-	enum pevent_errno ret;
+	struct tep_filter_type *filter_type;
+	struct tep_filter_arg *arg;
+	enum tep_errno ret;
 
 	if (filter_str) {
 		ret = process_event(event, filter_str, &arg, error_str);
@@ -1234,15 +1221,15 @@ filter_event(struct event_filter *filter, struct event_format *event,
 		/* just add a TRUE arg */
 		arg = allocate_arg();
 		if (arg == NULL)
-			return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
 
-		arg->type = FILTER_ARG_BOOLEAN;
-		arg->boolean.value = FILTER_TRUE;
+		arg->type = TEP_FILTER_ARG_BOOLEAN;
+		arg->boolean.value = TEP_FILTER_TRUE;
 	}
 
 	filter_type = add_filter_type(filter, event->id);
 	if (filter_type == NULL)
-		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+		return TEP_ERRNO__MEM_ALLOC_FAILED;
 
 	if (filter_type->filter)
 		free_arg(filter_type->filter);
@@ -1251,26 +1238,26 @@ filter_event(struct event_filter *filter, struct event_format *event,
 	return 0;
 }
 
-static void filter_init_error_buf(struct event_filter *filter)
+static void filter_init_error_buf(struct tep_event_filter *filter)
 {
 	/* clear buffer to reset show error */
-	pevent_buffer_init("", 0);
+	tep_buffer_init("", 0);
 	filter->error_buffer[0] = '\0';
 }
 
 /**
- * pevent_filter_add_filter_str - add a new filter
+ * tep_filter_add_filter_str - add a new filter
  * @filter: the event filter to add to
  * @filter_str: the filter string that contains the filter
  *
  * Returns 0 if the filter was successfully added or a
- * negative error code.  Use pevent_filter_strerror() to see
+ * negative error code.  Use tep_filter_strerror() to see
  * actual error message in case of error.
  */
-enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
-					       const char *filter_str)
+enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
+					 const char *filter_str)
 {
-	struct pevent *pevent = filter->pevent;
+	struct tep_handle *pevent = filter->pevent;
 	struct event_list *event;
 	struct event_list *events = NULL;
 	const char *filter_start;
@@ -1279,7 +1266,7 @@ enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
 	char *event_name = NULL;
 	char *sys_name = NULL;
 	char *sp;
-	enum pevent_errno rtn = 0; /* PEVENT_ERRNO__SUCCESS */
+	enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */
 	int len;
 	int ret;
 
@@ -1305,7 +1292,7 @@ enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
 		if (this_event == NULL) {
 			/* This can only happen when events is NULL, but still */
 			free_events(events);
-			return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			return TEP_ERRNO__MEM_ALLOC_FAILED;
 		}
 		memcpy(this_event, filter_str, len);
 		this_event[len] = 0;
@@ -1322,7 +1309,7 @@ enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
 			/* This can only happen when events is NULL, but still */
 			free_events(events);
 			free(this_event);
-			return PEVENT_ERRNO__FILTER_NOT_FOUND;
+			return TEP_ERRNO__FILTER_NOT_FOUND;
 		}
 
 		/* Find this event */
@@ -1349,7 +1336,7 @@ enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
 
 		if (ret >= 0 && pevent->test_filters) {
 			char *test;
-			test = pevent_filter_make_string(filter, event->event->id);
+			test = tep_filter_make_string(filter, event->event->id);
 			if (test) {
 				printf(" '%s: %s'\n", event->event->name, test);
 				free(test);
@@ -1365,13 +1352,13 @@ enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
 	return rtn;
 }
 
-static void free_filter_type(struct filter_type *filter_type)
+static void free_filter_type(struct tep_filter_type *filter_type)
 {
 	free_arg(filter_type->filter);
 }
 
 /**
- * pevent_filter_strerror - fill error message in a buffer
+ * tep_filter_strerror - fill error message in a buffer
  * @filter: the event filter contains error
  * @err: the error code
  * @buf: the buffer to be filled in
@@ -1379,10 +1366,10 @@ static void free_filter_type(struct filter_type *filter_type)
  *
  * Returns 0 if message was filled successfully, -1 if error
  */
-int pevent_filter_strerror(struct event_filter *filter, enum pevent_errno err,
-			   char *buf, size_t buflen)
+int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
+			char *buf, size_t buflen)
 {
-	if (err <= __PEVENT_ERRNO__START || err >= __PEVENT_ERRNO__END)
+	if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END)
 		return -1;
 
 	if (strlen(filter->error_buffer) > 0) {
@@ -1393,11 +1380,11 @@ int pevent_filter_strerror(struct event_filter *filter, enum pevent_errno err,
 		return 0;
 	}
 
-	return pevent_strerror(filter->pevent, err, buf, buflen);
+	return tep_strerror(filter->pevent, err, buf, buflen);
 }
 
 /**
- * pevent_filter_remove_event - remove a filter for an event
+ * tep_filter_remove_event - remove a filter for an event
  * @filter: the event filter to remove from
  * @event_id: the event to remove a filter for
  *
@@ -1407,10 +1394,10 @@ int pevent_filter_strerror(struct event_filter *filter, enum pevent_errno err,
  * Returns 1: if an event was removed
  *   0: if the event was not found
  */
-int pevent_filter_remove_event(struct event_filter *filter,
-			       int event_id)
+int tep_filter_remove_event(struct tep_event_filter *filter,
+			    int event_id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	unsigned long len;
 
 	if (!filter->filters)
@@ -1437,12 +1424,12 @@ int pevent_filter_remove_event(struct event_filter *filter,
 }
 
 /**
- * pevent_filter_reset - clear all filters in a filter
+ * tep_filter_reset - clear all filters in a filter
  * @filter: the event filter to reset
  *
  * Removes all filters from a filter and resets it.
  */
-void pevent_filter_reset(struct event_filter *filter)
+void tep_filter_reset(struct tep_event_filter *filter)
 {
 	int i;
 
@@ -1454,23 +1441,23 @@ void pevent_filter_reset(struct event_filter *filter)
 	filter->event_filters = NULL;
 }
 
-void pevent_filter_free(struct event_filter *filter)
+void tep_filter_free(struct tep_event_filter *filter)
 {
-	pevent_unref(filter->pevent);
+	tep_unref(filter->pevent);
 
-	pevent_filter_reset(filter);
+	tep_filter_reset(filter);
 
 	free(filter);
 }
 
-static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg);
+static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg);
 
-static int copy_filter_type(struct event_filter *filter,
-			     struct event_filter *source,
-			     struct filter_type *filter_type)
+static int copy_filter_type(struct tep_event_filter *filter,
+			    struct tep_event_filter *source,
+			    struct tep_filter_type *filter_type)
 {
-	struct filter_arg *arg;
-	struct event_format *event;
+	struct tep_filter_arg *arg;
+	struct tep_event_format *event;
 	const char *sys;
 	const char *name;
 	char *str;
@@ -1478,7 +1465,7 @@ static int copy_filter_type(struct event_filter *filter,
 	/* Can't assume that the pevent's are the same */
 	sys = filter_type->event->system;
 	name = filter_type->event->name;
-	event = pevent_find_event_by_name(filter->pevent, sys, name);
+	event = tep_find_event_by_name(filter->pevent, sys, name);
 	if (!event)
 		return -1;
 
@@ -1492,7 +1479,7 @@ static int copy_filter_type(struct event_filter *filter,
 		if (arg == NULL)
 			return -1;
 
-		arg->type = FILTER_ARG_BOOLEAN;
+		arg->type = TEP_FILTER_ARG_BOOLEAN;
 		if (strcmp(str, "TRUE") == 0)
 			arg->boolean.value = 1;
 		else
@@ -1515,18 +1502,18 @@ static int copy_filter_type(struct event_filter *filter,
 }
 
 /**
- * pevent_filter_copy - copy a filter using another filter
+ * tep_filter_copy - copy a filter using another filter
  * @dest - the filter to copy to
  * @source - the filter to copy from
  *
  * Returns 0 on success and -1 if not all filters were copied
  */
-int pevent_filter_copy(struct event_filter *dest, struct event_filter *source)
+int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source)
 {
 	int ret = 0;
 	int i;
 
-	pevent_filter_reset(dest);
+	tep_filter_reset(dest);
 
 	for (i = 0; i < source->filters; i++) {
 		if (copy_filter_type(dest, source, &source->event_filters[i]))
@@ -1537,7 +1524,7 @@ int pevent_filter_copy(struct event_filter *dest, struct event_filter *source)
 
 
 /**
- * pevent_update_trivial - update the trivial filters with the given filter
+ * tep_update_trivial - update the trivial filters with the given filter
  * @dest - the filter to update
  * @source - the filter as the source of the update
  * @type - the type of trivial filter to update.
@@ -1547,14 +1534,14 @@ int pevent_filter_copy(struct event_filter *dest, struct event_filter *source)
  * Returns 0 on success and -1 if there was a problem updating, but
  *   events may have still been updated on error.
  */
-int pevent_update_trivial(struct event_filter *dest, struct event_filter *source,
-			  enum filter_trivial_type type)
+int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source,
+		       enum tep_filter_trivial_type type)
 {
-	struct pevent *src_pevent;
-	struct pevent *dest_pevent;
-	struct event_format *event;
-	struct filter_type *filter_type;
-	struct filter_arg *arg;
+	struct tep_handle *src_pevent;
+	struct tep_handle *dest_pevent;
+	struct tep_event_format *event;
+	struct tep_filter_type *filter_type;
+	struct tep_filter_arg *arg;
 	char *str;
 	int i;
 
@@ -1568,24 +1555,24 @@ int pevent_update_trivial(struct event_filter *dest, struct event_filter *source
 	for (i = 0; i < dest->filters; i++) {
 		filter_type = &dest->event_filters[i];
 		arg = filter_type->filter;
-		if (arg->type != FILTER_ARG_BOOLEAN)
+		if (arg->type != TEP_FILTER_ARG_BOOLEAN)
 			continue;
-		if ((arg->boolean.value && type == FILTER_TRIVIAL_FALSE) ||
-		    (!arg->boolean.value && type == FILTER_TRIVIAL_TRUE))
+		if ((arg->boolean.value && type == TEP_FILTER_TRIVIAL_FALSE) ||
+		    (!arg->boolean.value && type == TEP_FILTER_TRIVIAL_TRUE))
 			continue;
 
 		event = filter_type->event;
 
 		if (src_pevent != dest_pevent) {
 			/* do a look up */
-			event = pevent_find_event_by_name(src_pevent,
-							  event->system,
-							  event->name);
+			event = tep_find_event_by_name(src_pevent,
+						       event->system,
+						       event->name);
 			if (!event)
 				return -1;
 		}
 
-		str = pevent_filter_make_string(source, event->id);
+		str = tep_filter_make_string(source, event->id);
 		if (!str)
 			continue;
 
@@ -1598,7 +1585,7 @@ int pevent_update_trivial(struct event_filter *dest, struct event_filter *source
 }
 
 /**
- * pevent_filter_clear_trivial - clear TRUE and FALSE filters
+ * tep_filter_clear_trivial - clear TRUE and FALSE filters
  * @filter: the filter to remove trivial filters from
  * @type: remove only true, false, or both
  *
@@ -1606,10 +1593,10 @@ int pevent_update_trivial(struct event_filter *dest, struct event_filter *source
  *
  * Returns 0 on success and -1 if there was a problem.
  */
-int pevent_filter_clear_trivial(struct event_filter *filter,
-				 enum filter_trivial_type type)
+int tep_filter_clear_trivial(struct tep_event_filter *filter,
+			     enum tep_filter_trivial_type type)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	int count = 0;
 	int *ids = NULL;
 	int i;
@@ -1625,14 +1612,14 @@ int pevent_filter_clear_trivial(struct event_filter *filter,
 		int *new_ids;
 
 		filter_type = &filter->event_filters[i];
-		if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+		if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN)
 			continue;
 		switch (type) {
-		case FILTER_TRIVIAL_FALSE:
+		case TEP_FILTER_TRIVIAL_FALSE:
 			if (filter_type->filter->boolean.value)
 				continue;
 			break;
-		case FILTER_TRIVIAL_TRUE:
+		case TEP_FILTER_TRIVIAL_TRUE:
 			if (!filter_type->filter->boolean.value)
 				continue;
 		default:
@@ -1653,14 +1640,14 @@ int pevent_filter_clear_trivial(struct event_filter *filter,
 		return 0;
 
 	for (i = 0; i < count; i++)
-		pevent_filter_remove_event(filter, ids[i]);
+		tep_filter_remove_event(filter, ids[i]);
 
 	free(ids);
 	return 0;
 }
 
 /**
- * pevent_filter_event_has_trivial - return true event contains trivial filter
+ * tep_filter_event_has_trivial - return true event contains trivial filter
  * @filter: the filter with the information
  * @event_id: the id of the event to test
  * @type: trivial type to test for (TRUE, FALSE, EITHER)
@@ -1668,11 +1655,11 @@ int pevent_filter_clear_trivial(struct event_filter *filter,
  * Returns 1 if the event contains a matching trivial type
  *  otherwise 0.
  */
-int pevent_filter_event_has_trivial(struct event_filter *filter,
-				    int event_id,
-				    enum filter_trivial_type type)
+int tep_filter_event_has_trivial(struct tep_event_filter *filter,
+				 int event_id,
+				 enum tep_filter_trivial_type type)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 
 	if (!filter->filters)
 		return 0;
@@ -1682,37 +1669,37 @@ int pevent_filter_event_has_trivial(struct event_filter *filter,
 	if (!filter_type)
 		return 0;
 
-	if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+	if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN)
 		return 0;
 
 	switch (type) {
-	case FILTER_TRIVIAL_FALSE:
+	case TEP_FILTER_TRIVIAL_FALSE:
 		return !filter_type->filter->boolean.value;
 
-	case FILTER_TRIVIAL_TRUE:
+	case TEP_FILTER_TRIVIAL_TRUE:
 		return filter_type->filter->boolean.value;
 	default:
 		return 1;
 	}
 }
 
-static int test_filter(struct event_format *event, struct filter_arg *arg,
-		       struct pevent_record *record, enum pevent_errno *err);
+static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg,
+		       struct tep_record *record, enum tep_errno *err);
 
 static const char *
-get_comm(struct event_format *event, struct pevent_record *record)
+get_comm(struct tep_event_format *event, struct tep_record *record)
 {
 	const char *comm;
 	int pid;
 
-	pid = pevent_data_pid(event->pevent, record);
-	comm = pevent_data_comm_from_pid(event->pevent, pid);
+	pid = tep_data_pid(event->pevent, record);
+	comm = tep_data_comm_from_pid(event->pevent, pid);
 	return comm;
 }
 
 static unsigned long long
-get_value(struct event_format *event,
-	  struct format_field *field, struct pevent_record *record)
+get_value(struct tep_event_format *event,
+	  struct tep_format_field *field, struct tep_record *record)
 {
 	unsigned long long val;
 
@@ -1728,9 +1715,9 @@ get_value(struct event_format *event,
 	if (field == &cpu)
 		return record->cpu;
 
-	pevent_read_number_field(field, record->data, &val);
+	tep_read_number_field(field, record->data, &val);
 
-	if (!(field->flags & FIELD_IS_SIGNED))
+	if (!(field->flags & TEP_FIELD_IS_SIGNED))
 		return val;
 
 	switch (field->size) {
@@ -1747,12 +1734,12 @@ get_value(struct event_format *event,
 }
 
 static unsigned long long
-get_arg_value(struct event_format *event, struct filter_arg *arg,
-	      struct pevent_record *record, enum pevent_errno *err);
+get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
+	      struct tep_record *record, enum tep_errno *err);
 
 static unsigned long long
-get_exp_value(struct event_format *event, struct filter_arg *arg,
-	      struct pevent_record *record, enum pevent_errno *err)
+get_exp_value(struct tep_event_format *event, struct tep_filter_arg *arg,
+	      struct tep_record *record, enum tep_errno *err)
 {
 	unsigned long long lval, rval;
 
@@ -1767,71 +1754,71 @@ get_exp_value(struct event_format *event, struct filter_arg *arg,
 	}
 
 	switch (arg->exp.type) {
-	case FILTER_EXP_ADD:
+	case TEP_FILTER_EXP_ADD:
 		return lval + rval;
 
-	case FILTER_EXP_SUB:
+	case TEP_FILTER_EXP_SUB:
 		return lval - rval;
 
-	case FILTER_EXP_MUL:
+	case TEP_FILTER_EXP_MUL:
 		return lval * rval;
 
-	case FILTER_EXP_DIV:
+	case TEP_FILTER_EXP_DIV:
 		return lval / rval;
 
-	case FILTER_EXP_MOD:
+	case TEP_FILTER_EXP_MOD:
 		return lval % rval;
 
-	case FILTER_EXP_RSHIFT:
+	case TEP_FILTER_EXP_RSHIFT:
 		return lval >> rval;
 
-	case FILTER_EXP_LSHIFT:
+	case TEP_FILTER_EXP_LSHIFT:
 		return lval << rval;
 
-	case FILTER_EXP_AND:
+	case TEP_FILTER_EXP_AND:
 		return lval & rval;
 
-	case FILTER_EXP_OR:
+	case TEP_FILTER_EXP_OR:
 		return lval | rval;
 
-	case FILTER_EXP_XOR:
+	case TEP_FILTER_EXP_XOR:
 		return lval ^ rval;
 
-	case FILTER_EXP_NOT:
+	case TEP_FILTER_EXP_NOT:
 	default:
 		if (!*err)
-			*err = PEVENT_ERRNO__INVALID_EXP_TYPE;
+			*err = TEP_ERRNO__INVALID_EXP_TYPE;
 	}
 	return 0;
 }
 
 static unsigned long long
-get_arg_value(struct event_format *event, struct filter_arg *arg,
-	      struct pevent_record *record, enum pevent_errno *err)
+get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
+	      struct tep_record *record, enum tep_errno *err)
 {
 	switch (arg->type) {
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_FIELD:
 		return get_value(event, arg->field.field, record);
 
-	case FILTER_ARG_VALUE:
-		if (arg->value.type != FILTER_NUMBER) {
+	case TEP_FILTER_ARG_VALUE:
+		if (arg->value.type != TEP_FILTER_NUMBER) {
 			if (!*err)
-				*err = PEVENT_ERRNO__NOT_A_NUMBER;
+				*err = TEP_ERRNO__NOT_A_NUMBER;
 		}
 		return arg->value.val;
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		return get_exp_value(event, arg, record, err);
 
 	default:
 		if (!*err)
-			*err = PEVENT_ERRNO__INVALID_ARG_TYPE;
+			*err = TEP_ERRNO__INVALID_ARG_TYPE;
 	}
 	return 0;
 }
 
-static int test_num(struct event_format *event, struct filter_arg *arg,
-		    struct pevent_record *record, enum pevent_errno *err)
+static int test_num(struct tep_event_format *event, struct tep_filter_arg *arg,
+		    struct tep_record *record, enum tep_errno *err)
 {
 	unsigned long long lval, rval;
 
@@ -1846,46 +1833,46 @@ static int test_num(struct event_format *event, struct filter_arg *arg,
 	}
 
 	switch (arg->num.type) {
-	case FILTER_CMP_EQ:
+	case TEP_FILTER_CMP_EQ:
 		return lval == rval;
 
-	case FILTER_CMP_NE:
+	case TEP_FILTER_CMP_NE:
 		return lval != rval;
 
-	case FILTER_CMP_GT:
+	case TEP_FILTER_CMP_GT:
 		return lval > rval;
 
-	case FILTER_CMP_LT:
+	case TEP_FILTER_CMP_LT:
 		return lval < rval;
 
-	case FILTER_CMP_GE:
+	case TEP_FILTER_CMP_GE:
 		return lval >= rval;
 
-	case FILTER_CMP_LE:
+	case TEP_FILTER_CMP_LE:
 		return lval <= rval;
 
 	default:
 		if (!*err)
-			*err = PEVENT_ERRNO__ILLEGAL_INTEGER_CMP;
+			*err = TEP_ERRNO__ILLEGAL_INTEGER_CMP;
 		return 0;
 	}
 }
 
-static const char *get_field_str(struct filter_arg *arg, struct pevent_record *record)
+static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record)
 {
-	struct event_format *event;
-	struct pevent *pevent;
+	struct tep_event_format *event;
+	struct tep_handle *pevent;
 	unsigned long long addr;
 	const char *val = NULL;
 	unsigned int size;
 	char hex[64];
 
 	/* If the field is not a string convert it */
-	if (arg->str.field->flags & FIELD_IS_STRING) {
+	if (arg->str.field->flags & TEP_FIELD_IS_STRING) {
 		val = record->data + arg->str.field->offset;
 		size = arg->str.field->size;
 
-		if (arg->str.field->flags & FIELD_IS_DYNAMIC) {
+		if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
 			addr = *(unsigned int *)val;
 			val = record->data + (addr & 0xffff);
 			size = addr >> 16;
@@ -1907,9 +1894,9 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r
 		pevent = event->pevent;
 		addr = get_value(event, arg->str.field, record);
 
-		if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG))
+		if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG))
 			/* convert to a kernel symbol */
-			val = pevent_find_function(pevent, addr);
+			val = tep_find_function(pevent, addr);
 
 		if (val == NULL) {
 			/* just use the hex of the string name */
@@ -1921,8 +1908,8 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r
 	return val;
 }
 
-static int test_str(struct event_format *event, struct filter_arg *arg,
-		    struct pevent_record *record, enum pevent_errno *err)
+static int test_str(struct tep_event_format *event, struct tep_filter_arg *arg,
+		    struct tep_record *record, enum tep_errno *err)
 {
 	const char *val;
 
@@ -1932,50 +1919,50 @@ static int test_str(struct event_format *event, struct filter_arg *arg,
 		val = get_field_str(arg, record);
 
 	switch (arg->str.type) {
-	case FILTER_CMP_MATCH:
+	case TEP_FILTER_CMP_MATCH:
 		return strcmp(val, arg->str.val) == 0;
 
-	case FILTER_CMP_NOT_MATCH:
+	case TEP_FILTER_CMP_NOT_MATCH:
 		return strcmp(val, arg->str.val) != 0;
 
-	case FILTER_CMP_REGEX:
+	case TEP_FILTER_CMP_REGEX:
 		/* Returns zero on match */
 		return !regexec(&arg->str.reg, val, 0, NULL, 0);
 
-	case FILTER_CMP_NOT_REGEX:
+	case TEP_FILTER_CMP_NOT_REGEX:
 		return regexec(&arg->str.reg, val, 0, NULL, 0);
 
 	default:
 		if (!*err)
-			*err = PEVENT_ERRNO__ILLEGAL_STRING_CMP;
+			*err = TEP_ERRNO__ILLEGAL_STRING_CMP;
 		return 0;
 	}
 }
 
-static int test_op(struct event_format *event, struct filter_arg *arg,
-		   struct pevent_record *record, enum pevent_errno *err)
+static int test_op(struct tep_event_format *event, struct tep_filter_arg *arg,
+		   struct tep_record *record, enum tep_errno *err)
 {
 	switch (arg->op.type) {
-	case FILTER_OP_AND:
+	case TEP_FILTER_OP_AND:
 		return test_filter(event, arg->op.left, record, err) &&
 			test_filter(event, arg->op.right, record, err);
 
-	case FILTER_OP_OR:
+	case TEP_FILTER_OP_OR:
 		return test_filter(event, arg->op.left, record, err) ||
 			test_filter(event, arg->op.right, record, err);
 
-	case FILTER_OP_NOT:
+	case TEP_FILTER_OP_NOT:
 		return !test_filter(event, arg->op.right, record, err);
 
 	default:
 		if (!*err)
-			*err = PEVENT_ERRNO__INVALID_OP_TYPE;
+			*err = TEP_ERRNO__INVALID_OP_TYPE;
 		return 0;
 	}
 }
 
-static int test_filter(struct event_format *event, struct filter_arg *arg,
-		       struct pevent_record *record, enum pevent_errno *err)
+static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg,
+		       struct tep_record *record, enum tep_errno *err)
 {
 	if (*err) {
 		/*
@@ -1985,22 +1972,22 @@ static int test_filter(struct event_format *event, struct filter_arg *arg,
 	}
 
 	switch (arg->type) {
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		/* easy case */
 		return arg->boolean.value;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		return test_op(event, arg, record, err);
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		return test_num(event, arg, record, err);
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		return test_str(event, arg, record, err);
 
-	case FILTER_ARG_EXP:
-	case FILTER_ARG_VALUE:
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_FIELD:
 		/*
 		 * Expressions, fields and values evaluate
 		 * to true if they return non zero
@@ -2009,22 +1996,22 @@ static int test_filter(struct event_format *event, struct filter_arg *arg,
 
 	default:
 		if (!*err)
-			*err = PEVENT_ERRNO__INVALID_ARG_TYPE;
+			*err = TEP_ERRNO__INVALID_ARG_TYPE;
 		return 0;
 	}
 }
 
 /**
- * pevent_event_filtered - return true if event has filter
+ * tep_event_filtered - return true if event has filter
  * @filter: filter struct with filter information
  * @event_id: event id to test if filter exists
  *
  * Returns 1 if filter found for @event_id
  *   otherwise 0;
  */
-int pevent_event_filtered(struct event_filter *filter, int event_id)
+int tep_event_filtered(struct tep_event_filter *filter, int event_id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 
 	if (!filter->filters)
 		return 0;
@@ -2035,45 +2022,45 @@ int pevent_event_filtered(struct event_filter *filter, int event_id)
 }
 
 /**
- * pevent_filter_match - test if a record matches a filter
+ * tep_filter_match - test if a record matches a filter
  * @filter: filter struct with filter information
  * @record: the record to test against the filter
  *
- * Returns: match result or error code (prefixed with PEVENT_ERRNO__)
+ * Returns: match result or error code (prefixed with TEP_ERRNO__)
  * FILTER_MATCH - filter found for event and @record matches
  * FILTER_MISS  - filter found for event and @record does not match
  * FILTER_NOT_FOUND - no filter found for @record's event
  * NO_FILTER - if no filters exist
  * otherwise - error occurred during test
  */
-enum pevent_errno pevent_filter_match(struct event_filter *filter,
-				      struct pevent_record *record)
+enum tep_errno tep_filter_match(struct tep_event_filter *filter,
+				struct tep_record *record)
 {
-	struct pevent *pevent = filter->pevent;
-	struct filter_type *filter_type;
+	struct tep_handle *pevent = filter->pevent;
+	struct tep_filter_type *filter_type;
 	int event_id;
 	int ret;
-	enum pevent_errno err = 0;
+	enum tep_errno err = 0;
 
 	filter_init_error_buf(filter);
 
 	if (!filter->filters)
-		return PEVENT_ERRNO__NO_FILTER;
+		return TEP_ERRNO__NO_FILTER;
 
-	event_id = pevent_data_type(pevent, record);
+	event_id = tep_data_type(pevent, record);
 
 	filter_type = find_filter_type(filter, event_id);
 	if (!filter_type)
-		return PEVENT_ERRNO__FILTER_NOT_FOUND;
+		return TEP_ERRNO__FILTER_NOT_FOUND;
 
 	ret = test_filter(filter_type->event, filter_type->filter, record, &err);
 	if (err)
 		return err;
 
-	return ret ? PEVENT_ERRNO__FILTER_MATCH : PEVENT_ERRNO__FILTER_MISS;
+	return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS;
 }
 
-static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 	char *left = NULL;
@@ -2084,10 +2071,10 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 	int val;
 
 	switch (arg->op.type) {
-	case FILTER_OP_AND:
+	case TEP_FILTER_OP_AND:
 		op = "&&";
 		/* fall through */
-	case FILTER_OP_OR:
+	case TEP_FILTER_OP_OR:
 		if (!op)
 			op = "||";
 
@@ -2108,8 +2095,8 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 			right_val = 0;
 
 		if (left_val >= 0) {
-			if ((arg->op.type == FILTER_OP_AND && !left_val) ||
-			    (arg->op.type == FILTER_OP_OR && left_val)) {
+			if ((arg->op.type == TEP_FILTER_OP_AND && !left_val) ||
+			    (arg->op.type == TEP_FILTER_OP_OR && left_val)) {
 				/* Just return left value */
 				str = left;
 				left = NULL;
@@ -2119,10 +2106,10 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 				/* just evaluate this. */
 				val = 0;
 				switch (arg->op.type) {
-				case FILTER_OP_AND:
+				case TEP_FILTER_OP_AND:
 					val = left_val && right_val;
 					break;
-				case FILTER_OP_OR:
+				case TEP_FILTER_OP_OR:
 					val = left_val || right_val;
 					break;
 				default:
@@ -2133,8 +2120,8 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 			}
 		}
 		if (right_val >= 0) {
-			if ((arg->op.type == FILTER_OP_AND && !right_val) ||
-			    (arg->op.type == FILTER_OP_OR && right_val)) {
+			if ((arg->op.type == TEP_FILTER_OP_AND && !right_val) ||
+			    (arg->op.type == TEP_FILTER_OP_OR && right_val)) {
 				/* Just return right value */
 				str = right;
 				right = NULL;
@@ -2149,7 +2136,7 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 		asprintf(&str, "(%s) %s (%s)", left, op, right);
 		break;
 
-	case FILTER_OP_NOT:
+	case TEP_FILTER_OP_NOT:
 		op = "!";
 		right = arg_to_str(filter, arg->op.right);
 		if (!right)
@@ -2177,7 +2164,7 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 	return str;
 }
 
-static char *val_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 
@@ -2186,12 +2173,12 @@ static char *val_to_str(struct event_filter *filter, struct filter_arg *arg)
 	return str;
 }
 
-static char *field_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *field_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	return strdup(arg->field.field->name);
 }
 
-static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *lstr;
 	char *rstr;
@@ -2204,34 +2191,34 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
 		goto out;
 
 	switch (arg->exp.type) {
-	case FILTER_EXP_ADD:
+	case TEP_FILTER_EXP_ADD:
 		op = "+";
 		break;
-	case FILTER_EXP_SUB:
+	case TEP_FILTER_EXP_SUB:
 		op = "-";
 		break;
-	case FILTER_EXP_MUL:
+	case TEP_FILTER_EXP_MUL:
 		op = "*";
 		break;
-	case FILTER_EXP_DIV:
+	case TEP_FILTER_EXP_DIV:
 		op = "/";
 		break;
-	case FILTER_EXP_MOD:
+	case TEP_FILTER_EXP_MOD:
 		op = "%";
 		break;
-	case FILTER_EXP_RSHIFT:
+	case TEP_FILTER_EXP_RSHIFT:
 		op = ">>";
 		break;
-	case FILTER_EXP_LSHIFT:
+	case TEP_FILTER_EXP_LSHIFT:
 		op = "<<";
 		break;
-	case FILTER_EXP_AND:
+	case TEP_FILTER_EXP_AND:
 		op = "&";
 		break;
-	case FILTER_EXP_OR:
+	case TEP_FILTER_EXP_OR:
 		op = "|";
 		break;
-	case FILTER_EXP_XOR:
+	case TEP_FILTER_EXP_XOR:
 		op = "^";
 		break;
 	default:
@@ -2247,7 +2234,7 @@ out:
 	return str;
 }
 
-static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *lstr;
 	char *rstr;
@@ -2260,26 +2247,26 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
 		goto out;
 
 	switch (arg->num.type) {
-	case FILTER_CMP_EQ:
+	case TEP_FILTER_CMP_EQ:
 		op = "==";
 		/* fall through */
-	case FILTER_CMP_NE:
+	case TEP_FILTER_CMP_NE:
 		if (!op)
 			op = "!=";
 		/* fall through */
-	case FILTER_CMP_GT:
+	case TEP_FILTER_CMP_GT:
 		if (!op)
 			op = ">";
 		/* fall through */
-	case FILTER_CMP_LT:
+	case TEP_FILTER_CMP_LT:
 		if (!op)
 			op = "<";
 		/* fall through */
-	case FILTER_CMP_GE:
+	case TEP_FILTER_CMP_GE:
 		if (!op)
 			op = ">=";
 		/* fall through */
-	case FILTER_CMP_LE:
+	case TEP_FILTER_CMP_LE:
 		if (!op)
 			op = "<=";
 
@@ -2297,24 +2284,24 @@ out:
 	return str;
 }
 
-static char *str_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 	char *op = NULL;
 
 	switch (arg->str.type) {
-	case FILTER_CMP_MATCH:
+	case TEP_FILTER_CMP_MATCH:
 		op = "==";
 		/* fall through */
-	case FILTER_CMP_NOT_MATCH:
+	case TEP_FILTER_CMP_NOT_MATCH:
 		if (!op)
 			op = "!=";
 		/* fall through */
-	case FILTER_CMP_REGEX:
+	case TEP_FILTER_CMP_REGEX:
 		if (!op)
 			op = "=~";
 		/* fall through */
-	case FILTER_CMP_NOT_REGEX:
+	case TEP_FILTER_CMP_NOT_REGEX:
 		if (!op)
 			op = "!~";
 
@@ -2329,31 +2316,31 @@ static char *str_to_str(struct event_filter *filter, struct filter_arg *arg)
 	return str;
 }
 
-static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 
 	switch (arg->type) {
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
 		return str;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		return op_to_str(filter, arg);
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		return num_to_str(filter, arg);
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		return str_to_str(filter, arg);
 
-	case FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_VALUE:
 		return val_to_str(filter, arg);
 
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_FIELD:
 		return field_to_str(filter, arg);
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		return exp_to_str(filter, arg);
 
 	default:
@@ -2364,7 +2351,7 @@ static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
 }
 
 /**
- * pevent_filter_make_string - return a string showing the filter
+ * tep_filter_make_string - return a string showing the filter
  * @filter: filter struct with filter information
  * @event_id: the event id to return the filter string with
  *
@@ -2373,9 +2360,9 @@ static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
  *  NULL is returned if no filter is found or allocation failed.
  */
 char *
-pevent_filter_make_string(struct event_filter *filter, int event_id)
+tep_filter_make_string(struct tep_event_filter *filter, int event_id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 
 	if (!filter->filters)
 		return NULL;
@@ -2389,7 +2376,7 @@ pevent_filter_make_string(struct event_filter *filter, int event_id)
 }
 
 /**
- * pevent_filter_compare - compare two filters and return if they are the same
+ * tep_filter_compare - compare two filters and return if they are the same
  * @filter1: Filter to compare with @filter2
  * @filter2: Filter to compare with @filter1
  *
@@ -2397,10 +2384,10 @@ pevent_filter_make_string(struct event_filter *filter, int event_id)
  *  1 if the two filters hold the same content.
  *  0 if they do not.
  */
-int pevent_filter_compare(struct event_filter *filter1, struct event_filter *filter2)
+int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2)
 {
-	struct filter_type *filter_type1;
-	struct filter_type *filter_type2;
+	struct tep_filter_type *filter_type1;
+	struct tep_filter_type *filter_type2;
 	char *str1, *str2;
 	int result;
 	int i;
@@ -2423,8 +2410,8 @@ int pevent_filter_compare(struct event_filter *filter1, struct event_filter *fil
 		if (filter_type1->filter->type != filter_type2->filter->type)
 			break;
 		switch (filter_type1->filter->type) {
-		case FILTER_TRIVIAL_FALSE:
-		case FILTER_TRIVIAL_TRUE:
+		case TEP_FILTER_TRIVIAL_FALSE:
+		case TEP_FILTER_TRIVIAL_TRUE:
 			/* trivial types just need the type compared */
 			continue;
 		default:
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
index eda07fa31dca..77e4ec6402dd 100644
--- a/tools/lib/traceevent/parse-utils.c
+++ b/tools/lib/traceevent/parse-utils.c
@@ -1,21 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
index 8f8586912da7..a51b366f47da 100644
--- a/tools/lib/traceevent/plugin_cfg80211.c
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -25,19 +25,19 @@ process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
 	return val ? (long long) le16toh(*val) : 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_print_function(pevent,
-				       process___le16_to_cpup,
-				       PEVENT_FUNC_ARG_INT,
-				       "__le16_to_cpup",
-				       PEVENT_FUNC_ARG_PTR,
-				       PEVENT_FUNC_ARG_VOID);
+	tep_register_print_function(pevent,
+				    process___le16_to_cpup,
+				    TEP_FUNC_ARG_INT,
+				    "__le16_to_cpup",
+				    TEP_FUNC_ARG_PTR,
+				    TEP_FUNC_ARG_VOID);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_print_function(pevent, process___le16_to_cpup,
-					 "__le16_to_cpup");
+	tep_unregister_print_function(pevent, process___le16_to_cpup,
+				      "__le16_to_cpup");
 }
diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c
index 42dbf73758f3..528acc75d81a 100644
--- a/tools/lib/traceevent/plugin_function.c
+++ b/tools/lib/traceevent/plugin_function.c
@@ -23,6 +23,7 @@
 
 #include "event-parse.h"
 #include "event-utils.h"
+#include "trace-seq.h"
 
 static struct func_stack {
 	int size;
@@ -33,7 +34,7 @@ static int cpus = -1;
 
 #define STK_BLK 10
 
-struct pevent_plugin_option plugin_options[] =
+struct tep_plugin_option plugin_options[] =
 {
 	{
 		.name = "parent",
@@ -53,8 +54,8 @@ struct pevent_plugin_option plugin_options[] =
 	}
 };
 
-static struct pevent_plugin_option *ftrace_parent = &plugin_options[0];
-static struct pevent_plugin_option *ftrace_indent = &plugin_options[1];
+static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
+static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
 
 static void add_child(struct func_stack *stack, const char *child, int pos)
 {
@@ -122,25 +123,25 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
 	return 0;
 }
 
-static int function_handler(struct trace_seq *s, struct pevent_record *record,
-			    struct event_format *event, void *context)
+static int function_handler(struct trace_seq *s, struct tep_record *record,
+			    struct tep_event_format *event, void *context)
 {
-	struct pevent *pevent = event->pevent;
+	struct tep_handle *pevent = event->pevent;
 	unsigned long long function;
 	unsigned long long pfunction;
 	const char *func;
 	const char *parent;
 	int index = 0;
 
-	if (pevent_get_field_val(s, event, "ip", record, &function, 1))
+	if (tep_get_field_val(s, event, "ip", record, &function, 1))
 		return trace_seq_putc(s, '!');
 
-	func = pevent_find_function(pevent, function);
+	func = tep_find_function(pevent, function);
 
-	if (pevent_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
+	if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
 		return trace_seq_putc(s, '!');
 
-	parent = pevent_find_function(pevent, pfunction);
+	parent = tep_find_function(pevent, pfunction);
 
 	if (parent && ftrace_indent->set)
 		index = add_and_get_index(parent, func, record->cpu);
@@ -163,22 +164,22 @@ static int function_handler(struct trace_seq *s, struct pevent_record *record,
 	return 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_event_handler(pevent, -1, "ftrace", "function",
-				      function_handler, NULL);
+	tep_register_event_handler(pevent, -1, "ftrace", "function",
+				   function_handler, NULL);
 
-	traceevent_plugin_add_options("ftrace", plugin_options);
+	tep_plugin_add_options("ftrace", plugin_options);
 
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
 	int i, x;
 
-	pevent_unregister_event_handler(pevent, -1, "ftrace", "function",
-					function_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "ftrace", "function",
+				     function_handler, NULL);
 
 	for (i = 0; i <= cpus; i++) {
 		for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
@@ -186,7 +187,7 @@ void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
 		free(fstack[i].stack);
 	}
 
-	traceevent_plugin_remove_options(plugin_options);
+	tep_plugin_remove_options(plugin_options);
 
 	free(fstack);
 	fstack = NULL;
diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c
index 12bf14cc1152..9aa05b4ca811 100644
--- a/tools/lib/traceevent/plugin_hrtimer.c
+++ b/tools/lib/traceevent/plugin_hrtimer.c
@@ -23,66 +23,67 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 static int timer_expire_handler(struct trace_seq *s,
-				struct pevent_record *record,
-				struct event_format *event, void *context)
+				struct tep_record *record,
+				struct tep_event_format *event, void *context)
 {
 	trace_seq_printf(s, "hrtimer=");
 
-	if (pevent_print_num_field(s, "0x%llx", event, "timer",
-				   record, 0) == -1)
-		pevent_print_num_field(s, "0x%llx", event, "hrtimer",
-				       record, 1);
+	if (tep_print_num_field(s, "0x%llx", event, "timer",
+				record, 0) == -1)
+		tep_print_num_field(s, "0x%llx", event, "hrtimer",
+				    record, 1);
 
 	trace_seq_printf(s, " now=");
 
-	pevent_print_num_field(s, "%llu", event, "now", record, 1);
+	tep_print_num_field(s, "%llu", event, "now", record, 1);
 
-	pevent_print_func_field(s, " function=%s", event, "function",
+	tep_print_func_field(s, " function=%s", event, "function",
 				record, 0);
 	return 0;
 }
 
 static int timer_start_handler(struct trace_seq *s,
-			       struct pevent_record *record,
-			       struct event_format *event, void *context)
+			       struct tep_record *record,
+			       struct tep_event_format *event, void *context)
 {
 	trace_seq_printf(s, "hrtimer=");
 
-	if (pevent_print_num_field(s, "0x%llx", event, "timer",
-				   record, 0) == -1)
-		pevent_print_num_field(s, "0x%llx", event, "hrtimer",
-				       record, 1);
+	if (tep_print_num_field(s, "0x%llx", event, "timer",
+				record, 0) == -1)
+		tep_print_num_field(s, "0x%llx", event, "hrtimer",
+				    record, 1);
 
-	pevent_print_func_field(s, " function=%s", event, "function",
-				record, 0);
+	tep_print_func_field(s, " function=%s", event, "function",
+			     record, 0);
 
 	trace_seq_printf(s, " expires=");
-	pevent_print_num_field(s, "%llu", event, "expires", record, 1);
+	tep_print_num_field(s, "%llu", event, "expires", record, 1);
 
 	trace_seq_printf(s, " softexpires=");
-	pevent_print_num_field(s, "%llu", event, "softexpires", record, 1);
+	tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
 	return 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_event_handler(pevent, -1,
-				      "timer", "hrtimer_expire_entry",
-				      timer_expire_handler, NULL);
+	tep_register_event_handler(pevent, -1,
+				   "timer", "hrtimer_expire_entry",
+				   timer_expire_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "timer", "hrtimer_start",
-				      timer_start_handler, NULL);
+	tep_register_event_handler(pevent, -1, "timer", "hrtimer_start",
+				   timer_start_handler, NULL);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_event_handler(pevent, -1,
-					"timer", "hrtimer_expire_entry",
-					timer_expire_handler, NULL);
+	tep_unregister_event_handler(pevent, -1,
+				     "timer", "hrtimer_expire_entry",
+				     timer_expire_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "timer", "hrtimer_start",
-					timer_start_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "timer", "hrtimer_start",
+				     timer_start_handler, NULL);
 }
diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c
index 5c23d5bd27ce..a5e34135dd6a 100644
--- a/tools/lib/traceevent/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugin_jbd2.c
@@ -22,6 +22,7 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #define MINORBITS	20
 #define MINORMASK	((1U << MINORBITS) - 1)
@@ -47,29 +48,29 @@ process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
 	return jiffies;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_print_function(pevent,
-				       process_jbd2_dev_to_name,
-				       PEVENT_FUNC_ARG_STRING,
-				       "jbd2_dev_to_name",
-				       PEVENT_FUNC_ARG_INT,
-				       PEVENT_FUNC_ARG_VOID);
+	tep_register_print_function(pevent,
+				    process_jbd2_dev_to_name,
+				    TEP_FUNC_ARG_STRING,
+				    "jbd2_dev_to_name",
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID);
 
-	pevent_register_print_function(pevent,
-				       process_jiffies_to_msecs,
-				       PEVENT_FUNC_ARG_LONG,
-				       "jiffies_to_msecs",
-				       PEVENT_FUNC_ARG_LONG,
-				       PEVENT_FUNC_ARG_VOID);
+	tep_register_print_function(pevent,
+				    process_jiffies_to_msecs,
+				    TEP_FUNC_ARG_LONG,
+				    "jiffies_to_msecs",
+				    TEP_FUNC_ARG_LONG,
+				    TEP_FUNC_ARG_VOID);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_print_function(pevent, process_jbd2_dev_to_name,
-					 "jbd2_dev_to_name");
+	tep_unregister_print_function(pevent, process_jbd2_dev_to_name,
+				      "jbd2_dev_to_name");
 
-	pevent_unregister_print_function(pevent, process_jiffies_to_msecs,
-					 "jiffies_to_msecs");
+	tep_unregister_print_function(pevent, process_jiffies_to_msecs,
+				      "jiffies_to_msecs");
 }
diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c
index 70650ff48d78..1beb4eaddfdf 100644
--- a/tools/lib/traceevent/plugin_kmem.c
+++ b/tools/lib/traceevent/plugin_kmem.c
@@ -22,73 +22,74 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
-static int call_site_handler(struct trace_seq *s, struct pevent_record *record,
-			     struct event_format *event, void *context)
+static int call_site_handler(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event_format *event, void *context)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val, addr;
 	void *data = record->data;
 	const char *func;
 
-	field = pevent_find_field(event, "call_site");
+	field = tep_find_field(event, "call_site");
 	if (!field)
 		return 1;
 
-	if (pevent_read_number_field(field, data, &val))
+	if (tep_read_number_field(field, data, &val))
 		return 1;
 
-	func = pevent_find_function(event->pevent, val);
+	func = tep_find_function(event->pevent, val);
 	if (!func)
 		return 1;
 
-	addr = pevent_find_function_address(event->pevent, val);
+	addr = tep_find_function_address(event->pevent, val);
 
 	trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
 	return 1;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_event_handler(pevent, -1, "kmem", "kfree",
-				      call_site_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kmem", "kfree",
+				   call_site_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kmem", "kmalloc",
-				      call_site_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kmem", "kmalloc",
+				   call_site_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kmem", "kmalloc_node",
-				      call_site_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kmem", "kmalloc_node",
+				   call_site_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
-				      call_site_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
+				   call_site_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kmem",
-				      "kmem_cache_alloc_node",
-				      call_site_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kmem",
+				   "kmem_cache_alloc_node",
+				   call_site_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kmem", "kmem_cache_free",
-				      call_site_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_free",
+				   call_site_handler, NULL);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_event_handler(pevent, -1, "kmem", "kfree",
-					call_site_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kmem", "kfree",
+				     call_site_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kmem", "kmalloc",
-					call_site_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc",
+				     call_site_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node",
-					call_site_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node",
+				     call_site_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
-					call_site_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
+				     call_site_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kmem",
-					"kmem_cache_alloc_node",
-					call_site_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kmem",
+				     "kmem_cache_alloc_node",
+				     call_site_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free",
-					call_site_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free",
+				     call_site_handler, NULL);
 }
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
index 18536f756577..d13c22846fa9 100644
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -23,6 +23,7 @@
 #include <stdint.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #ifdef HAVE_UDIS86
 
@@ -247,17 +248,17 @@ static const char *find_exit_reason(unsigned isa, int val)
 	return strings[i].str;
 }
 
-static int print_exit_reason(struct trace_seq *s, struct pevent_record *record,
-			     struct event_format *event, const char *field)
+static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
+			     struct tep_event_format *event, const char *field)
 {
 	unsigned long long isa;
 	unsigned long long val;
 	const char *reason;
 
-	if (pevent_get_field_val(s, event, field, record, &val, 1) < 0)
+	if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
 		return -1;
 
-	if (pevent_get_field_val(s, event, "isa", record, &isa, 0) < 0)
+	if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
 		isa = 1;
 
 	reason = find_exit_reason(isa, val);
@@ -268,18 +269,18 @@ static int print_exit_reason(struct trace_seq *s, struct pevent_record *record,
 	return 0;
 }
 
-static int kvm_exit_handler(struct trace_seq *s, struct pevent_record *record,
-			    struct event_format *event, void *context)
+static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
+			    struct tep_event_format *event, void *context)
 {
 	unsigned long long info1 = 0, info2 = 0;
 
 	if (print_exit_reason(s, record, event, "exit_reason") < 0)
 		return -1;
 
-	pevent_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
+	tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
 
-	if (pevent_get_field_val(s, event, "info1", record, &info1, 0) >= 0
-	    && pevent_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
+	if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
+	    && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
 		trace_seq_printf(s, " info %llx %llx", info1, info2);
 
 	return 0;
@@ -291,30 +292,30 @@ static int kvm_exit_handler(struct trace_seq *s, struct pevent_record *record,
 #define KVM_EMUL_INSN_F_CS_L   (1 << 3)
 
 static int kvm_emulate_insn_handler(struct trace_seq *s,
-				    struct pevent_record *record,
-				    struct event_format *event, void *context)
+				    struct tep_record *record,
+				    struct tep_event_format *event, void *context)
 {
 	unsigned long long rip, csbase, len, flags, failed;
 	int llen;
 	uint8_t *insn;
 	const char *disasm;
 
-	if (pevent_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+	if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
 		return -1;
 
-	if (pevent_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
+	if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
 		return -1;
 
-	if (pevent_get_field_val(s, event, "len", record, &len, 1) < 0)
+	if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
 		return -1;
 
-	if (pevent_get_field_val(s, event, "flags", record, &flags, 1) < 0)
+	if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
 		return -1;
 
-	if (pevent_get_field_val(s, event, "failed", record, &failed, 1) < 0)
+	if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
 		return -1;
 
-	insn = pevent_get_field_raw(s, event, "insn", record, &llen, 1);
+	insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
 	if (!insn)
 		return -1;
 
@@ -330,24 +331,24 @@ static int kvm_emulate_insn_handler(struct trace_seq *s,
 }
 
 
-static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct pevent_record *record,
-					    struct event_format *event, void *context)
+static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
+					    struct tep_event_format *event, void *context)
 {
 	if (print_exit_reason(s, record, event, "exit_code") < 0)
 		return -1;
 
-	pevent_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
-	pevent_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
-	pevent_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
-	pevent_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
+	tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
+	tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
+	tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
+	tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
 
 	return 0;
 }
 
-static int kvm_nested_vmexit_handler(struct trace_seq *s, struct pevent_record *record,
-				     struct event_format *event, void *context)
+static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
+				     struct tep_event_format *event, void *context)
 {
-	pevent_print_num_field(s, "rip %llx ", event, "rip", record, 1);
+	tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
 
 	return kvm_nested_vmexit_inject_handler(s, record, event, context);
 }
@@ -370,8 +371,8 @@ union kvm_mmu_page_role {
 	};
 };
 
-static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
-			      struct event_format *event, void *context)
+static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
+			      struct tep_event_format *event, void *context)
 {
 	unsigned long long val;
 	static const char *access_str[] = {
@@ -379,7 +380,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
 	};
 	union kvm_mmu_page_role role;
 
-	if (pevent_get_field_val(s, event, "role", record, &val, 1) < 0)
+	if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
 		return -1;
 
 	role.word = (int)val;
@@ -388,8 +389,8 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
 	 * We can only use the structure if file is of the same
 	 * endianess.
 	 */
-	if (pevent_is_file_bigendian(event->pevent) ==
-	    pevent_is_host_bigendian(event->pevent)) {
+	if (tep_is_file_bigendian(event->pevent) ==
+	    tep_is_host_bigendian(event->pevent)) {
 
 		trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
 				 role.level,
@@ -406,10 +407,10 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
 	} else
 		trace_seq_printf(s, "WORD: %08x", role.word);
 
-	pevent_print_num_field(s, " root %u ",  event,
-			       "root_count", record, 1);
+	tep_print_num_field(s, " root %u ",  event,
+			    "root_count", record, 1);
 
-	if (pevent_get_field_val(s, event, "unsync", record, &val, 1) < 0)
+	if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
 		return -1;
 
 	trace_seq_printf(s, "%s%c",  val ? "unsync" : "sync", 0);
@@ -417,17 +418,17 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
 }
 
 static int kvm_mmu_get_page_handler(struct trace_seq *s,
-				    struct pevent_record *record,
-				    struct event_format *event, void *context)
+				    struct tep_record *record,
+				    struct tep_event_format *event, void *context)
 {
 	unsigned long long val;
 
-	if (pevent_get_field_val(s, event, "created", record, &val, 1) < 0)
+	if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
 		return -1;
 
 	trace_seq_printf(s, "%s ", val ? "new" : "existing");
 
-	if (pevent_get_field_val(s, event, "gfn", record, &val, 1) < 0)
+	if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
 		return -1;
 
 	trace_seq_printf(s, "sp gfn %llx ", val);
@@ -444,79 +445,79 @@ process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
 	return pte & PT_WRITABLE_MASK;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
 	init_disassembler();
 
-	pevent_register_event_handler(pevent, -1, "kvm", "kvm_exit",
-				      kvm_exit_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kvm", "kvm_exit",
+				   kvm_exit_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
-				      kvm_emulate_insn_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
+				   kvm_emulate_insn_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
-				      kvm_nested_vmexit_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
+				   kvm_nested_vmexit_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
-				      kvm_nested_vmexit_inject_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
+				   kvm_nested_vmexit_inject_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
-				      kvm_mmu_get_page_handler, NULL);
+	tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
+				   kvm_mmu_get_page_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
-				      kvm_mmu_print_role, NULL);
+	tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
+				   kvm_mmu_print_role, NULL);
 
-	pevent_register_event_handler(pevent, -1,
-				      "kvmmmu", "kvm_mmu_unsync_page",
-				      kvm_mmu_print_role, NULL);
+	tep_register_event_handler(pevent, -1,
+				   "kvmmmu", "kvm_mmu_unsync_page",
+				   kvm_mmu_print_role, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
-				      kvm_mmu_print_role, NULL);
+	tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
+				   kvm_mmu_print_role, NULL);
 
-	pevent_register_event_handler(pevent, -1, "kvmmmu",
+	tep_register_event_handler(pevent, -1, "kvmmmu",
 			"kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
 			NULL);
 
-	pevent_register_print_function(pevent,
-				       process_is_writable_pte,
-				       PEVENT_FUNC_ARG_INT,
-				       "is_writable_pte",
-				       PEVENT_FUNC_ARG_LONG,
-				       PEVENT_FUNC_ARG_VOID);
+	tep_register_print_function(pevent,
+				    process_is_writable_pte,
+				    TEP_FUNC_ARG_INT,
+				    "is_writable_pte",
+				    TEP_FUNC_ARG_LONG,
+				    TEP_FUNC_ARG_VOID);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_exit",
-					kvm_exit_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvm", "kvm_exit",
+				     kvm_exit_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
-					kvm_emulate_insn_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
+				     kvm_emulate_insn_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
-					kvm_nested_vmexit_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
+				     kvm_nested_vmexit_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
-					kvm_nested_vmexit_inject_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
+				     kvm_nested_vmexit_inject_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
-					kvm_mmu_get_page_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
+				     kvm_mmu_get_page_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
-					kvm_mmu_print_role, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
+				     kvm_mmu_print_role, NULL);
 
-	pevent_unregister_event_handler(pevent, -1,
-					"kvmmmu", "kvm_mmu_unsync_page",
-					kvm_mmu_print_role, NULL);
+	tep_unregister_event_handler(pevent, -1,
+				     "kvmmmu", "kvm_mmu_unsync_page",
+				     kvm_mmu_print_role, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
-					kvm_mmu_print_role, NULL);
+	tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
+				     kvm_mmu_print_role, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "kvmmmu",
+	tep_unregister_event_handler(pevent, -1, "kvmmmu",
 			"kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
 			NULL);
 
-	pevent_unregister_print_function(pevent, process_is_writable_pte,
-					 "is_writable_pte");
+	tep_unregister_print_function(pevent, process_is_writable_pte,
+				      "is_writable_pte");
 }
diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c
index 7e15a0f1c2fd..da3855e7b86f 100644
--- a/tools/lib/traceevent/plugin_mac80211.c
+++ b/tools/lib/traceevent/plugin_mac80211.c
@@ -22,13 +22,14 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #define INDENT 65
 
-static void print_string(struct trace_seq *s, struct event_format *event,
+static void print_string(struct trace_seq *s, struct tep_event_format *event,
 			 const char *name, const void *data)
 {
-	struct format_field *f = pevent_find_field(event, name);
+	struct tep_format_field *f = tep_find_field(event, name);
 	int offset;
 	int length;
 
@@ -42,7 +43,7 @@ static void print_string(struct trace_seq *s, struct event_format *event,
 
 	if (!strncmp(f->type, "__data_loc", 10)) {
 		unsigned long long v;
-		if (pevent_read_number_field(f, data, &v)) {
+		if (tep_read_number_field(f, data, &v)) {
 			trace_seq_printf(s, "invalid_data_loc");
 			return;
 		}
@@ -53,20 +54,20 @@ static void print_string(struct trace_seq *s, struct event_format *event,
 	trace_seq_printf(s, "%.*s", length, (char *)data + offset);
 }
 
-#define SF(fn)	pevent_print_num_field(s, fn ":%d", event, fn, record, 0)
-#define SFX(fn)	pevent_print_num_field(s, fn ":%#x", event, fn, record, 0)
+#define SF(fn)	tep_print_num_field(s, fn ":%d", event, fn, record, 0)
+#define SFX(fn)	tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
 #define SP()	trace_seq_putc(s, ' ')
 
 static int drv_bss_info_changed(struct trace_seq *s,
-				struct pevent_record *record,
-				struct event_format *event, void *context)
+				struct tep_record *record,
+				struct tep_event_format *event, void *context)
 {
 	void *data = record->data;
 
 	print_string(s, event, "wiphy_name", data);
 	trace_seq_printf(s, " vif:");
 	print_string(s, event, "vif_name", data);
-	pevent_print_num_field(s, "(%d)", event, "vif_type", record, 1);
+	tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
 
 	trace_seq_printf(s, "\n%*s", INDENT, "");
 	SF("assoc"); SP();
@@ -86,17 +87,17 @@ static int drv_bss_info_changed(struct trace_seq *s,
 	return 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_event_handler(pevent, -1, "mac80211",
-				      "drv_bss_info_changed",
-				      drv_bss_info_changed, NULL);
+	tep_register_event_handler(pevent, -1, "mac80211",
+				   "drv_bss_info_changed",
+				   drv_bss_info_changed, NULL);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_event_handler(pevent, -1, "mac80211",
-					"drv_bss_info_changed",
-					drv_bss_info_changed, NULL);
+	tep_unregister_event_handler(pevent, -1, "mac80211",
+				     "drv_bss_info_changed",
+				     drv_bss_info_changed, NULL);
 }
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
index ec30c2fcbac0..77882272672f 100644
--- a/tools/lib/traceevent/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -22,6 +22,7 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 static void write_state(struct trace_seq *s, int val)
 {
@@ -44,8 +45,8 @@ static void write_state(struct trace_seq *s, int val)
 		trace_seq_putc(s, 'R');
 }
 
-static void write_and_save_comm(struct format_field *field,
-				struct pevent_record *record,
+static void write_and_save_comm(struct tep_format_field *field,
+				struct tep_record *record,
 				struct trace_seq *s, int pid)
 {
 	const char *comm;
@@ -61,100 +62,100 @@ static void write_and_save_comm(struct format_field *field,
 	comm = &s->buffer[len];
 
 	/* Help out the comm to ids. This will handle dups */
-	pevent_register_comm(field->event->pevent, comm, pid);
+	tep_register_comm(field->event->pevent, comm, pid);
 }
 
 static int sched_wakeup_handler(struct trace_seq *s,
-				struct pevent_record *record,
-				struct event_format *event, void *context)
+				struct tep_record *record,
+				struct tep_event_format *event, void *context)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val;
 
-	if (pevent_get_field_val(s, event, "pid", record, &val, 1))
+	if (tep_get_field_val(s, event, "pid", record, &val, 1))
 		return trace_seq_putc(s, '!');
 
-	field = pevent_find_any_field(event, "comm");
+	field = tep_find_any_field(event, "comm");
 	if (field) {
 		write_and_save_comm(field, record, s, val);
 		trace_seq_putc(s, ':');
 	}
 	trace_seq_printf(s, "%lld", val);
 
-	if (pevent_get_field_val(s, event, "prio", record, &val, 0) == 0)
+	if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
 		trace_seq_printf(s, " [%lld]", val);
 
-	if (pevent_get_field_val(s, event, "success", record, &val, 1) == 0)
+	if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
 		trace_seq_printf(s, " success=%lld", val);
 
-	if (pevent_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
+	if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
 		trace_seq_printf(s, " CPU:%03llu", val);
 
 	return 0;
 }
 
 static int sched_switch_handler(struct trace_seq *s,
-				struct pevent_record *record,
-				struct event_format *event, void *context)
+				struct tep_record *record,
+				struct tep_event_format *event, void *context)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val;
 
-	if (pevent_get_field_val(s, event, "prev_pid", record, &val, 1))
+	if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
 		return trace_seq_putc(s, '!');
 
-	field = pevent_find_any_field(event, "prev_comm");
+	field = tep_find_any_field(event, "prev_comm");
 	if (field) {
 		write_and_save_comm(field, record, s, val);
 		trace_seq_putc(s, ':');
 	}
 	trace_seq_printf(s, "%lld ", val);
 
-	if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
+	if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
 		trace_seq_printf(s, "[%d] ", (int) val);
 
-	if (pevent_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
+	if (tep_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
 		write_state(s, val);
 
 	trace_seq_puts(s, " ==> ");
 
-	if (pevent_get_field_val(s, event, "next_pid", record, &val, 1))
+	if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
 		return trace_seq_putc(s, '!');
 
-	field = pevent_find_any_field(event, "next_comm");
+	field = tep_find_any_field(event, "next_comm");
 	if (field) {
 		write_and_save_comm(field, record, s, val);
 		trace_seq_putc(s, ':');
 	}
 	trace_seq_printf(s, "%lld", val);
 
-	if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
+	if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
 		trace_seq_printf(s, " [%d]", (int) val);
 
 	return 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_event_handler(pevent, -1, "sched", "sched_switch",
-				      sched_switch_handler, NULL);
+	tep_register_event_handler(pevent, -1, "sched", "sched_switch",
+				   sched_switch_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup",
-				      sched_wakeup_handler, NULL);
+	tep_register_event_handler(pevent, -1, "sched", "sched_wakeup",
+				   sched_wakeup_handler, NULL);
 
-	pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup_new",
-				      sched_wakeup_handler, NULL);
+	tep_register_event_handler(pevent, -1, "sched", "sched_wakeup_new",
+				   sched_wakeup_handler, NULL);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_event_handler(pevent, -1, "sched", "sched_switch",
-					sched_switch_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "sched", "sched_switch",
+				     sched_switch_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "sched", "sched_wakeup",
-					sched_wakeup_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup",
+				     sched_wakeup_handler, NULL);
 
-	pevent_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new",
-					sched_wakeup_handler, NULL);
+	tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new",
+				     sched_wakeup_handler, NULL);
 }
diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c
index 5e750af2b461..4eba25cc1431 100644
--- a/tools/lib/traceevent/plugin_scsi.c
+++ b/tools/lib/traceevent/plugin_scsi.c
@@ -3,6 +3,7 @@
 #include <string.h>
 #include <inttypes.h>
 #include "event-parse.h"
+#include "trace-seq.h"
 
 typedef unsigned long sector_t;
 typedef uint64_t u64;
@@ -413,21 +414,21 @@ unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
 	return 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_print_function(pevent,
-				       process_scsi_trace_parse_cdb,
-				       PEVENT_FUNC_ARG_STRING,
-				       "scsi_trace_parse_cdb",
-				       PEVENT_FUNC_ARG_PTR,
-				       PEVENT_FUNC_ARG_PTR,
-				       PEVENT_FUNC_ARG_INT,
-				       PEVENT_FUNC_ARG_VOID);
+	tep_register_print_function(pevent,
+				    process_scsi_trace_parse_cdb,
+				    TEP_FUNC_ARG_STRING,
+				    "scsi_trace_parse_cdb",
+				    TEP_FUNC_ARG_PTR,
+				    TEP_FUNC_ARG_PTR,
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_print_function(pevent, process_scsi_trace_parse_cdb,
-					 "scsi_trace_parse_cdb");
+	tep_unregister_print_function(pevent, process_scsi_trace_parse_cdb,
+				      "scsi_trace_parse_cdb");
 }
diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c
index 690173bfa13e..bc0496e4c296 100644
--- a/tools/lib/traceevent/plugin_xen.c
+++ b/tools/lib/traceevent/plugin_xen.c
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #define __HYPERVISOR_set_trap_table			0
 #define __HYPERVISOR_mmu_update				1
@@ -119,19 +120,19 @@ unsigned long long process_xen_hypercall_name(struct trace_seq *s,
 	return 0;
 }
 
-int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
 {
-	pevent_register_print_function(pevent,
-				       process_xen_hypercall_name,
-				       PEVENT_FUNC_ARG_STRING,
-				       "xen_hypercall_name",
-				       PEVENT_FUNC_ARG_INT,
-				       PEVENT_FUNC_ARG_VOID);
+	tep_register_print_function(pevent,
+				    process_xen_hypercall_name,
+				    TEP_FUNC_ARG_STRING,
+				    "xen_hypercall_name",
+				    TEP_FUNC_ARG_INT,
+				    TEP_FUNC_ARG_VOID);
 	return 0;
 }
 
-void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
 {
-	pevent_unregister_print_function(pevent, process_xen_hypercall_name,
-					 "xen_hypercall_name");
+	tep_unregister_print_function(pevent, process_xen_hypercall_name,
+				      "xen_hypercall_name");
 }
diff --git a/tools/lib/traceevent/tep_strerror.c b/tools/lib/traceevent/tep_strerror.c
new file mode 100644
index 000000000000..4ac26445b2f6
--- /dev/null
+++ b/tools/lib/traceevent/tep_strerror.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: LGPL-2.1
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+
+#include "event-parse.h"
+
+#undef _PE
+#define _PE(code, str) str
+static const char * const tep_error_str[] = {
+	TEP_ERRORS
+};
+#undef _PE
+
+/*
+ * The tools so far have been using the strerror_r() GNU variant, that returns
+ * a string, be it the buffer passed or something else.
+ *
+ * But that, besides being tricky in cases where we expect that the function
+ * using strerror_r() returns the error formatted in a provided buffer (we have
+ * to check if it returned something else and copy that instead), breaks the
+ * build on systems not using glibc, like Alpine Linux, where musl libc is
+ * used.
+ *
+ * So, introduce yet another wrapper, str_error_r(), that has the GNU
+ * interface, but uses the portable XSI variant of strerror_r(), so that users
+ * rest asured that the provided buffer is used and it is what is returned.
+ */
+int tep_strerror(struct tep_handle *tep __maybe_unused,
+		 enum tep_errno errnum, char *buf, size_t buflen)
+{
+	const char *msg;
+	int idx;
+
+	if (!buflen)
+		return 0;
+
+	if (errnum >= 0) {
+		int err = strerror_r(errnum, buf, buflen);
+		buf[buflen - 1] = 0;
+		return err;
+	}
+
+	if (errnum <= __TEP_ERRNO__START ||
+	    errnum >= __TEP_ERRNO__END)
+		return -1;
+
+	idx = errnum - __TEP_ERRNO__START - 1;
+	msg = tep_error_str[idx];
+	snprintf(buf, buflen, "%s", msg);
+
+	return 0;
+}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index 292dc9f1d233..8ff1d55954d1 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
+#include "trace-seq.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h
new file mode 100644
index 000000000000..d68ec69f8d1a
--- /dev/null
+++ b/tools/lib/traceevent/trace-seq.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#ifndef _TRACE_SEQ_H
+#define _TRACE_SEQ_H
+
+#include <stdarg.h>
+#include <stdio.h>
+
+/* ----------------------- trace_seq ----------------------- */
+
+#ifndef TRACE_SEQ_BUF_SIZE
+#define TRACE_SEQ_BUF_SIZE 4096
+#endif
+
+enum trace_seq_fail {
+	TRACE_SEQ__GOOD,
+	TRACE_SEQ__BUFFER_POISONED,
+	TRACE_SEQ__MEM_ALLOC_FAILED,
+};
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE).
+ */
+
+struct trace_seq {
+	char			*buffer;
+	unsigned int		buffer_size;
+	unsigned int		len;
+	unsigned int		readpos;
+	enum trace_seq_fail	state;
+};
+
+void trace_seq_init(struct trace_seq *s);
+void trace_seq_reset(struct trace_seq *s);
+void trace_seq_destroy(struct trace_seq *s);
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
+
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+
+extern void trace_seq_terminate(struct trace_seq *s);
+
+extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
+extern int trace_seq_do_printf(struct trace_seq *s);
+
+#endif /* _TRACE_SEQ_H */
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 956b1ae4aafb..33ba98d72b16 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -1,6 +1,6 @@
                                   Prior Operation     Subsequent Operation
                                   ---------------  ---------------------------
-                               C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV
+                               C  Self  R  W  RMW  Self  R  W  DR  DW  RMW  SV
                               --  ----  -  -  ---  ----  -  -  --  --  ---  --
 
 Store, e.g., WRITE_ONCE()            Y                                       Y
@@ -14,7 +14,7 @@ smp_wmb()                                  Y    W           Y       Y    W
 smp_mb() & synchronize_rcu()  CP        Y  Y    Y        Y  Y   Y   Y    Y
 Successful full non-void RMW  CP     Y  Y  Y    Y     Y  Y  Y   Y   Y    Y   Y
 smp_mb__before_atomic()       CP        Y  Y    Y        a  a   a   a    Y
-smp_mb__after_atomic()        CP        a  a    Y        Y  Y   Y   Y
+smp_mb__after_atomic()        CP        a  a    Y        Y  Y   Y   Y    Y
 
 
 Key:	C:	Ordering is cumulative
@@ -26,4 +26,5 @@ Key:	C:	Ordering is cumulative
 	DR:	Dependent read (address dependency)
 	DW:	Dependent write (address, data, or control dependency)
 	RMW:	Atomic read-modify-write operation
-	SV	Same-variable access
+	SELF:	Orders self, as opposed to accesses before and/or after
+	SV:	Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index a727c82bd434..35bff92cc773 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -27,8 +27,9 @@ Explanation of the Linux-Kernel Memory Consistency Model
   19. AND THEN THERE WAS ALPHA
   20. THE HAPPENS-BEFORE RELATION: hb
   21. THE PROPAGATES-BEFORE RELATION: pb
-  22. RCU RELATIONS: link, gp-link, rscs-link, and rcu-path
-  23. ODDS AND ENDS
+  22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+  23. LOCKING
+  24. ODDS AND ENDS
 
 
 
@@ -804,7 +805,7 @@ type of fence:
 Second, some types of fence affect the way the memory subsystem
 propagates stores.  When a fence instruction is executed on CPU C:
 
-	For each other CPU C', smb_wmb() forces all po-earlier stores
+	For each other CPU C', smp_wmb() forces all po-earlier stores
 	on C to propagate to C' before any po-later stores do.
 
 	For each other CPU C', any store which propagates to C before
@@ -1067,28 +1068,6 @@ allowing out-of-order writes like this to occur.  The model avoided
 violating the write-write coherence rule by requiring the CPU not to
 send the W write to the memory subsystem at all!)
 
-There is one last example of preserved program order in the LKMM: when
-a load-acquire reads from an earlier store-release.  For example:
-
-	smp_store_release(&x, 123);
-	r1 = smp_load_acquire(&x);
-
-If the smp_load_acquire() ends up obtaining the 123 value that was
-stored by the smp_store_release(), the LKMM says that the load must be
-executed after the store; the store cannot be forwarded to the load.
-This requirement does not arise from the operational model, but it
-yields correct predictions on all architectures supported by the Linux
-kernel, although for differing reasons.
-
-On some architectures, including x86 and ARMv8, it is true that the
-store cannot be forwarded to the load.  On others, including PowerPC
-and ARMv7, smp_store_release() generates object code that starts with
-a fence and smp_load_acquire() generates object code that ends with a
-fence.  The upshot is that even though the store may be forwarded to
-the load, it is still true that any instruction preceding the store
-will be executed before the load or any following instructions, and
-the store will be executed before any instruction following the load.
-
 
 AND THEN THERE WAS ALPHA
 ------------------------
@@ -1451,8 +1430,8 @@ they execute means that it cannot have cycles.  This requirement is
 the content of the LKMM's "propagation" axiom.
 
 
-RCU RELATIONS: link, gp-link, rscs-link, and rcu-path
------------------------------------------------------
+RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+----------------------------------------------------
 
 RCU (Read-Copy-Update) is a powerful synchronization mechanism.  It
 rests on two concepts: grace periods and read-side critical sections.
@@ -1509,8 +1488,8 @@ y, which occurs before the end of the critical section, did not
 propagate to P1 before the end of the grace period, violating the
 Guarantee.
 
-In the kernel's implementations of RCU, the business about stores
-propagating to every CPU is realized by placing strong fences at
+In the kernel's implementations of RCU, the requirements for stores
+to propagate to every CPU are fulfilled by placing strong fences at
 suitable places in the RCU-related code.  Thus, if a critical section
 starts before a grace period does then the critical section's CPU will
 execute an smp_mb() fence after the end of the critical section and
@@ -1523,72 +1502,124 @@ executes.
 What exactly do we mean by saying that a critical section "starts
 before" or "ends after" a grace period?  Some aspects of the meaning
 are pretty obvious, as in the example above, but the details aren't
-entirely clear.  The LKMM formalizes this notion by means of a
-relation with the unfortunately generic name "link".  It is a very
-general relation; among other things, X ->link Z includes cases where
-X happens-before or is equal to some event Y which is equal to or
-comes before Z in the coherence order.  Taking Y = Z, this says that
-X ->rfe Z implies X ->link Z, and taking Y = X, it says that X ->fr Z
-and X ->co Z each imply X ->link Z.
-
-The formal definition of the link relation is more than a little
+entirely clear.  The LKMM formalizes this notion by means of the
+rcu-link relation.  rcu-link encompasses a very general notion of
+"before": Among other things, X ->rcu-link Z includes cases where X
+happens-before or is equal to some event Y which is equal to or comes
+before Z in the coherence order.  When Y = Z this says that X ->rfe Z
+implies X ->rcu-link Z.  In addition, when Y = X it says that X ->fr Z
+and X ->co Z each imply X ->rcu-link Z.
+
+The formal definition of the rcu-link relation is more than a little
 obscure, and we won't give it here.  It is closely related to the pb
 relation, and the details don't matter unless you want to comb through
 a somewhat lengthy formal proof.  Pretty much all you need to know
-about link is the information in the preceding paragraph.
-
-The LKMM goes on to define the gp-link and rscs-link relations.  They
-bring grace periods and read-side critical sections into the picture,
-in the following way:
-
-	E ->gp-link F means there is a synchronize_rcu() fence event S
-	and an event X such that E ->po S, either S ->po X or S = X,
-	and X ->link F.  In other words, E and F are connected by a
-	grace period followed by an instance of link.
-
-	E ->rscs-link F means there is a critical section delimited by
-	an rcu_read_lock() fence L and an rcu_read_unlock() fence U,
-	and an event X such that E ->po U, either L ->po X or L = X,
-	and X ->link F.  Roughly speaking, this says that some event
-	in the same critical section as E is connected by link to F.
-
-If we think of the link relation as standing for an extended "before",
-then E ->gp-link F says that E executes before a grace period which
-ends before F executes.  (In fact it says more than this, because it
-includes cases where E executes before a grace period and some store
-propagates to F's CPU before F executes and doesn't propagate to some
-other CPU until after the grace period ends.)  Similarly,
-E ->rscs-link F says that E is part of (or before the start of) a
-critical section which starts before F executes.
+about rcu-link is the information in the preceding paragraph.
+
+The LKMM also defines the gp and rscs relations.  They bring grace
+periods and read-side critical sections into the picture, in the
+following way:
+
+	E ->gp F means there is a synchronize_rcu() fence event S such
+	that E ->po S and either S ->po F or S = F.  In simple terms,
+	there is a grace period po-between E and F.
+
+	E ->rscs F means there is a critical section delimited by an
+	rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
+	that E ->po U and either L ->po F or L = F.  You can think of
+	this as saying that E and F are in the same critical section
+	(in fact, it also allows E to be po-before the start of the
+	critical section and F to be po-after the end).
+
+If we think of the rcu-link relation as standing for an extended
+"before", then X ->gp Y ->rcu-link Z says that X executes before a
+grace period which ends before Z executes.  (In fact it covers more
+than this, because it also includes cases where X executes before a
+grace period and some store propagates to Z's CPU before Z executes
+but doesn't propagate to some other CPU until after the grace period
+ends.)  Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
+before the start of) a critical section which starts before Z
+executes.
+
+The LKMM goes on to define the rcu-fence relation as a sequence of gp
+and rscs links separated by rcu-link links, in which the number of gp
+links is >= the number of rscs links.  For example:
+
+	X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+would imply that X ->rcu-fence V, because this sequence contains two
+gp links and only one rscs link.  (It also implies that X ->rcu-fence T
+and Z ->rcu-fence V.)  On the other hand:
+
+	X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+does not imply X ->rcu-fence V, because the sequence contains only
+one gp link but two rscs links.
+
+The rcu-fence relation is important because the Grace Period Guarantee
+means that rcu-fence acts kind of like a strong fence.  In particular,
+if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
+will propagate to every CPU before Z executes.
+
+To prove this in full generality requires some intellectual effort.
+We'll consider just a very simple case:
+
+	W ->gp X ->rcu-link Y ->rscs Z.
+
+This formula means that there is a grace period G and a critical
+section C such that:
+
+	1. W is po-before G;
+
+	2. X is equal to or po-after G;
+
+	3. X comes "before" Y in some sense;
+
+	4. Y is po-before the end of C;
+
+	5. Z is equal to or po-after the start of C.
+
+From 2 - 4 we deduce that the grace period G ends before the critical
+section C.  Then the second part of the Grace Period Guarantee says
+not only that G starts before C does, but also that W (which executes
+on G's CPU before G starts) must propagate to every CPU before C
+starts.  In particular, W propagates to every CPU before Z executes
+(or finishes executing, in the case where Z is equal to the
+rcu_read_lock() fence event which starts C.)  This sort of reasoning
+can be expanded to handle all the situations covered by rcu-fence.
+
+Finally, the LKMM defines the RCU-before (rb) relation in terms of
+rcu-fence.  This is done in essentially the same way as the pb
+relation was defined in terms of strong-fence.  We will omit the
+details; the end result is that E ->rb F implies E must execute before
+F, just as E ->pb F does (and for much the same reasons).
 
 Putting this all together, the LKMM expresses the Grace Period
-Guarantee by requiring that there are no cycles consisting of gp-link
-and rscs-link connections in which the number of gp-link instances is
->= the number of rscs-link instances.  It does this by defining the
-rcu-path relation to link events E and F whenever it is possible to
-pass from E to F by a sequence of gp-link and rscs-link connections
-with at least as many of the former as the latter.  The LKMM's "rcu"
-axiom then says that there are no events E such that E ->rcu-path E.
-
-Justifying this axiom takes some intellectual effort, but it is in
-fact a valid formalization of the Grace Period Guarantee.  We won't
-attempt to go through the detailed argument, but the following
-analysis gives a taste of what is involved.  Suppose we have a
-violation of the first part of the Guarantee: A critical section
-starts before a grace period, and some store propagates to the
-critical section's CPU before the end of the critical section but
-doesn't propagate to some other CPU until after the end of the grace
-period.
+Guarantee by requiring that the rb relation does not contain a cycle.
+Equivalently, this "rcu" axiom requires that there are no events E and
+F with E ->rcu-link F ->rcu-fence E.  Or to put it a third way, the
+axiom requires that there are no cycles consisting of gp and rscs
+alternating with rcu-link, where the number of gp links is >= the
+number of rscs links.
+
+Justifying the axiom isn't easy, but it is in fact a valid
+formalization of the Grace Period Guarantee.  We won't attempt to go
+through the detailed argument, but the following analysis gives a
+taste of what is involved.  Suppose we have a violation of the first
+part of the Guarantee: A critical section starts before a grace
+period, and some store propagates to the critical section's CPU before
+the end of the critical section but doesn't propagate to some other
+CPU until after the end of the grace period.
 
 Putting symbols to these ideas, let L and U be the rcu_read_lock() and
 rcu_read_unlock() fence events delimiting the critical section in
 question, and let S be the synchronize_rcu() fence event for the grace
 period.  Saying that the critical section starts before S means there
 are events E and F where E is po-after L (which marks the start of the
-critical section), E is "before" F in the sense of the link relation,
-and F is po-before the grace period S:
+critical section), E is "before" F in the sense of the rcu-link
+relation, and F is po-before the grace period S:
 
-	L ->po E ->link F ->po S.
+	L ->po E ->rcu-link F ->po S.
 
 Let W be the store mentioned above, let Z come before the end of the
 critical section and witness that W propagates to the critical
@@ -1600,16 +1631,19 @@ some event X which is po-after S.  Symbolically, this amounts to:
 
 The fr link from Y to W indicates that W has not propagated to Y's CPU
 at the time that Y executes.  From this, it can be shown (see the
-discussion of the link relation earlier) that X and Z are connected by
-link, yielding:
+discussion of the rcu-link relation earlier) that X and Z are related
+by rcu-link, yielding:
 
-	S ->po X ->link Z ->po U.
+	S ->po X ->rcu-link Z ->po U.
 
-These formulas say that S is po-between F and X, hence F ->gp-link Z
-via X.  They also say that Z comes before the end of the critical
-section and E comes after its start, hence Z ->rscs-link F via E.  But
-now we have a forbidden cycle: F ->gp-link Z ->rscs-link F.  Thus the
-"rcu" axiom rules out this violation of the Grace Period Guarantee.
+The formulas say that S is po-between F and X, hence F ->gp X.  They
+also say that Z comes before the end of the critical section and E
+comes after its start, hence Z ->rscs E.  From all this we obtain:
+
+	F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
+
+a forbidden cycle.  Thus the "rcu" axiom rules out this violation of
+the Grace Period Guarantee.
 
 For something a little more down-to-earth, let's see how the axiom
 works out in practice.  Consider the RCU code example from above, this
@@ -1635,18 +1669,18 @@ time with statement labels added to the memory access instructions:
 	}
 
 
-If r2 = 0 at the end then P0's store at X overwrites the value
-that P1's load at Z reads from, so we have Z ->fre X and thus
-Z ->link X.  In addition, there is a synchronize_rcu() between Y and
-Z, so therefore we have Y ->gp-link X.
+If r2 = 0 at the end then P0's store at X overwrites the value that
+P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
+In addition, there is a synchronize_rcu() between Y and Z, so therefore
+we have Y ->gp Z.
 
 If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
-so we have W ->link Y.  In addition, W and X are in the same critical
-section, so therefore we have X ->rscs-link Y.
+so we have W ->rcu-link Y.  In addition, W and X are in the same critical
+section, so therefore we have X ->rscs W.
 
-This gives us a cycle, Y ->gp-link X ->rscs-link Y, with one gp-link
-and one rscs-link, violating the "rcu" axiom.  Hence the outcome is
-not allowed by the LKMM, as we would expect.
+Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
+violating the "rcu" axiom.  Hence the outcome is not allowed by the
+LKMM, as we would expect.
 
 For contrast, let's see what can happen in a more complicated example:
 
@@ -1682,15 +1716,11 @@ For contrast, let's see what can happen in a more complicated example:
 	}
 
 If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
-that W ->rscs-link Y via X, Y ->gp-link U via Z, and U ->rscs-link W
-via V.  And just as before, this gives a cycle:
-
-	W ->rscs-link Y ->gp-link U ->rscs-link W.
-
-However, this cycle has fewer gp-link instances than rscs-link
-instances, and consequently the outcome is not forbidden by the LKMM.
-The following instruction timing diagram shows how it might actually
-occur:
+that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
+However this cycle is not forbidden, because the sequence of relations
+contains fewer instances of gp (one) than of rscs (two).  Consequently
+the outcome is allowed by the LKMM.  The following instruction timing
+diagram shows how it might actually occur:
 
 P0			P1			P2
 --------------------	--------------------	--------------------
@@ -1715,6 +1745,147 @@ before it does, and the critical section in P2 both starts after P1's
 grace period does and ends after it does.
 
 
+LOCKING
+-------
+
+The LKMM includes locking.  In fact, there is special code for locking
+in the formal model, added in order to make tools run faster.
+However, this special code is intended to be more or less equivalent
+to concepts we have already covered.  A spinlock_t variable is treated
+the same as an int, and spin_lock(&s) is treated almost the same as:
+
+	while (cmpxchg_acquire(&s, 0, 1) != 0)
+		cpu_relax();
+
+This waits until s is equal to 0 and then atomically sets it to 1,
+and the read part of the cmpxchg operation acts as an acquire fence.
+An alternate way to express the same thing would be:
+
+	r = xchg_acquire(&s, 1);
+
+along with a requirement that at the end, r = 0.  Similarly,
+spin_trylock(&s) is treated almost the same as:
+
+	return !cmpxchg_acquire(&s, 0, 1);
+
+which atomically sets s to 1 if it is currently equal to 0 and returns
+true if it succeeds (the read part of the cmpxchg operation acts as an
+acquire fence only if the operation is successful).  spin_unlock(&s)
+is treated almost the same as:
+
+	smp_store_release(&s, 0);
+
+The "almost" qualifiers above need some explanation.  In the LKMM, the
+store-release in a spin_unlock() and the load-acquire which forms the
+first half of the atomic rmw update in a spin_lock() or a successful
+spin_trylock() -- we can call these things lock-releases and
+lock-acquires -- have two properties beyond those of ordinary releases
+and acquires.
+
+First, when a lock-acquire reads from a lock-release, the LKMM
+requires that every instruction po-before the lock-release must
+execute before any instruction po-after the lock-acquire.  This would
+naturally hold if the release and acquire operations were on different
+CPUs, but the LKMM says it holds even when they are on the same CPU.
+For example:
+
+	int x, y;
+	spinlock_t s;
+
+	P0()
+	{
+		int r1, r2;
+
+		spin_lock(&s);
+		r1 = READ_ONCE(x);
+		spin_unlock(&s);
+		spin_lock(&s);
+		r2 = READ_ONCE(y);
+		spin_unlock(&s);
+	}
+
+	P1()
+	{
+		WRITE_ONCE(y, 1);
+		smp_wmb();
+		WRITE_ONCE(x, 1);
+	}
+
+Here the second spin_lock() reads from the first spin_unlock(), and
+therefore the load of x must execute before the load of y.  Thus we
+cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the
+MP pattern).
+
+This requirement does not apply to ordinary release and acquire
+fences, only to lock-related operations.  For instance, suppose P0()
+in the example had been written as:
+
+	P0()
+	{
+		int r1, r2, r3;
+
+		r1 = READ_ONCE(x);
+		smp_store_release(&s, 1);
+		r3 = smp_load_acquire(&s);
+		r2 = READ_ONCE(y);
+	}
+
+Then the CPU would be allowed to forward the s = 1 value from the
+smp_store_release() to the smp_load_acquire(), executing the
+instructions in the following order:
+
+		r3 = smp_load_acquire(&s);	// Obtains r3 = 1
+		r2 = READ_ONCE(y);
+		r1 = READ_ONCE(x);
+		smp_store_release(&s, 1);	// Value is forwarded
+
+and thus it could load y before x, obtaining r2 = 0 and r1 = 1.
+
+Second, when a lock-acquire reads from a lock-release, and some other
+stores W and W' occur po-before the lock-release and po-after the
+lock-acquire respectively, the LKMM requires that W must propagate to
+each CPU before W' does.  For example, consider:
+
+	int x, y;
+	spinlock_t x;
+
+	P0()
+	{
+		spin_lock(&s);
+		WRITE_ONCE(x, 1);
+		spin_unlock(&s);
+	}
+
+	P1()
+	{
+		int r1;
+
+		spin_lock(&s);
+		r1 = READ_ONCE(x);
+		WRITE_ONCE(y, 1);
+		spin_unlock(&s);
+	}
+
+	P2()
+	{
+		int r2, r3;
+
+		r2 = READ_ONCE(y);
+		smp_rmb();
+		r3 = READ_ONCE(x);
+	}
+
+If r1 = 1 at the end then the spin_lock() in P1 must have read from
+the spin_unlock() in P0.  Hence the store to x must propagate to P2
+before the store to y does, so we cannot have r2 = 1 and r3 = 0.
+
+These two special requirements for lock-release and lock-acquire do
+not arise from the operational model.  Nevertheless, kernel developers
+have come to expect and rely on them because they do hold on all
+architectures supported by the Linux kernel, albeit for various
+differing reasons.
+
+
 ODDS AND ENDS
 -------------
 
@@ -1780,26 +1951,6 @@ they behave as follows:
 	events and the events preceding them against all po-later
 	events.
 
-The LKMM includes locking.  In fact, there is special code for locking
-in the formal model, added in order to make tools run faster.
-However, this special code is intended to be exactly equivalent to
-concepts we have already covered.  A spinlock_t variable is treated
-the same as an int, and spin_lock(&s) is treated the same as:
-
-	while (cmpxchg_acquire(&s, 0, 1) != 0)
-		cpu_relax();
-
-which waits until s is equal to 0 and then atomically sets it to 1,
-and where the read part of the atomic update is also an acquire fence.
-An alternate way to express the same thing would be:
-
-	r = xchg_acquire(&s, 1);
-
-along with a requirement that at the end, r = 0.  spin_unlock(&s) is
-treated the same as:
-
-	smp_store_release(&s, 0);
-
 Interestingly, RCU and locking each introduce the possibility of
 deadlock.  When faced with code sequences such as:
 
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index ee4309a87fc4..7fe8d7aa3029 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -126,7 +126,7 @@ However, it is not necessarily the case that accesses ordered by
 locking will be seen as ordered by CPUs not holding that lock.
 Consider this example:
 
-	/* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */
+	/* See Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus. */
 	void CPU0(void)
 	{
 		spin_lock(&mylock);
@@ -292,7 +292,7 @@ and to use smp_load_acquire() instead of smp_rmb().  However, the older
 smp_wmb() and smp_rmb() APIs are still heavily used, so it is important
 to understand their use cases.  The general approach is shown below:
 
-	/* See MP+wmbonceonce+rmbonceonce.litmus. */
+	/* See MP+fencewmbonceonce+fencermbonceonce.litmus. */
 	void CPU0(void)
 	{
 		WRITE_ONCE(x, 1);
@@ -311,7 +311,7 @@ The smp_wmb() macro orders prior stores against later stores, and the
 smp_rmb() macro orders prior loads against later loads.  Therefore, if
 the final value of r0 is 1, the final value of r1 must also be 1.
 
-The the xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
+The xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
 the following write-side code fragment:
 
 	log->l_curr_block -= log->l_logBBsize;
@@ -322,9 +322,9 @@ the following write-side code fragment:
 And the xlog_valid_lsn() function in fs/xfs/xfs_log_priv.h contains
 the corresponding read-side code fragment:
 
-	cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
+	cur_cycle = READ_ONCE(log->l_curr_cycle);
 	smp_rmb();
-	cur_block = ACCESS_ONCE(log->l_curr_block);
+	cur_block = READ_ONCE(log->l_curr_block);
 
 Alternatively, consider the following comment in function
 perf_output_put_handle() in kernel/events/ring_buffer.c:
@@ -360,7 +360,7 @@ can be seen in the LB+poonceonces.litmus litmus test.
 One way of avoiding the counter-intuitive outcome is through the use of a
 control dependency paired with a full memory barrier:
 
-	/* See LB+ctrlonceonce+mbonceonce.litmus. */
+	/* See LB+fencembonceonce+ctrlonceonce.litmus. */
 	void CPU0(void)
 	{
 		r0 = READ_ONCE(x);
@@ -476,7 +476,7 @@ that one CPU first stores to one variable and then loads from a second,
 while another CPU stores to the second variable and then loads from the
 first.  Preserving order requires nothing less than full barriers:
 
-	/* See SB+mbonceonces.litmus. */
+	/* See SB+fencembonceonces.litmus. */
 	void CPU0(void)
 	{
 		WRITE_ONCE(x, 1);
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ba2e34c2ec3f..b177f3e4a614 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -63,15 +63,22 @@ o	Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis,
 	Principles of Programming Languages (POPL 2017). ACM, New York,
 	NY, USA, 429–442.
 
+o	Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
+	Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency:
+	multicopy-atomic axiomatic and operational models for ARMv8". In
+	Proceedings of the ACM on Programming Languages, Volume 2, Issue
+	POPL, Article No. 19. ACM, New York, NY, USA.
+
 
 Linux-kernel memory model
 =========================
 
-o	Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney,
-	and Jade Alglave.  2017. "A formal model of
-	Linux-kernel memory ordering - companion webpage".
-	http://moscova.inria.fr/∼maranget/cats7/linux/. (2017). [Online;
-	accessed 30-January-2017].
+o	Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+	Alan Stern.  2018. "Frightening small children and disconcerting
+	grown-ups: Concurrency in the Linux kernel". In Proceedings of
+	the 23rd International Conference on Architectural Support for
+	Programming Languages and Operating Systems (ASPLOS 2018). ACM,
+	New York, NY, USA, 405-418.  Webpage: http://diy.inria.fr/linux/.
 
 o	Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
 	Alan Stern.  2017.  "A formal kernel memory-ordering model (part 1)"
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 0b3a5f3c9ccd..acf9077cffaa 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel.
 REQUIREMENTS
 ============
 
-Version 7.48 of the "herd7" and "klitmus7" tools must be downloaded
+Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
 separately:
 
   https://github.com/herd/herdtools7
@@ -35,13 +35,13 @@ BASIC USAGE: HERD7
 The memory model is used, in conjunction with "herd7", to exhaustively
 explore the state space of small litmus tests.
 
-For example, to run SB+mbonceonces.litmus against the memory model:
+For example, to run SB+fencembonceonces.litmus against the memory model:
 
-  $ herd7 -conf linux-kernel.cfg litmus-tests/SB+mbonceonces.litmus
+  $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
 
 Here is the corresponding output:
 
-  Test SB+mbonceonces Allowed
+  Test SB+fencembonceonces Allowed
   States 3
   0:r0=0; 1:r0=1;
   0:r0=1; 1:r0=0;
@@ -50,8 +50,8 @@ Here is the corresponding output:
   Witnesses
   Positive: 0 Negative: 3
   Condition exists (0:r0=0 /\ 1:r0=0)
-  Observation SB+mbonceonces Never 0 3
-  Time SB+mbonceonces 0.01
+  Observation SB+fencembonceonces Never 0 3
+  Time SB+fencembonceonces 0.01
   Hash=d66d99523e2cac6b06e66f4c995ebb48
 
 The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
@@ -67,16 +67,16 @@ BASIC USAGE: KLITMUS7
 The "klitmus7" tool converts a litmus test into a Linux kernel module,
 which may then be loaded and run.
 
-For example, to run SB+mbonceonces.litmus against hardware:
+For example, to run SB+fencembonceonces.litmus against hardware:
 
   $ mkdir mymodules
-  $ klitmus7 -o mymodules litmus-tests/SB+mbonceonces.litmus
+  $ klitmus7 -o mymodules litmus-tests/SB+fencembonceonces.litmus
   $ cd mymodules ; make
   $ sudo sh run.sh
 
 The corresponding output includes:
 
-  Test SB+mbonceonces Allowed
+  Test SB+fencembonceonces Allowed
   Histogram (3 states)
   644580  :>0:r0=1; 1:r0=0;
   644328  :>0:r0=0; 1:r0=1;
@@ -86,8 +86,8 @@ The corresponding output includes:
   Positive: 0, Negative: 2000000
   Condition exists (0:r0=0 /\ 1:r0=0) is NOT validated
   Hash=d66d99523e2cac6b06e66f4c995ebb48
-  Observation SB+mbonceonces Never 0 2000000
-  Time SB+mbonceonces 0.16
+  Observation SB+fencembonceonces Never 0 2000000
+  Time SB+fencembonceonces 0.16
 
 The "Positive: 0 Negative: 2000000" and the "Never 0 2000000" indicate
 that during two million trials, the state specified in this litmus
@@ -171,6 +171,12 @@ The Linux-kernel memory model has the following limitations:
 	particular, the "THE PROGRAM ORDER RELATION: po AND po-loc"
 	and "A WARNING" sections).
 
+	Note that this limitation in turn limits LKMM's ability to
+	accurately model address, control, and data dependencies.
+	For example, if the compiler can deduce the value of some variable
+	carrying a dependency, then the compiler can break that dependency
+	by substituting a constant of that value.
+
 2.	Multiple access sizes for a single variable are not supported,
 	and neither are misaligned or partially overlapping accesses.
 
@@ -190,6 +196,36 @@ The Linux-kernel memory model has the following limitations:
 	However, a substantial amount of support is provided for these
 	operations, as shown in the linux-kernel.def file.
 
+	a.	When rcu_assign_pointer() is passed NULL, the Linux
+		kernel provides no ordering, but LKMM models this
+		case as a store release.
+
+	b.	The "unless" RMW operations are not currently modeled:
+		atomic_long_add_unless(), atomic_add_unless(),
+		atomic_inc_unless_negative(), and
+		atomic_dec_unless_positive().  These can be emulated
+		in litmus tests, for example, by using atomic_cmpxchg().
+
+	c.	The call_rcu() function is not modeled.  It can be
+		emulated in litmus tests by adding another process that
+		invokes synchronize_rcu() and the body of the callback
+		function, with (for example) a release-acquire from
+		the site of the emulated call_rcu() to the beginning
+		of the additional process.
+
+	d.	The rcu_barrier() function is not modeled.  It can be
+		emulated in litmus tests emulating call_rcu() via
+		(for example) a release-acquire from the end of each
+		additional call_rcu() process to the site of the
+		emulated rcu-barrier().
+
+	e.	Sleepable RCU (SRCU) is not modeled.  It can be
+		emulated, but perhaps not simply.
+
+	f.	Reader-writer locking is not modeled.  It can be
+		emulated in litmus tests using atomic read-modify-write
+		operations.
+
 The "herd7" tool has some additional limitations of its own, apart from
 the memory model:
 
@@ -204,3 +240,6 @@ the memory model:
 Some of these limitations may be overcome in the future, but others are
 more likely to be addressed by incorporating the Linux-kernel memory model
 into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 432c7cf71b23..b84fb2f67109 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -5,15 +5,15 @@
  * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
  *                    Andrea Parri <parri.andrea@gmail.com>
  *
- * An earlier version of this file appears in the companion webpage for
+ * An earlier version of this file appeared in the companion webpage for
  * "Frightening small children and disconcerting grown-ups: Concurrency
  * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
- * which is to appear in ASPLOS 2018.
+ * which appeared in ASPLOS 2018.
  *)
 
 "Linux-kernel memory consistency model"
 
-enum Accesses = 'once (*READ_ONCE,WRITE_ONCE,ACCESS_ONCE*) ||
+enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) ||
 		'release (*smp_store_release*) ||
 		'acquire (*smp_load_acquire*) ||
 		'noreturn (* R of non-return RMW *)
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index df97db03b6c2..882fc33274ac 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -5,10 +5,10 @@
  * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
  *                    Andrea Parri <parri.andrea@gmail.com>
  *
- * An earlier version of this file appears in the companion webpage for
+ * An earlier version of this file appeared in the companion webpage for
  * "Frightening small children and disconcerting grown-ups: Concurrency
  * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
- * which is to appear in ASPLOS 2018.
+ * which appeared in ASPLOS 2018.
  *)
 
 "Linux-kernel memory consistency model"
@@ -38,7 +38,7 @@ let strong-fence = mb | gp
 (* Release Acquire *)
 let acq-po = [Acquire] ; po ; [M]
 let po-rel = [M] ; po ; [Release]
-let rfi-rel-acq = [Release] ; rfi ; [Acquire]
+let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
 
 (**********************************)
 (* Fundamental coherence ordering *)
@@ -60,13 +60,13 @@ let dep = addr | data
 let rwdep = (dep | ctrl) ; [W]
 let overwrite = co | fr
 let to-w = rwdep | (overwrite & int)
-let to-r = addr | (dep ; rfi) | rfi-rel-acq
+let to-r = addr | (dep ; rfi)
 let fence = strong-fence | wmb | po-rel | rmb | acq-po
-let ppo = to-r | to-w | fence
+let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
 
 (* Propagation: Ordering from release operations and strong fences. *)
 let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
+let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | po-unlock-rf-lock-po
 let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
 
 (*
@@ -100,22 +100,29 @@ let rscs = po ; crit^-1 ; po?
  * one but two non-rf relations, but only in conjunction with an RCU
  * read-side critical section.
  *)
-let link = hb* ; pb* ; prop
+let rcu-link = hb* ; pb* ; prop
 
-(* Chains that affect the RCU grace-period guarantee *)
-let gp-link = gp ; link
-let rscs-link = rscs ; link
+(*
+ * Any sequence containing at least as many grace periods as RCU read-side
+ * critical sections (joined by rcu-link) acts as a generalized strong fence.
+ *)
+let rec rcu-fence = gp |
+	(gp ; rcu-link ; rscs) |
+	(rscs ; rcu-link ; gp) |
+	(gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
+	(rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
+	(rcu-fence ; rcu-link ; rcu-fence)
+
+(* rb orders instructions just as pb does *)
+let rb = prop ; rcu-fence ; hb* ; pb*
+
+irreflexive rb as rcu
 
 (*
- * A cycle containing at least as many grace periods as RCU read-side
- * critical sections is forbidden.
+ * The happens-before, propagation, and rcu constraints are all
+ * expressions of temporal ordering.  They could be replaced by
+ * a single constraint on an "executes-before" relation, xb:
+ *
+ * let xb = hb | pb | rb
+ * acyclic xb as executes-before
  *)
-let rec rcu-path =
-	gp-link |
-	(gp-link ; rscs-link) |
-	(rscs-link ; gp-link) |
-	(rcu-path ; rcu-path) |
-	(gp-link ; rcu-path ; rscs-link) |
-	(rscs-link ; rcu-path ; gp-link)
-
-irreflexive rcu-path as rcu
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 397e4e67e8c8..6fa3eb28d40b 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0+
 //
-// An earlier version of this file appears in the companion webpage for
+// An earlier version of this file appeared in the companion webpage for
 // "Frightening small children and disconcerting grown-ups: Concurrency
 // in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
-// which is to appear in ASPLOS 2018.
+// which appeared in ASPLOS 2018.
 
 // ONCE
 READ_ONCE(X) __load{once}(X)
@@ -14,14 +14,15 @@ smp_store_release(X,V) { __store{release}(*X,V); }
 smp_load_acquire(X) __load{acquire}(*X)
 rcu_assign_pointer(X,V) { __store{release}(X,V); }
 rcu_dereference(X) __load{once}(X)
+smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
 
 // Fences
-smp_mb() { __fence{mb} ; }
-smp_rmb() { __fence{rmb} ; }
-smp_wmb() { __fence{wmb} ; }
-smp_mb__before_atomic() { __fence{before-atomic} ; }
-smp_mb__after_atomic() { __fence{after-atomic} ; }
-smp_mb__after_spinlock() { __fence{after-spinlock} ; }
+smp_mb() { __fence{mb}; }
+smp_rmb() { __fence{rmb}; }
+smp_wmb() { __fence{wmb}; }
+smp_mb__before_atomic() { __fence{before-atomic}; }
+smp_mb__after_atomic() { __fence{after-atomic}; }
+smp_mb__after_spinlock() { __fence{after-spinlock}; }
 
 // Exchange
 xchg(X,V)  __xchg{mb}(X,V)
@@ -34,26 +35,27 @@ cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
 cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
 
 // Spinlocks
-spin_lock(X) { __lock(X) ; }
-spin_unlock(X) { __unlock(X) ; }
+spin_lock(X) { __lock(X); }
+spin_unlock(X) { __unlock(X); }
 spin_trylock(X) __trylock(X)
+spin_is_locked(X) __islocked(X)
 
 // RCU
 rcu_read_lock() { __fence{rcu-lock}; }
-rcu_read_unlock() { __fence{rcu-unlock};}
+rcu_read_unlock() { __fence{rcu-unlock}; }
 synchronize_rcu() { __fence{sync-rcu}; }
 synchronize_rcu_expedited() { __fence{sync-rcu}; }
 
 // Atomic
 atomic_read(X) READ_ONCE(*X)
-atomic_set(X,V) { WRITE_ONCE(*X,V) ; }
+atomic_set(X,V) { WRITE_ONCE(*X,V); }
 atomic_read_acquire(X) smp_load_acquire(X)
 atomic_set_release(X,V) { smp_store_release(X,V); }
 
-atomic_add(V,X) { __atomic_op(X,+,V) ; }
-atomic_sub(V,X) { __atomic_op(X,-,V) ; }
-atomic_inc(X)   { __atomic_op(X,+,1) ; }
-atomic_dec(X)   { __atomic_op(X,-,1) ; }
+atomic_add(V,X) { __atomic_op(X,+,V); }
+atomic_sub(V,X) { __atomic_op(X,-,V); }
+atomic_inc(X)   { __atomic_op(X,+,1); }
+atomic_dec(X)   { __atomic_op(X,-,1); }
 
 atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
 atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
new file mode 100644
index 000000000000..6e2ddc54152f
--- /dev/null
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -0,0 +1 @@
+*.litmus.out
diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
index 50d5db9ea983..e729d2776e89 100644
--- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
@@ -1,4 +1,4 @@
-C IRIW+mbonceonces+OnceOnce
+C IRIW+fencembonceonces+OnceOnce
 
 (*
  * Result: Never
@@ -7,7 +7,7 @@ C IRIW+mbonceonces+OnceOnce
  * between each pairs of reads.  In other words, is smp_mb() sufficient to
  * cause two different reading processes to agree on the order of a pair
  * of writes, where each write is to a different variable by a different
- * process?
+ * process?  This litmus test exercises LKMM's "propagation" rule.
  *)
 
 {}
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
index 7a39a0aaa976..094d58df7789 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
@@ -1,11 +1,10 @@
-C ISA2+pooncelock+pooncelock+pombonce.litmus
+C ISA2+pooncelock+pooncelock+pombonce
 
 (*
- * Result: Sometimes
+ * Result: Never
  *
- * This test shows that the ordering provided by a lock-protected S
- * litmus test (P0() and P1()) are not visible to external process P2().
- * This is likely to change soon.
+ * This test shows that write-write ordering provided by locks
+ * (in P0() and P1()) is visible to external process P2().
  *)
 
 {}
diff --git a/tools/memory-model/litmus-tests/LB+ctrlonceonce+mbonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
index de6708229dd1..4727f5aaf03b 100644
--- a/tools/memory-model/litmus-tests/LB+ctrlonceonce+mbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
@@ -1,4 +1,4 @@
-C LB+ctrlonceonce+mbonceonce
+C LB+fencembonceonce+ctrlonceonce
 
 (*
  * Result: Never
diff --git a/tools/memory-model/litmus-tests/MP+wmbonceonce+rmbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index c078f38ff27a..a273da9faa6d 100644
--- a/tools/memory-model/litmus-tests/MP+wmbonceonce+rmbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -1,4 +1,4 @@
-C MP+wmbonceonce+rmbonceonce
+C MP+fencewmbonceonce+fencermbonceonce
 
 (*
  * Result: Never
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
new file mode 100644
index 000000000000..50f4d62bbf0e
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -0,0 +1,35 @@
+C MP+polockmbonce+poacquiresilsil
+
+(*
+ * Result: Never
+ *
+ * Do spinlocks combined with smp_mb__after_spinlock() provide order
+ * to outside observers using spin_is_locked() to sense the lock-held
+ * state, ordered by acquire?  Note that when the first spin_is_locked()
+ * returns false and the second true, we know that the smp_load_acquire()
+ * executed before the lock was acquired (loosely speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+	spin_lock(lo);
+	smp_mb__after_spinlock();
+	WRITE_ONCE(*x, 1);
+	spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+	int r1;
+	int r2;
+	int r3;
+
+	r1 = smp_load_acquire(x);
+	r2 = spin_is_locked(lo);
+	r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
new file mode 100644
index 000000000000..abf81e7a0895
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -0,0 +1,34 @@
+C MP+polockonce+poacquiresilsil
+
+(*
+ * Result: Sometimes
+ *
+ * Do spinlocks provide order to outside observers using spin_is_locked()
+ * to sense the lock-held state, ordered by acquire?  Note that when the
+ * first spin_is_locked() returns false and the second true, we know that
+ * the smp_load_acquire() executed before the lock was acquired (loosely
+ * speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+	spin_lock(lo);
+	WRITE_ONCE(*x, 1);
+	spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+	int r1;
+	int r2;
+	int r3;
+
+	r1 = smp_load_acquire(x);
+	r2 = spin_is_locked(lo);
+	r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/R+mbonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
index a0e884ad2132..222a0b850b4a 100644
--- a/tools/memory-model/litmus-tests/R+mbonceonces.litmus
+++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
@@ -1,4 +1,4 @@
-C R+mbonceonces
+C R+fencembonceonces
 
 (*
  * Result: Never
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 04096fb8b8d9..5ee08f129094 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -1,4 +1,6 @@
-This directory contains the following litmus tests:
+============
+LITMUS TESTS
+============
 
 CoRR+poonceonce+Once.litmus
 	Test of read-read coherence, that is, whether or not two
@@ -18,12 +20,13 @@ CoWW+poonceonce.litmus
 	Test of write-write coherence, that is, whether or not two
 	successive writes to the same variable are ordered.
 
-IRIW+mbonceonces+OnceOnce.litmus
+IRIW+fencembonceonces+OnceOnce.litmus
 	Test of independent reads from independent writes with smp_mb()
 	between each pairs of reads.  In other words, is smp_mb()
 	sufficient to cause two different reading processes to agree on
 	the order of a pair of writes, where each write is to a different
-	variable by a different process?
+	variable by a different process?  This litmus test is forbidden
+	by LKMM's propagation rule.
 
 IRIW+poonceonces+OnceOnce.litmus
 	Test of independent reads from independent writes with nothing
@@ -35,7 +38,7 @@ IRIW+poonceonces+OnceOnce.litmus
 ISA2+pooncelock+pooncelock+pombonce.litmus
 	Tests whether the ordering provided by a lock-protected S
 	litmus test is visible to an external process whose accesses are
-	separated by smp_mb().	This addition of an external process to
+	separated by smp_mb().  This addition of an external process to
 	S is otherwise known as ISA2.
 
 ISA2+poonceonces.litmus
@@ -46,7 +49,7 @@ ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
 	Can a release-acquire chain order a prior store against
 	a later load?
 
-LB+ctrlonceonce+mbonceonce.litmus
+LB+fencembonceonce+ctrlonceonce.litmus
 	Does a control dependency and an smp_mb() suffice for the
 	load-buffering litmus test, where each process reads from one
 	of two variables then writes to the other?
@@ -63,6 +66,16 @@ LB+poonceonces.litmus
 MP+onceassign+derefonce.litmus
 	As below, but with rcu_assign_pointer() and an rcu_dereference().
 
+MP+polockmbonce+poacquiresilsil.litmus
+	Protect the access with a lock and an smp_mb__after_spinlock()
+	in one process, and use an acquire load followed by a pair of
+	spin_is_locked() calls in the other process.
+
+MP+polockonce+poacquiresilsil.litmus
+	Protect the access with a lock in one process, and use an
+	acquire load followed by a pair of spin_is_locked() calls
+	in the other process.
+
 MP+polocks.litmus
 	As below, but with the second access of the writer process
 	and the first access of reader process protected by a lock.
@@ -77,14 +90,14 @@ MP+porevlocks.litmus
 	As below, but with the first access of the writer process
 	and the second access of reader process protected by a lock.
 
-MP+wmbonceonce+rmbonceonce.litmus
+MP+fencewmbonceonce+fencermbonceonce.litmus
 	Does a smp_wmb() (between the stores) and an smp_rmb() (between
 	the loads) suffice for the message-passing litmus test, where one
 	process writes data and then a flag, and the other process reads
 	the flag and then the data.  (This is similar to the ISA2 tests,
 	but with two processes instead of three.)
 
-R+mbonceonces.litmus
+R+fencembonceonces.litmus
 	This is the fully ordered (via smp_mb()) version of one of
 	the classic counterintuitive litmus tests that illustrates the
 	effects of store propagation delays.
@@ -92,7 +105,7 @@ R+mbonceonces.litmus
 R+poonceonces.litmus
 	As above, but without the smp_mb() invocations.
 
-SB+mbonceonces.litmus
+SB+fencembonceonces.litmus
 	This is the fully ordered (again, via smp_mb() version of store
 	buffering, which forms the core of Dekker's mutual-exclusion
 	algorithm.
@@ -100,17 +113,28 @@ SB+mbonceonces.litmus
 SB+poonceonces.litmus
 	As above, but without the smp_mb() invocations.
 
+SB+rfionceonce-poonceonces.litmus
+	This litmus test demonstrates that LKMM is not fully multicopy
+	atomic.  (Neither is it other multicopy atomic.)  This litmus test
+	also demonstrates the "locations" debugging aid, which designates
+	additional registers and locations to be printed out in the dump
+	of final states in the herd7 output.  Without the "locations"
+	statement, only those registers and locations mentioned in the
+	"exists" clause will be printed.
+
 S+poonceonces.litmus
 	As below, but without the smp_wmb() and acquire load.
 
-S+wmbonceonce+poacquireonce.litmus
+S+fencewmbonceonce+poacquireonce.litmus
 	Can a smp_wmb(), instead of a release, and an acquire order
 	a prior store against a subsequent store?
 
 WRC+poonceonces+Once.litmus
-WRC+pooncerelease+rmbonceonce+Once.litmus
-	These two are members of an extension of the MP litmus-test class
-	in which the first write is moved to a separate process.
+WRC+pooncerelease+fencermbonceonce+Once.litmus
+	These two are members of an extension of the MP litmus-test
+	class in which the first write is moved to a separate process.
+	The second is forbidden because smp_store_release() is
+	A-cumulative in LKMM.
 
 Z6.0+pooncelock+pooncelock+pombonce.litmus
 	Is the ordering provided by a spin_unlock() and a subsequent
@@ -121,7 +145,7 @@ Z6.0+pooncelock+poonceLock+pombonce.litmus
 	As above, but with smp_mb__after_spinlock() immediately
 	following the spin_lock().
 
-Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus
+Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
 	Is the ordering provided by a release-acquire chain sufficient
 	to make ordering apparent to accesses by a process that does
 	not participate in that release-acquire chain?
@@ -129,3 +153,101 @@ Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus
 A great many more litmus tests are available here:
 
 	https://github.com/paulmckrcu/litmus
+
+==================
+LITMUS TEST NAMING
+==================
+
+Litmus tests are usually named based on their contents, which means that
+looking at the name tells you what the litmus test does.  The naming
+scheme covers litmus tests having a single cycle that passes through
+each process exactly once, so litmus tests not fitting this description
+are named on an ad-hoc basis.
+
+The structure of a litmus-test name is the litmus-test class, a plus
+sign ("+"), and one string for each process, separated by plus signs.
+The end of the name is ".litmus".
+
+The litmus-test classes may be found in the infamous test6.pdf:
+https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test6.pdf
+Each class defines the pattern of accesses and of the variables accessed.
+For example, if the one process writes to a pair of variables, and
+the other process reads from these same variables, the corresponding
+litmus-test class is "MP" (message passing), which may be found on the
+left-hand end of the second row of tests on page one of test6.pdf.
+
+The strings used to identify the actions carried out by each process are
+complex due to a desire to have short(er) names.  Thus, there is a tool to
+generate these strings from a given litmus test's actions.  For example,
+consider the processes from SB+rfionceonce-poonceonces.litmus:
+
+	P0(int *x, int *y)
+	{
+		int r1;
+		int r2;
+
+		WRITE_ONCE(*x, 1);
+		r1 = READ_ONCE(*x);
+		r2 = READ_ONCE(*y);
+	}
+
+	P1(int *x, int *y)
+	{
+		int r3;
+		int r4;
+
+		WRITE_ONCE(*y, 1);
+		r3 = READ_ONCE(*y);
+		r4 = READ_ONCE(*x);
+	}
+
+The next step is to construct a space-separated list of descriptors,
+interleaving descriptions of the relation between a pair of consecutive
+accesses with descriptions of the second access in the pair.
+
+P0()'s WRITE_ONCE() is read by its first READ_ONCE(), which is a
+reads-from link (rf) and internal to the P0() process.  This is
+"rfi", which is an abbreviation for "reads-from internal".  Because
+some of the tools string these abbreviations together with space
+characters separating processes, the first character is capitalized,
+resulting in "Rfi".
+
+P0()'s second access is a READ_ONCE(), as opposed to (for example)
+smp_load_acquire(), so next is "Once".  Thus far, we have "Rfi Once".
+
+P0()'s third access is also a READ_ONCE(), but to y rather than x.
+This is related to P0()'s second access by program order ("po"),
+to a different variable ("d"), and both accesses are reads ("RR").
+The resulting descriptor is "PodRR".  Because P0()'s third access is
+READ_ONCE(), we add another "Once" descriptor.
+
+A from-read ("fre") relation links P0()'s third to P1()'s first
+access, and the resulting descriptor is "Fre".  P1()'s first access is
+WRITE_ONCE(), which as before gives the descriptor "Once".  The string
+thus far is thus "Rfi Once PodRR Once Fre Once".
+
+The remainder of P1() is similar to P0(), which means we add
+"Rfi Once PodRR Once".  Another fre links P1()'s last access to
+P0()'s first access, which is WRITE_ONCE(), so we add "Fre Once".
+The full string is thus:
+
+	Rfi Once PodRR Once Fre Once Rfi Once PodRR Once Fre Once
+
+This string can be given to the "norm7" and "classify7" tools to
+produce the name:
+
+	$ norm7 -bell linux-kernel.bell \
+		Rfi Once PodRR Once Fre Once Rfi Once PodRR Once Fre Once | \
+	  sed -e 's/:.*//g'
+	SB+rfionceonce-poonceonces
+
+Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus
+
+The descriptors that describe connections between consecutive accesses
+within the cycle through a given litmus test can be provided by the herd
+tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once,
+Release, Acquire, and so on).
+
+To see the full list of descriptors, execute the following command:
+
+	$ diyone7 -bell linux-kernel.bell -show edges
diff --git a/tools/memory-model/litmus-tests/S+wmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
index c53350205d28..18479823cd6c 100644
--- a/tools/memory-model/litmus-tests/S+wmbonceonce+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
@@ -1,4 +1,4 @@
-C S+wmbonceonce+poacquireonce
+C S+fencewmbonceonce+poacquireonce
 
 (*
  * Result: Never
diff --git a/tools/memory-model/litmus-tests/SB+mbonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
index 74b874ffa8da..ed5fff18d223 100644
--- a/tools/memory-model/litmus-tests/SB+mbonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
@@ -1,4 +1,4 @@
-C SB+mbonceonces
+C SB+fencembonceonces
 
 (*
  * Result: Never
diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
new file mode 100644
index 000000000000..04a16603660b
--- /dev/null
+++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
@@ -0,0 +1,32 @@
+C SB+rfionceonce-poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test demonstrates that LKMM is not fully multicopy atomic.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+	int r1;
+	int r2;
+
+	WRITE_ONCE(*x, 1);
+	r1 = READ_ONCE(*x);
+	r2 = READ_ONCE(*y);
+}
+
+P1(int *x, int *y)
+{
+	int r3;
+	int r4;
+
+	WRITE_ONCE(*y, 1);
+	r3 = READ_ONCE(*y);
+	r4 = READ_ONCE(*x);
+}
+
+locations [0:r1; 1:r3; x; y] (* Debug aid: Print things not in "exists". *)
+exists (0:r2=0 /\ 1:r4=0)
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
index 97fcbffde9a0..e9947250d7de 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
@@ -1,11 +1,13 @@
-C WRC+pooncerelease+rmbonceonce+Once
+C WRC+pooncerelease+fencermbonceonce+Once
 
 (*
  * Result: Never
  *
  * This litmus test is an extension of the message-passing pattern, where
  * the first write is moved to a separate process.  Because it features
- * a release and a read memory barrier, it should be forbidden.
+ * a release and a read memory barrier, it should be forbidden.  More
+ * specifically, this litmus test is forbidden because smp_store_release()
+ * is A-cumulative in LKMM.
  *)
 
 {}
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
index a20fc3fafb53..88e70b87a683 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
@@ -1,4 +1,4 @@
-C Z6.0+pooncerelease+poacquirerelease+mbonceonce
+C Z6.0+pooncerelease+poacquirerelease+fencembonceonce
 
 (*
  * Result: Sometimes
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index ba4a4ec6d313..305ded17e741 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -4,46 +4,72 @@
  * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>
  *)
 
-(* Generate coherence orders and handle lock operations *)
+(*
+ * Generate coherence orders and handle lock operations
+ *
+ * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48.
+ * spin_is_locked() is functional from herd7 version 7.49.
+ *)
 
 include "cross.cat"
 
-(* From lock reads to their partner lock writes *)
-let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
-let rmw = rmw | lk-rmw
-
 (*
- * A paired LKR must always see an unlocked value; spin_lock() calls nested
- * inside a critical section (for the same lock) always deadlock.
+ * The lock-related events generated by herd are as follows:
+ *
+ * LKR		Lock-Read: the read part of a spin_lock() or successful
+ *			spin_trylock() read-modify-write event pair
+ * LKW		Lock-Write: the write part of a spin_lock() or successful
+ *			spin_trylock() RMW event pair
+ * UL		Unlock: a spin_unlock() event
+ * LF		Lock-Fail: a failed spin_trylock() event
+ * RL		Read-Locked: a spin_is_locked() event which returns True
+ * RU		Read-Unlocked: a spin_is_locked() event which returns False
+ *
+ * LKR and LKW events always come paired, like all RMW event sequences.
+ *
+ * LKR, LF, RL, and RU are read events; LKR has Acquire ordering.
+ * LKW and UL are write events; UL has Release ordering.
+ * LKW, LF, RL, and RU have no ordering properties.
  *)
-empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc)
-	as lock-nest
 
-(* The litmus test is invalid if an LKW event is not part of an RMW pair *)
-flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
+(* Backward compatibility *)
+let RL = try RL with emptyset
+let RU = try RU with emptyset
 
-(* This will be allowed if we implement spin_is_locked() *)
-flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
+(* Treat RL as a kind of LF: a read with no ordering properties *)
+let LF = LF | RL
 
-(* There should be no R or W accesses to spinlocks *)
-let ALL-LOCKS = LKR | LKW | UL | LF
+(* There should be no ordinary R or W accesses to spinlocks *)
+let ALL-LOCKS = LKR | LKW | UL | LF | RU
 flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
 
+(* Link Lock-Reads to their RMW-partner Lock-Writes *)
+let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
+let rmw = rmw | lk-rmw
+
+(* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *)
+flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
+flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
+
+(*
+ * An LKR must always see an unlocked value; spin_lock() calls nested
+ * inside a critical section (for the same lock) always deadlock.
+ *)
+empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest
+
 (* The final value of a spinlock should not be tested *)
 flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final
 
-
 (*
  * Put lock operations in their appropriate classes, but leave UL out of W
  * until after the co relation has been generated.
  *)
-let R = R | LKR | LF
+let R = R | LKR | LF | RU
 let W = W | LKW
 
 let Release = Release | UL
 let Acquire = Acquire | LKR
 
-
 (* Match LKW events to their corresponding UL events *)
 let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc)
 
@@ -53,27 +79,48 @@ flag ~empty UL \ range(critical) as unmatched-unlock
 let UNMATCHED-LKW = LKW \ domain(critical)
 empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks
 
-
 (* rfi for LF events: link each LKW to the LF events in its critical section *)
 let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc)
 
 (* rfe for LF events *)
 let all-possible-rfe-lf =
-  (*
-   * Given an LF event r, compute the possible rfe edges for that event
-   * (all those starting from LKW events in other threads),
-   * and then convert that relation to a set of single-edge relations.
-   *)
-  let possible-rfe-lf r =
-    let pair-to-relation p = p ++ 0
-    in map pair-to-relation ((LKW * {r}) & loc & ext)
-  (* Do this for each LF event r that isn't in rfi-lf *)
-  in map possible-rfe-lf (LF \ range(rfi-lf))
+	(*
+	 * Given an LF event r, compute the possible rfe edges for that event
+	 * (all those starting from LKW events in other threads),
+	 * and then convert that relation to a set of single-edge relations.
+	 *)
+	let possible-rfe-lf r =
+		let pair-to-relation p = p ++ 0
+		in map pair-to-relation ((LKW * {r}) & loc & ext)
+	(* Do this for each LF event r that isn't in rfi-lf *)
+	in map possible-rfe-lf (LF \ range(rfi-lf))
 
 (* Generate all rf relations for LF events *)
 with rfe-lf from cross(all-possible-rfe-lf)
-let rf = rf | rfi-lf | rfe-lf
+let rf-lf = rfe-lf | rfi-lf
+
+(*
+ * RU, i.e., spin_is_locked() returning False, is slightly different.
+ * We rely on the memory model to rule out cases where spin_is_locked()
+ * within one of the lock's critical sections returns False.
+ *)
+
+(* rfi for RU events: an RU may read from the last po-previous UL *)
+let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc)
+
+(* rfe for RU events: an RU may read from an external UL or the initial write *)
+let all-possible-rfe-ru =
+	let possible-rfe-ru r =
+		let pair-to-relation p = p ++ 0
+		in map pair-to-relation (((UL | IW) * {r}) & loc & ext)
+	in map possible-rfe-ru RU
+
+(* Generate all rf relations for RU events *)
+with rfe-ru from cross(all-possible-rfe-ru)
+let rf-ru = rfe-ru | rfi-ru
 
+(* Final rf relation *)
+let rf = rf | rf-lf | rf-ru
 
 (* Generate all co relations, including LKW events but not UL *)
 let co0 = co0 | ([IW] ; loc ; [LKW]) |
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
new file mode 100755
index 000000000000..ca528f9a24d4
--- /dev/null
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+#
+# Run herd tests on all .litmus files in the specified directory (which
+# defaults to litmus-tests) and check each file's result against a "Result:"
+# comment within that litmus test.  If the verification result does not
+# match that specified in the litmus test, this script prints an error
+# message prefixed with "^^^".  It also outputs verification results to
+# a file whose name is that of the specified litmus test, but with ".out"
+# appended.
+#
+# Usage:
+#	checkalllitmus.sh [ directory ]
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, whose default is defined by the checklitmus.sh script.
+# Thus, one would normally run this in the directory containing the memory
+# model, specifying the pathname of the litmus test to check.
+#
+# This script makes no attempt to run the litmus tests concurrently.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmusdir=${1-litmus-tests}
+if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir"
+then
+	:
+else
+	echo ' --- ' error: $litmusdir is not an accessible directory
+	exit 255
+fi
+
+# Find the checklitmus script.  If it is not where we expect it, then
+# assume that the caller has the PATH environment variable set
+# appropriately.
+if test -x scripts/checklitmus.sh
+then
+	clscript=scripts/checklitmus.sh
+else
+	clscript=checklitmus.sh
+fi
+
+# Run the script on all the litmus tests in the specified directory
+ret=0
+for i in litmus-tests/*.litmus
+do
+	if ! $clscript $i
+	then
+		ret=1
+	fi
+done
+if test "$ret" -ne 0
+then
+	echo " ^^^ VERIFICATION MISMATCHES"
+else
+	echo All litmus tests verified as was expected.
+fi
+exit $ret
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
new file mode 100755
index 000000000000..bf12a75c0719
--- /dev/null
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+#
+# Run a herd test and check the result against a "Result:" comment within
+# the litmus test.  If the verification result does not match that specified
+# in the litmus test, this script prints an error message prefixed with
+# "^^^" and exits with a non-zero status.  It also outputs verification
+# results to a file whose name is that of the specified litmus test, but
+# with ".out" appended.
+#
+# Usage:
+#	checklitmus.sh file.litmus
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, which default to "-conf linux-kernel.cfg".  Thus,
+# one would normally run this in the directory containing the memory model,
+# specifying the pathname of the litmus test to check.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmus=$1
+herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg}
+
+if test -f "$litmus" -a -r "$litmus"
+then
+	:
+else
+	echo ' --- ' error: \"$litmus\" is not a readable file
+	exit 255
+fi
+if grep -q '^ \* Result: ' $litmus
+then
+	outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+else
+	outcome=specified
+fi
+
+echo Herd options: $herdoptions > $litmus.out
+/usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1
+grep "Herd options:" $litmus.out
+grep '^Observation' $litmus.out
+if grep -q '^Observation' $litmus.out
+then
+	:
+else
+	cat $litmus.out
+	echo ' ^^^ Verification error'
+	echo ' ^^^ Verification error' >> $litmus.out 2>&1
+	exit 255
+fi
+if test "$outcome" = DEADLOCK
+then
+	echo grep 3 and 4
+	if grep '^Observation' $litmus.out | grep -q 'Never 0 0$'
+	then
+		ret=0
+	else
+		echo " ^^^ Unexpected non-$outcome verification"
+		echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+		ret=1
+	fi
+elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe
+then
+	ret=0
+else
+	echo " ^^^ Unexpected non-$outcome verification"
+	echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+	ret=1
+fi
+tail -2 $litmus.out | head -1
+exit $ret
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index f76d9914686a..c9d038f91af6 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -31,8 +31,8 @@ INCLUDES := -I$(srctree)/tools/include \
 	    -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
 	    -I$(srctree)/tools/objtool/arch/$(ARCH)/include
 WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
-CFLAGS   += -Werror $(WARNINGS) $(HOSTCFLAGS) -g $(INCLUDES)
-LDFLAGS  += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS)
+CFLAGS   += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES)
+LDFLAGS  += -lelf $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
 
 # Allow old libelf to be used:
 elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7d..46f516dd80ce 100644
--- a/tools/objtool/arch/x86/include/asm/orc_types.h
+++ b/tools/objtool/arch/x86/include/asm/orc_types.h
@@ -88,6 +88,7 @@ struct orc_entry {
 	unsigned	sp_reg:4;
 	unsigned	bp_reg:4;
 	unsigned	type:2;
+	unsigned	end:1;
 } __packed;
 
 /*
@@ -101,6 +102,7 @@ struct unwind_hint {
 	s16		sp_offset;
 	u8		sp_reg;
 	u8		type;
+	u8		end;
 };
 #endif /* __ASSEMBLY__ */
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 3a31b238f885..0414a0d52262 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -164,6 +164,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"lbug_with_loc",
 		"fortify_panic",
 		"usercopy_abort",
+		"machine_real_restart",
 	};
 
 	if (func->bind == STB_WEAK)
@@ -543,6 +544,28 @@ static int add_jump_destinations(struct objtool_file *file)
 				  dest_off);
 			return -1;
 		}
+
+		/*
+		 * For GCC 8+, create parent/child links for any cold
+		 * subfunctions.  This is _mostly_ redundant with a similar
+		 * initialization in read_symbols().
+		 *
+		 * If a function has aliases, we want the *first* such function
+		 * in the symbol table to be the subfunction's parent.  In that
+		 * case we overwrite the initialization done in read_symbols().
+		 *
+		 * However this code can't completely replace the
+		 * read_symbols() code because this doesn't detect the case
+		 * where the parent function's only reference to a subfunction
+		 * is through a switch table.
+		 */
+		if (insn->func && insn->jump_dest->func &&
+		    insn->func != insn->jump_dest->func &&
+		    !strstr(insn->func->name, ".cold.") &&
+		    strstr(insn->jump_dest->func->name, ".cold.")) {
+			insn->func->cfunc = insn->jump_dest->func;
+			insn->jump_dest->func->pfunc = insn->func;
+		}
 	}
 
 	return 0;
@@ -813,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 	struct symbol *pfunc = insn->func->pfunc;
 	unsigned int prev_offset = 0;
 
-	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+	list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
 		if (rela == next_table)
 			break;
 
@@ -903,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file *file,
 {
 	struct rela *text_rela, *rodata_rela;
 	struct instruction *orig_insn = insn;
+	struct section *rodata_sec;
 	unsigned long table_offset;
 
 	/*
@@ -930,10 +954,13 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		/* look for a relocation which references .rodata */
 		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
 						    insn->len);
-		if (!text_rela || text_rela->sym != file->rodata->sym)
+		if (!text_rela || text_rela->sym->type != STT_SECTION ||
+		    !text_rela->sym->sec->rodata)
 			continue;
 
 		table_offset = text_rela->addend;
+		rodata_sec = text_rela->sym->sec;
+
 		if (text_rela->type == R_X86_64_PC32)
 			table_offset += 4;
 
@@ -941,10 +968,10 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		 * Make sure the .rodata address isn't associated with a
 		 * symbol.  gcc jump tables are anonymous data.
 		 */
-		if (find_symbol_containing(file->rodata, table_offset))
+		if (find_symbol_containing(rodata_sec, table_offset))
 			continue;
 
-		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+		rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
 		if (rodata_rela) {
 			/*
 			 * Use of RIP-relative switch jumps is quite rare, and
@@ -1029,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file *file)
 	struct symbol *func;
 	int ret;
 
-	if (!file->rodata || !file->rodata->rela)
+	if (!file->rodata)
 		return 0;
 
 	for_each_sec(file, sec) {
@@ -1134,6 +1161,7 @@ static int read_unwind_hints(struct objtool_file *file)
 
 		cfa->offset = hint->sp_offset;
 		insn->state.type = hint->type;
+		insn->state.end = hint->end;
 	}
 
 	return 0;
@@ -1174,10 +1202,33 @@ static int read_retpoline_hints(struct objtool_file *file)
 	return 0;
 }
 
+static void mark_rodata(struct objtool_file *file)
+{
+	struct section *sec;
+	bool found = false;
+
+	/*
+	 * This searches for the .rodata section or multiple .rodata.func_name
+	 * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8
+	 * rodata sections are ignored as they don't contain jump tables.
+	 */
+	for_each_sec(file, sec) {
+		if (!strncmp(sec->name, ".rodata", 7) &&
+		    !strstr(sec->name, ".str1.")) {
+			sec->rodata = true;
+			found = true;
+		}
+	}
+
+	file->rodata = found;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
 	int ret;
 
+	mark_rodata(file);
+
 	ret = decode_instructions(file);
 	if (ret)
 		return ret;
@@ -2147,7 +2198,6 @@ int check(const char *_objname, bool orc)
 	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-	file.rodata = find_section_by_name(file.elf, ".rodata");
 	file.c_file = find_section_by_name(file.elf, ".comment");
 	file.ignore_unreachables = no_unreachable;
 	file.hints = false;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index c6b68fcb926f..e6e8a655b556 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,7 +31,7 @@ struct insn_state {
 	int stack_size;
 	unsigned char type;
 	bool bp_scratch;
-	bool drap;
+	bool drap, end;
 	int drap_reg, drap_offset;
 	struct cfi_reg vals[CFI_NUM_REGS];
 };
@@ -60,8 +60,8 @@ struct objtool_file {
 	struct elf *elf;
 	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 16);
-	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file, hints;
+	struct section *whitelist;
+	bool ignore_unreachables, c_file, hints, rodata;
 };
 
 int check(const char *objname, bool orc);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 4e60e105583e..b8f3cca8e58b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -31,6 +31,8 @@
 #include "elf.h"
 #include "warn.h"
 
+#define MAX_NAME_LEN 128
+
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
 	struct section *sec;
@@ -298,23 +300,47 @@ static int read_symbols(struct elf *elf)
 	/* Create parent/child links for any cold subfunctions */
 	list_for_each_entry(sec, &elf->sections, list) {
 		list_for_each_entry(sym, &sec->symbol_list, list) {
+			char pname[MAX_NAME_LEN + 1];
+			size_t pnamelen;
 			if (sym->type != STT_FUNC)
 				continue;
 			sym->pfunc = sym->cfunc = sym;
-			coldstr = strstr(sym->name, ".cold.");
-			if (coldstr) {
-				coldstr[0] = '\0';
-				pfunc = find_symbol_by_name(elf, sym->name);
-				coldstr[0] = '.';
-
-				if (!pfunc) {
-					WARN("%s(): can't find parent function",
-					     sym->name);
-					goto err;
-				}
-
-				sym->pfunc = pfunc;
-				pfunc->cfunc = sym;
+			coldstr = strstr(sym->name, ".cold");
+			if (!coldstr)
+				continue;
+
+			pnamelen = coldstr - sym->name;
+			if (pnamelen > MAX_NAME_LEN) {
+				WARN("%s(): parent function name exceeds maximum length of %d characters",
+				     sym->name, MAX_NAME_LEN);
+				return -1;
+			}
+
+			strncpy(pname, sym->name, pnamelen);
+			pname[pnamelen] = '\0';
+			pfunc = find_symbol_by_name(elf, pname);
+
+			if (!pfunc) {
+				WARN("%s(): can't find parent function",
+				     sym->name);
+				return -1;
+			}
+
+			sym->pfunc = pfunc;
+			pfunc->cfunc = sym;
+
+			/*
+			 * Unfortunately, -fnoreorder-functions puts the child
+			 * inside the parent.  Remove the overlap so we can
+			 * have sane assumptions.
+			 *
+			 * Note that pfunc->len now no longer matches
+			 * pfunc->sym.st_size.
+			 */
+			if (sym->sec == pfunc->sec &&
+			    sym->offset >= pfunc->offset &&
+			    sym->offset + sym->len == pfunc->offset + pfunc->len) {
+				pfunc->len -= sym->len;
 			}
 		}
 	}
@@ -364,6 +390,7 @@ static int read_relas(struct elf *elf)
 			rela->offset = rela->rela.r_offset;
 			symndx = GELF_R_SYM(rela->rela.r_info);
 			rela->sym = find_symbol_by_index(elf, symndx);
+			rela->rela_sec = sec;
 			if (!rela->sym) {
 				WARN("can't find rela entry symbol %d for %s",
 				     symndx, sec->name);
@@ -504,10 +531,12 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 	sec->sh.sh_flags = SHF_ALLOC;
 
 
-	/* Add section name to .shstrtab */
+	/* Add section name to .shstrtab (or .strtab for Clang) */
 	shstrtab = find_section_by_name(elf, ".shstrtab");
+	if (!shstrtab)
+		shstrtab = find_section_by_name(elf, ".strtab");
 	if (!shstrtab) {
-		WARN("can't find .shstrtab section");
+		WARN("can't find .shstrtab or .strtab section");
 		return NULL;
 	}
 
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index de5cd2ddded9..bc97ed86b9cd 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -48,7 +48,7 @@ struct section {
 	char *name;
 	int idx;
 	unsigned int len;
-	bool changed, text;
+	bool changed, text, rodata;
 };
 
 struct symbol {
@@ -68,6 +68,7 @@ struct rela {
 	struct list_head list;
 	struct hlist_node hash;
 	GElf_Rela rela;
+	struct section *rela_sec;
 	struct symbol *sym;
 	unsigned int type;
 	unsigned long offset;
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index c3343820916a..faa444270ee3 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -203,7 +203,8 @@ int orc_dump(const char *_objname)
 
 		print_reg(orc[i].bp_reg, orc[i].bp_offset);
 
-		printf(" type:%s\n", orc_type_name(orc[i].type));
+		printf(" type:%s end:%d\n",
+		       orc_type_name(orc[i].type), orc[i].end);
 	}
 
 	elf_end(elf);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 18384d9be4e1..3f98dcfbc177 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -31,6 +31,8 @@ int create_orc(struct objtool_file *file)
 		struct cfi_reg *cfa = &insn->state.cfa;
 		struct cfi_reg *bp = &insn->state.regs[CFI_BP];
 
+		orc->end = insn->state.end;
+
 		if (cfa->base == CFI_UNDEFINED) {
 			orc->sp_reg = ORC_REG_UNDEFINED;
 			continue;
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 84f001d52322..50af4e1274b3 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -30,9 +30,9 @@
 #define EX_ORIG_OFFSET		0
 #define EX_NEW_OFFSET		4
 
-#define JUMP_ENTRY_SIZE		24
+#define JUMP_ENTRY_SIZE		16
 #define JUMP_ORIG_OFFSET	0
-#define JUMP_NEW_OFFSET		8
+#define JUMP_NEW_OFFSET		4
 
 #define ALT_ENTRY_SIZE		13
 #define ALT_ORIG_OFFSET		0
diff --git a/tools/pci/Build b/tools/pci/Build
new file mode 100644
index 000000000000..c375aea21790
--- /dev/null
+++ b/tools/pci/Build
@@ -0,0 +1 @@
+pcitest-y += pcitest.o
diff --git a/tools/pci/Makefile b/tools/pci/Makefile
new file mode 100644
index 000000000000..46e4c2f318c9
--- /dev/null
+++ b/tools/pci/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := pcitest pcitest.sh
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following to be outside of kernel tree
+#
+$(OUTPUT)include/linux/: ../../include/uapi/linux/
+	mkdir -p $(OUTPUT)include/linux/ 2>&1 || true
+	ln -sf $(CURDIR)/../../include/uapi/linux/pcitest.h $@
+
+prepare: $(OUTPUT)include/linux/
+
+PCITEST_IN := $(OUTPUT)pcitest-in.o
+$(PCITEST_IN): prepare FORCE
+	$(Q)$(MAKE) $(build)=pcitest
+$(OUTPUT)pcitest: $(PCITEST_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+clean:
+	rm -f $(ALL_PROGRAMS)
+	rm -rf $(OUTPUT)include/
+	find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+
+install: $(ALL_PROGRAMS)
+	install -d -m 755 $(DESTDIR)$(bindir);		\
+	for program in $(ALL_PROGRAMS); do		\
+		install $$program $(DESTDIR)$(bindir);	\
+	done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index 9074b477bff0..ec4d51f3308b 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -23,7 +23,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/ioctl.h>
-#include <time.h>
 #include <unistd.h>
 
 #include <linux/pcitest.h>
@@ -31,29 +30,32 @@
 #define BILLION 1E9
 
 static char *result[] = { "NOT OKAY", "OKAY" };
+static char *irq[] = { "LEGACY", "MSI", "MSI-X" };
 
 struct pci_test {
 	char		*device;
 	char		barnum;
 	bool		legacyirq;
 	unsigned int	msinum;
+	unsigned int	msixnum;
+	int		irqtype;
+	bool		set_irqtype;
+	bool		get_irqtype;
 	bool		read;
 	bool		write;
 	bool		copy;
 	unsigned long	size;
 };
 
-static int run_test(struct pci_test *test)
+static void run_test(struct pci_test *test)
 {
 	long ret;
 	int fd;
-	struct timespec start, end;
-	double time;
 
 	fd = open(test->device, O_RDWR);
 	if (fd < 0) {
 		perror("can't open PCI Endpoint Test device");
-		return fd;
+		return;
 	}
 
 	if (test->barnum >= 0 && test->barnum <= 5) {
@@ -65,6 +67,24 @@ static int run_test(struct pci_test *test)
 			fprintf(stdout, "%s\n", result[ret]);
 	}
 
+	if (test->set_irqtype) {
+		ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype);
+		fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]);
+		if (ret < 0)
+			fprintf(stdout, "FAILED\n");
+		else
+			fprintf(stdout, "%s\n", result[ret]);
+	}
+
+	if (test->get_irqtype) {
+		ret = ioctl(fd, PCITEST_GET_IRQTYPE);
+		fprintf(stdout, "GET IRQ TYPE:\t\t");
+		if (ret < 0)
+			fprintf(stdout, "FAILED\n");
+		else
+			fprintf(stdout, "%s\n", irq[ret]);
+	}
+
 	if (test->legacyirq) {
 		ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0);
 		fprintf(stdout, "LEGACY IRQ:\t");
@@ -83,6 +103,15 @@ static int run_test(struct pci_test *test)
 			fprintf(stdout, "%s\n", result[ret]);
 	}
 
+	if (test->msixnum > 0 && test->msixnum <= 2048) {
+		ret = ioctl(fd, PCITEST_MSIX, test->msixnum);
+		fprintf(stdout, "MSI-X%d:\t\t", test->msixnum);
+		if (ret < 0)
+			fprintf(stdout, "TEST FAILED\n");
+		else
+			fprintf(stdout, "%s\n", result[ret]);
+	}
+
 	if (test->write) {
 		ret = ioctl(fd, PCITEST_WRITE, test->size);
 		fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size);
@@ -133,7 +162,7 @@ int main(int argc, char **argv)
 	/* set default endpoint device */
 	test->device = "/dev/pci-endpoint-test.0";
 
-	while ((c = getopt(argc, argv, "D:b:m:lrwcs:")) != EOF)
+	while ((c = getopt(argc, argv, "D:b:m:x:i:Ilrwcs:")) != EOF)
 	switch (c) {
 	case 'D':
 		test->device = optarg;
@@ -151,6 +180,20 @@ int main(int argc, char **argv)
 		if (test->msinum < 1 || test->msinum > 32)
 			goto usage;
 		continue;
+	case 'x':
+		test->msixnum = atoi(optarg);
+		if (test->msixnum < 1 || test->msixnum > 2048)
+			goto usage;
+		continue;
+	case 'i':
+		test->irqtype = atoi(optarg);
+		if (test->irqtype < 0 || test->irqtype > 2)
+			goto usage;
+		test->set_irqtype = true;
+		continue;
+	case 'I':
+		test->get_irqtype = true;
+		continue;
 	case 'r':
 		test->read = true;
 		continue;
@@ -173,6 +216,9 @@ usage:
 			"\t-D <dev>		PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
 			"\t-b <bar num>		BAR test (bar number between 0..5)\n"
 			"\t-m <msi num>		MSI test (msi number between 1..32)\n"
+			"\t-x <msix num>	\tMSI-X test (msix number between 1..2048)\n"
+			"\t-i <irq type>	\tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
+			"\t-I			Get current IRQ type configured\n"
 			"\t-l			Legacy IRQ test\n"
 			"\t-r			Read buffer test\n"
 			"\t-w			Write buffer test\n"
diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh
index 77e8c85ef744..75ed48ff2990 100644
--- a/tools/pci/pcitest.sh
+++ b/tools/pci/pcitest.sh
@@ -16,7 +16,10 @@ echo
 echo "Interrupt tests"
 echo
 
+pcitest -i 0
 pcitest -l
+
+pcitest -i 1
 msi=1
 
 while [ $msi -lt 33 ]
@@ -26,9 +29,21 @@ do
 done
 echo
 
+pcitest -i 2
+msix=1
+
+while [ $msix -lt 2049 ]
+do
+        pcitest -x $msix
+        msix=`expr $msix + 1`
+done
+echo
+
 echo "Read Tests"
 echo
 
+pcitest -i 1
+
 pcitest -r -s 1
 pcitest -r -s 1024
 pcitest -r -s 1025
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index db11478e30b4..ac841bc5c35b 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -47,7 +47,8 @@ man5dir=$(mandir)/man5
 man7dir=$(mandir)/man7
 
 ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe
+ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_HTML = xhtml11
 MANPAGE_XSL = manpage-normal.xsl
 XMLTO_EXTRA =
 INSTALL?=install
@@ -55,6 +56,14 @@ RM ?= rm -f
 DOC_REF = origin/man
 HTML_REF = origin/html
 
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC = asciidoctor
+ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
+ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
+ASCIIDOC_HTML = xhtml5
+endif
+
 infodir?=$(prefix)/share/info
 MAKEINFO=makeinfo
 INSTALL_INFO=install-info
@@ -73,10 +82,12 @@ ifeq ($(_tmp_tool_path),)
 	missing_tools = $(ASCIIDOC)
 endif
 
+ifndef USE_ASCIIDOCTOR
 _tmp_tool_path := $(call get-executable,$(XMLTO))
 ifeq ($(_tmp_tool_path),)
 	missing_tools += $(XMLTO)
 endif
+endif
 
 #
 # For asciidoc ...
@@ -264,9 +275,17 @@ clean:
 
 $(MAN_HTML): $(OUTPUT)%.html : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+	$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b manpage -d manpage \
 		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
 	mv $@+ $@
+endif
 
 $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
 	$(QUIET_XMLTO)$(RM) $@ && \
@@ -274,7 +293,7 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
 
 $(OUTPUT)%.xml : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+	$(ASCIIDOC) -b docbook -d manpage \
 		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
 	mv $@+ $@
 
@@ -321,13 +340,13 @@ howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
 	mv $@+ $@
 
 $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
-	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt
 
 WEBDOC_DEST = /pub/software/tools/perf/docs
 
 $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \
 	mv $@+ $@
 
 # UNIMPLEMENTED
diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb
new file mode 100644
index 000000000000..d148fe95c0c4
--- /dev/null
+++ b/tools/perf/Documentation/asciidoctor-extensions.rb
@@ -0,0 +1,29 @@
+require 'asciidoctor'
+require 'asciidoctor/extensions'
+
+module Perf
+  module Documentation
+    class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor
+      use_dsl
+
+      named :chrome
+
+      def process(parent, target, attrs)
+        if parent.document.basebackend? 'html'
+          %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n)
+        elsif parent.document.basebackend? 'manpage'
+          "#{target}(#{attrs[1]})"
+        elsif parent.document.basebackend? 'docbook'
+          "<citerefentry>\n" \
+            "<refentrytitle>#{target}</refentrytitle>" \
+            "<manvolnum>#{attrs[1]}</manvolnum>\n" \
+          "</citerefentry>\n"
+        end
+      end
+    end
+  end
+end
+
+Asciidoctor::Extensions.register do
+  inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf
+end
diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt
new file mode 100644
index 000000000000..6222c1e7231f
--- /dev/null
+++ b/tools/perf/Documentation/build-xed.txt
@@ -0,0 +1,19 @@
+
+For --xed the xed tool is needed. Here is how to install it:
+
+  $ git clone https://github.com/intelxed/mbuild.git mbuild
+  $ git clone https://github.com/intelxed/xed
+  $ cd xed
+  $ ./mfile.py --share
+  $ ./mfile.py examples
+  $ sudo ./mfile.py --prefix=/usr/local install
+  $ sudo ldconfig
+  $ sudo cp obj/examples/xed /usr/local/bin
+
+Basic xed testing:
+
+  $ xed | head -3
+  ERROR: required argument(s) were missing
+  Copyright (C) 2017, Intel Corporation. All rights reserved.
+  XED version: [v10.0-328-g7d62c8c49b7b]
+  $
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 76971d2e4164..115eaacc455f 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -106,7 +106,7 @@ in transaction, respectively.
 While it is possible to create scripts to analyze the data, an alternative
 approach is available to export the data to a sqlite or postgresql database.
 Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script call-graph-from-sql.py for an example of using the database.
+and to script exported-sql-viewer.py for an example of using the database.
 
 There is also script intel-pt-events.py which provides an example of how to
 unpack the raw data for power events and PTWRITE.
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index a3abe04c779d..c2182cbabde3 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -11,10 +11,11 @@
 		l	synthesize last branch entries (use with i or x)
 		s       skip initial number of events
 
-	The default is all events i.e. the same as --itrace=ibxwpe
+	The default is all events i.e. the same as --itrace=ibxwpe,
+	except for perf script where it is --itrace=ce
 
-	In addition, the period (default 100000) for instructions events
-	can be specified in units of:
+	In addition, the period (default 100000, except for perf script where it is 1)
+	for instructions events can be specified in units of:
 
 		i	instructions
 		t	ticks
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 749cc6055dac..e8c972f89357 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -118,6 +118,15 @@ OPTIONS
 --group::
 	Show event group information together
 
+--percent-type::
+	Set annotation percent type from following choices:
+	  global-period, local-period, global-hits, local-hits
+
+	The local/global keywords set if the percentage is computed
+	in the scope of the function (local) or the whole data (global).
+	The period/hits keywords set the base the percentage is computed
+	on - the samples period or the number of samples (hits).
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 73c2650bd0db..f6de0952ff3c 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -48,6 +48,9 @@ OPTIONS
 --purge=::
         Purge all cached binaries including older caches which have specified
 	path from the cache.
+-P::
+--purge-all::
+	Purge all cached binaries. This will flush out entire cache.
 -M::
 --missing=::
 	List missing build ids in the cache for the specified file.
@@ -59,7 +62,9 @@ OPTIONS
 	exactly same build-id, that is replaced by new one. It can be used
 	to update kallsyms and kernel dso to vmlinux in order to support
 	annotation.
-
+-l::
+--list::
+	List all valid binaries from cache.
 -v::
 --verbose::
 	Be more verbose.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 2549c34a7895..667c14e56031 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -18,6 +18,10 @@ various perf commands with the -e option.
 
 OPTIONS
 -------
+-d::
+--desc::
+Print extra event descriptions. (default)
+
 --no-desc::
 Don't print descriptions.
 
@@ -25,11 +29,13 @@ Don't print descriptions.
 --long-desc::
 Print longer event descriptions.
 
+--debug::
+Enable debugging output.
+
 --details::
 Print how named events are resolved internally into perf events, and also
 any extra expressions computed by perf stat.
 
-
 [[EVENT_MODIFIERS]]
 EVENT MODIFIERS
 ---------------
@@ -49,7 +55,6 @@ counted. The following modifiers exist:
  S - read sample value (PERF_SAMPLE_READ)
  D - pin the event to the PMU
  W - group is weak and will fallback to non-group if not schedulable,
-     only supported in 'perf stat' for now.
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
@@ -124,7 +129,11 @@ The available PMUs and their raw parameters can be listed with
 For example the raw event "LSD.UOPS" core pmu event above could
 be specified as
 
-  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
+  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=0x1/ ...
+
+  or using extended name syntax
+
+  perf stat -e cpu/event=0xa8,umask=0x1,cmask=0x1,name=\'LSD.UOPS_CYCLES:cmask=0x1\'/ ...
 
 PER SOCKET PMUS
 ---------------
@@ -230,7 +239,7 @@ perf also supports group leader sampling using the :S specifier.
   perf record -e '{cycles,instructions}:S' ...
   perf report --group
 
-Normally all events in a event group sample, but with :S only
+Normally all events in an event group sample, but with :S only
 the first event (the leader) samples, and it only reads the values of the
 other events in the group.
 
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index cc37b3a4be76..246dee081efd 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -57,6 +57,9 @@ OPTIONS
 			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
 			 "no" for disable callgraph.
 	  - 'stack-size': user stack size for dwarf mode
+	  - 'name' : User defined event name. Single quotes (') may be used to
+		    escape symbols in the name from parsing by shell and tool
+		    like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
 
           See the linkperf:perf-list[1] man page for more parameters.
 
@@ -91,7 +94,7 @@ OPTIONS
 	  "perf report" to view group events together.
 
 --filter=<filter>::
-        Event filter. This option should follow a event selector (-e) which
+        Event filter. This option should follow an event selector (-e) which
 	selects either tracepoint event(s) or a hardware trace PMU
 	(e.g. Intel PT or CoreSight).
 
@@ -150,7 +153,7 @@ OPTIONS
 
 --exclude-perf::
 	Don't record events issued by perf itself. This option should follow
-	a event selector (-e) which selects tracepoint event(s). It adds a
+	an event selector (-e) which selects tracepoint event(s). It adds a
 	filter expression 'common_pid != $PERFPID' to filters. If other
 	'--filter' exists, the new filter expression will be combined with
 	them by '&&'.
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 917e36fde6d8..474a4941f65d 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -477,6 +477,15 @@ include::itrace.txt[]
 	Display monitored tasks stored in perf data. Displaying pid/tid/ppid
 	plus the command string aligned to distinguish parent and child tasks.
 
+--percent-type::
+	Set annotation percent type from following choices:
+	  global-period, local-period, global-hits, local-hits
+
+	The local/global keywords set if the percentage is computed
+	in the scope of the function (local) or the whole data (global).
+	The period/hits keywords set the base the percentage is computed
+	on - the samples period or the number of samples (hits).
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 51ec2d20068a..0fb9eda3cbca 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -610,6 +610,32 @@ Various utility functions for use with perf script:
   nsecs_str(nsecs) - returns printable string in the form secs.nsecs
   avg(total, n) - returns average given a sum and a total number of values
 
+SUPPORTED FIELDS
+----------------
+
+Currently supported fields:
+
+ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr,
+symbol, dso, time_enabled, time_running, values, callchain,
+brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs,
+weight, transaction, raw_buf, attr.
+
+Some fields have sub items:
+
+brstack:
+    from, to, from_dsoname, to_dsoname, mispred,
+    predicted, in_tx, abort, cycles.
+
+brstacksym:
+    items: from, to, pred, in_tx, abort (converted string)
+
+For example,
+We can use this code to print brstack "from", "to", "cycles".
+
+if 'brstack' in dict:
+	for entry in dict['brstack']:
+		print "from %s, to %s, cycles %s" % (entry["from"], entry["to"], entry["cycles"])
+
 SEE ALSO
 --------
 linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index afdafe2110a1..a2b37ce48094 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -383,6 +383,24 @@ include::itrace.txt[]
 	will be printed. Each entry has function name and file/line. Enabled by
 	default, disable with --no-inline.
 
+--insn-trace::
+	Show instruction stream for intel_pt traces. Combine with --xed to
+	show disassembly.
+
+--xed::
+	Run xed disassembler on output. Requires installing the xed disassembler.
+
+--call-trace::
+	Show call stream for intel_pt traces. The CPUs are interleaved, but
+	can be filtered with -C.
+
+--call-ret-trace::
+	Show call and return stream for intel_pt traces.
+
+--graph-function::
+	For itrace only show specified functions and their callees for
+	itrace. Multiple functions can be separated by comma.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index e6c3b4e555c2..b10a90b6a718 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -116,6 +116,22 @@ Do not aggregate counts across all monitored CPUs.
 print counts using a CSV-style output to make it easy to import directly into
 spreadsheets. Columns are separated by the string specified in SEP.
 
+--table:: Display time for each run (-r option), in a table format, e.g.:
+
+  $ perf stat --null -r 5 --table perf bench sched pipe
+
+   Performance counter stats for 'perf bench sched pipe' (5 runs):
+
+             # Table of individual measurements:
+             5.189 (-0.293) #
+             5.189 (-0.294) #
+             5.186 (-0.296) #
+             5.663 (+0.181) ##
+             6.186 (+0.703) ####
+
+             # Final result:
+             5.483 +- 0.198 seconds time elapsed  ( +-  3.62% )
+
 -G name::
 --cgroup name::
 monitor only in the container (cgroup) called "name". This option is available only
@@ -162,6 +178,9 @@ Print count deltas for fixed number of times.
 This option should be used together with "-I" option.
 	example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
 
+--interval-clear::
+Clear the screen before next interval.
+
 --timeout msecs::
 Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
 This option is not supported with the "-I" option.
@@ -294,20 +313,38 @@ Users who wants to get the actual value can apply --no-metric-only.
 EXAMPLES
 --------
 
-$ perf stat -- make -j
+$ perf stat -- make
+
+   Performance counter stats for 'make':
+
+        83723.452481      task-clock:u (msec)       #    1.004 CPUs utilized
+                   0      context-switches:u        #    0.000 K/sec
+                   0      cpu-migrations:u          #    0.000 K/sec
+           3,228,188      page-faults:u             #    0.039 M/sec
+     229,570,665,834      cycles:u                  #    2.742 GHz
+     313,163,853,778      instructions:u            #    1.36  insn per cycle
+      69,704,684,856      branches:u                #  832.559 M/sec
+       2,078,861,393      branch-misses:u           #    2.98% of all branches
+
+        83.409183620 seconds time elapsed
+
+        74.684747000 seconds user
+         8.739217000 seconds sys
+
+TIMINGS
+-------
+As displayed in the example above we can display 3 types of timings.
+We always display the time the counters were enabled/alive:
+
+        83.409183620 seconds time elapsed
 
- Performance counter stats for 'make -j':
+For workload sessions we also display time the workloads spent in
+user/system lands:
 
-    8117.370256  task clock ticks     #      11.281 CPU utilization factor
-            678  context switches     #       0.000 M/sec
-            133  CPU migrations       #       0.000 M/sec
-         235724  pagefaults           #       0.029 M/sec
-    24821162526  CPU cycles           #    3057.784 M/sec
-    18687303457  instructions         #    2302.138 M/sec
-      172158895  cache references     #      21.209 M/sec
-       27075259  cache misses         #       3.335 M/sec
+        74.684747000 seconds user
+         8.739217000 seconds sys
 
- Wall-clock time elapsed:   719.554352 msecs
+Those times are the very same as displayed by the 'time' tool.
 
 CSV FORMAT
 ----------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 114fda12aa49..808b664343c9 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -242,6 +242,16 @@ Default is to monitor all CPUS.
 --hierarchy::
 	Enable hierarchy output.
 
+--overwrite::
+	Enable this to use just the most recent records, which helps in high core count
+	machines such as Knights Landing/Mill, but right now is disabled by default as
+	the pausing used in this technique is leading to loss of metadata events such
+	as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading
+	to lots of unknown samples appearing on the UI. Enable this if you are in such
+	machines and profiling a workload that doesn't creates short lived threads and/or
+	doesn't uses many executable mmap operations. Work is being planed to solve
+	this situation, till then, this will remain disabled by default.
+
 --force::
 	Don't do ownership validation.
 
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 115db9e06ecd..e113450503d2 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --kernel-syscall-graph::
 	 Show the kernel callchains on the syscall exit path.
 
+--max-events=N::
+	Stop after processing N events. Note that strace-like events are considered
+	only at exit time or when a syscall is interrupted, i.e. in those cases this
+	option is equivalent to the number of lines printed.
+
 --max-stack::
         Set the stack depth limit when parsing the callchain, anything
         beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
   As you can see, there was major pagefault in python process, from
   CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
 
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
+
+  $ perf trace -e open* --max-events 4
+  [root@jouet perf]# trace -e open* --max-events 4
+  2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
+  2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
+  $
+
+Trace the first minor page fault when running a workload:
+
+  # perf trace -F min --max-stack=7 --max-events 1 sleep 1
+     0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
+                                       __clear_user ([kernel.kallsyms])
+                                       load_elf_binary ([kernel.kallsyms])
+                                       search_binary_handler ([kernel.kallsyms])
+                                       __do_execve_file.isra.33 ([kernel.kallsyms])
+                                       __x64_sys_execve ([kernel.kallsyms])
+                                       do_syscall_64 ([kernel.kallsyms])
+                                       entry_SYSCALL_64 ([kernel.kallsyms])
+  #
+
+Trace the next min page page fault to take place on the first CPU:
+
+  # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
+     0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
+                                       js::gc::FreeSpan::initAsEmpty (inlined)
+                                       js::gc::Arena::setAsNotAllocated (inlined)
+                                       js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
+                                       js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
+                                       JSThinInlineString::new_<(js::AllowGC)1> (inlined)
+                                       AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
+                                       js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       [0x18b26e6bc2bd] (/tmp/perf-17136.map)
+  #
+
+Trace the next two sched:sched_switch events, four block:*_plug events, the
+next block:*_unplug and the next three net:*dev_queue events, this last one
+with a backtrace of at most 16 entries, system wide:
+
+  # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
+     0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
+     0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
+   254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+  2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
+  2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
+  4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
+  8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  #
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 225454416ed5..7902a5681fc8 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -84,10 +84,10 @@ endif # has_clean
 endif # MAKECMDGOALS
 
 #
-# The clean target is not really parallel, don't print the jobs info:
+# Explicitly disable parallelism for the clean target.
 #
 clean:
-	$(make)
+	$(make) -j1
 
 #
 # The build-test target is not really parallel, don't print the jobs info,
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ae7dc46e8f8a..a0e8c23f9125 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -54,6 +54,8 @@ endif
 
 ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
+  NO_SYSCALL_TABLE := 0
+  CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
 
@@ -207,8 +209,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
@@ -298,6 +299,11 @@ ifndef NO_BIONIC
   endif
 endif
 
+ifeq ($(feature-get_current_dir_name), 1)
+  CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
+endif
+
+
 ifdef NO_LIBELF
   NO_DWARF := 1
   NO_DEMANGLE := 1
@@ -832,7 +838,7 @@ ifndef NO_JVMTI
     JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
   else
     ifneq (,$(wildcard /usr/sbin/alternatives))
-      JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
     endif
   endif
   ifndef JDIR
@@ -885,6 +891,8 @@ endif
 
 # Among the variables below, these:
 #   perfexecdir
+#   perf_include_dir
+#   perf_examples_dir
 #   template_dir
 #   mandir
 #   infodir
@@ -904,6 +912,8 @@ bindir = $(abspath $(prefix)/$(bindir_relative))
 mandir = share/man
 infodir = share/info
 perfexecdir = libexec/perf-core
+perf_include_dir = lib/perf/include
+perf_examples_dir = lib/perf/examples
 sharedir = $(prefix)/share
 template_dir = share/perf-core/templates
 STRACE_GROUPS_DIR = share/perf-core/strace/groups
@@ -934,6 +944,8 @@ bindir_SQ = $(subst ','\'',$(bindir))
 mandir_SQ = $(subst ','\'',$(mandir))
 infodir_SQ = $(subst ','\'',$(infodir))
 perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
+perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
 template_dir_SQ = $(subst ','\'',$(template_dir))
 htmldir_SQ = $(subst ','\'',$(htmldir))
 tipdir_SQ = $(subst ','\'',$(tipdir))
@@ -944,14 +956,20 @@ srcdir_SQ = $(subst ','\'',$(srcdir))
 
 ifneq ($(filter /%,$(firstword $(perfexecdir))),)
 perfexec_instdir = $(perfexecdir)
+perf_include_instdir = $(perf_include_dir)
+perf_examples_instdir = $(perf_examples_dir)
 STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
 tip_instdir = $(tipdir)
 else
 perfexec_instdir = $(prefix)/$(perfexecdir)
+perf_include_instdir = $(prefix)/$(perf_include_dir)
+perf_examples_instdir = $(prefix)/$(perf_examples_dir)
 STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
 tip_instdir = $(prefix)/$(tipdir)
 endif
 perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir))
+perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir))
 STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
 tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
 
@@ -999,6 +1017,8 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
 $(call detected_var,STRACE_GROUPS_DIR_SQ)
 $(call detected_var,prefix_SQ)
 $(call detected_var,perfexecdir_SQ)
+$(call detected_var,perf_include_dir_SQ)
+$(call detected_var,perf_examples_dir_SQ)
 $(call detected_var,tipdir_SQ)
 $(call detected_var,srcdir_SQ)
 $(call detected_var,LIBDIR)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 83e453de36f8..d95655489f7e 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1,4 +1,5 @@
 include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
 
 # The default target of this Makefile is...
 all:
@@ -384,6 +385,10 @@ export INSTALL SHELL_PATH
 
 SHELL = $(SHELL_PATH)
 
+linux_uapi_dir := $(srctree)/tools/include/uapi/linux
+asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
+
 beauty_outdir := $(OUTPUT)trace/beauty/generated
 beauty_ioctl_outdir := $(beauty_outdir)/ioctl
 drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c
@@ -431,6 +436,12 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
 $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
 	$(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
 
+socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c
+socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
+
+$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
+	$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
+
 vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
 vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
 vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -452,6 +463,18 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
 $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
 	$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
 
+mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
+mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
+
+$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl)
+	$(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
+mount_flags_array := $(beauty_outdir)/mount_flags_array.c
+mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
+
+$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
+	$(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+
 prctl_option_array := $(beauty_outdir)/prctl_option_array.c
 prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
 prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -566,8 +589,11 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(sndrv_ctl_ioctl_array) \
 	$(kcmp_type_array) \
 	$(kvm_ioctl_array) \
+	$(socket_ipproto_array) \
 	$(vhost_virtio_ioctl_array) \
 	$(madvise_behavior_array) \
+	$(mmap_flags_array) \
+	$(mount_flags_array) \
 	$(perf_ioctl_array) \
 	$(prctl_option_array) \
 	$(arch_errno_name_array)
@@ -626,7 +652,7 @@ $(LIBPERF_IN): prepare FORCE
 $(LIB_FILE): $(LIBPERF_IN)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
 
-LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
 
 $(LIBTRACEEVENT): FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
@@ -767,6 +793,16 @@ ifndef NO_JVMTI
 endif
 	$(call QUIET_INSTALL, libexec) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBBPF
+	$(call QUIET_INSTALL, bpf-headers) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \
+		$(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+		$(INSTALL) include/bpf/linux/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
+	$(call QUIET_INSTALL, bpf-examples) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
+		$(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+endif
 	$(call QUIET_INSTALL, perf-archive) \
 		$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(call QUIET_INSTALL, perf-with-kcore) \
@@ -844,12 +880,15 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
 		$(OUTPUT)pmu-events/pmu-events.c \
 		$(OUTPUT)$(madvise_behavior_array) \
+		$(OUTPUT)$(mmap_flags_array) \
+		$(OUTPUT)$(mount_flags_array) \
 		$(OUTPUT)$(drm_ioctl_array) \
 		$(OUTPUT)$(pkey_alloc_access_rights_array) \
 		$(OUTPUT)$(sndrv_ctl_ioctl_array) \
 		$(OUTPUT)$(sndrv_pcm_ioctl_array) \
 		$(OUTPUT)$(kvm_ioctl_array) \
 		$(OUTPUT)$(kcmp_type_array) \
+		$(OUTPUT)$(socket_ipproto_array) \
 		$(OUTPUT)$(vhost_virtio_ioctl_array) \
 		$(OUTPUT)$(perf_ioctl_array) \
 		$(OUTPUT)$(prctl_option_array) \
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
index 8cb347760233..9a0242e74cfc 100644
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample,
 
 	sp = (unsigned long) regs[PERF_REG_ARM_SP];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 91de4860faad..dbef716a1913 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -4,3 +4,25 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 PERF_HAVE_JITDUMP := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+
+#
+# Syscall table generation for perf
+#
+
+out    := $(OUTPUT)arch/arm64/include/generated/asm
+header := $(out)/syscalls.c
+incpath := $(srctree)/tools
+sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h
+sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sysdef) $(systbl)
+	$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
+
+clean::
+	$(call QUIET_CLEAN, arm64) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 6688977e4ac7..76c6345a57d5 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -8,6 +8,63 @@ struct arm64_annotate {
 		jump_insn;
 };
 
+static int arm64_mov__parse(struct arch *arch __maybe_unused,
+			    struct ins_operands *ops,
+			    struct map_symbol *ms __maybe_unused)
+{
+	char *s = strchr(ops->raw, ','), *target, *endptr;
+
+	if (s == NULL)
+		return -1;
+
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+	ops->target.raw = strdup(target);
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	ops->target.addr = strtoull(target, &endptr, 16);
+	if (endptr == target)
+		goto out_free_target;
+
+	s = strchr(endptr, '<');
+	if (s == NULL)
+		goto out_free_target;
+	endptr = strchr(s + 1, '>');
+	if (endptr == NULL)
+		goto out_free_target;
+
+	*endptr = '\0';
+	*s = ' ';
+	ops->target.name = strdup(s);
+	*s = '<';
+	*endptr = '>';
+	if (ops->target.name == NULL)
+		goto out_free_target;
+
+	return 0;
+
+out_free_target:
+	zfree(&ops->target.raw);
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			  struct ins_operands *ops);
+
+static struct ins_ops arm64_mov_ops = {
+	.parse	   = arm64_mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
 static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name)
 {
 	struct arm64_annotate *arm = arch->priv;
@@ -21,7 +78,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const
 	else if (!strcmp(name, "ret"))
 		ops = &ret_ops;
 	else
-		return NULL;
+		ops = &arm64_mov_ops;
 
 	arch__associate_ins_ops(arch, name, ops);
 	return ops;
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
new file mode 100755
index 000000000000..c88fd32563eb
--- /dev/null
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# powerpc script.
+#
+# Copyright IBM Corp. 2017
+# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+# Changed by: Kim Phillips <kim.phillips@arm.com>
+
+gcc=$1
+hostcc=$2
+incpath=$3
+input=$4
+
+if ! test -r $input; then
+	echo "Could not read input file" >&2
+	exit 1
+fi
+
+create_table_from_c()
+{
+	local sc nr last_sc
+
+	create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
+
+	{
+
+	cat <<-_EoHEADER
+		#include <stdio.h>
+		#include "$input"
+		int main(int argc, char *argv[])
+		{
+	_EoHEADER
+
+	while read sc nr; do
+		printf "%s\n" "	printf(\"\\t[%d] = \\\"$sc\\\",\\n\", __NR_$sc);"
+		last_sc=$sc
+	done
+
+	printf "%s\n" "	printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);"
+	printf "}\n"
+
+	} | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
+
+	$create_table_exe
+
+	rm -f $create_table_exe
+}
+
+create_table()
+{
+	echo "static const char *syscalltbl_arm64[] = {"
+	create_table_from_c
+	echo "};"
+}
+
+$gcc -E -dM -x c  $input	       \
+	|sed -ne 's/^#define __NR_//p' \
+	|sort -t' ' -k2 -nu	       \
+	|create_table
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
index e907f0f4c20c..5522ce384723 100644
--- a/tools/perf/arch/arm64/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample,
 
 	sp = (unsigned long) regs[PERF_REG_ARM64_SP];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 1120e39c1b00..5ccfce87e693 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -194,6 +194,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
 	sper->itr.read_finish = arm_spe_read_finish;
 	sper->itr.alignment = 0;
 
+	*err = 0;
 	return &sper->itr;
 }
 
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index c6f373508a4f..82657c01a3b8 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -189,7 +189,7 @@ out_error:
 	return -1;
 }
 
-int perf_env__lookup_objdump(struct perf_env *env)
+int perf_env__lookup_objdump(struct perf_env *env, const char **path)
 {
 	/*
 	 * For live mode, env->arch will be NULL and we can use
@@ -198,5 +198,5 @@ int perf_env__lookup_objdump(struct perf_env *env)
 	if (env->arch == NULL)
 		return 0;
 
-	return perf_env__lookup_binutils_path(env, "objdump", &objdump_path);
+	return perf_env__lookup_binutils_path(env, "objdump", path);
 }
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 2d875baa92e6..2167001b18c5 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -4,8 +4,6 @@
 
 #include "../util/env.h"
 
-extern const char *objdump_path;
-
-int perf_env__lookup_objdump(struct perf_env *env);
+int perf_env__lookup_objdump(struct perf_env *env, const char **path);
 
 #endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 30cbbd6d5be0..5f39efef0856 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
 
 	sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
index 853b95d1e139..2011376c7ab5 100644
--- a/tools/perf/arch/powerpc/util/book3s_hv_exits.h
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -15,7 +15,6 @@
 	{0x400, "INST_STORAGE"}, \
 	{0x480, "INST_SEGMENT"}, \
 	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
 	{0x502, "EXTERNAL_HV"}, \
 	{0x600, "ALIGNMENT"}, \
 	{0x700, "PROGRAM"}, \
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 0c370f81e002..7c6eeb4633fe 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -58,9 +58,13 @@ static int check_return_reg(int ra_regno, Dwarf_Frame *frame)
 	}
 
 	/*
-	 * Check if return address is on the stack.
+	 * Check if return address is on the stack. If return address
+	 * is in a register (typically R0), it is yet to be saved on
+	 * the stack.
 	 */
-	if (nops != 0 || ops != NULL)
+	if ((nops != 0 || ops != NULL) &&
+		!(nops == 1 && ops[0].atom == DW_OP_regx &&
+			ops[0].number2 == 0 && ops[0].offset == 0))
 		return 0;
 
 	/*
@@ -243,13 +247,12 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
 	u64 ip;
 	u64 skip_slot = -1;
 
-	if (chain->nr < 3)
+	if (!chain || chain->nr < 3)
 		return skip_slot;
 
-	ip = chain->ips[2];
+	ip = chain->ips[1];
 
-	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
-			MAP__FUNCTION, ip, &al);
+	thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
 
 	if (al.map)
 		dso = al.map->dso;
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 53d83d7e6a09..10a44e946f77 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 
 #endif
 
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
 int arch__choose_best_symbol(struct symbol *syma,
 			     struct symbol *symb __maybe_unused)
 {
 	char *sym = syma->name;
 
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 	/* Skip over any initial dot */
 	if (*sym == '.')
 		sym++;
+#endif
 
 	/* Avoid "SyS" kernel syscall aliases */
 	if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
@@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma,
 	return SYMBOL_A;
 }
 
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 /* Allow matching against dot variants */
 int arch__compare_symbol_names(const char *namea, const char *nameb)
 {
@@ -141,8 +143,10 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
 	for (i = 0; i < ntevs; i++) {
 		tev = &pev->tevs[i];
 		map__for_each_symbol(map, sym, tmp) {
-			if (map->unmap_ip(map, sym->start) == tev->point.address)
+			if (map->unmap_ip(map, sym->start) == tev->point.address) {
 				arch__fix_tev_from_maps(pev, tev, map, sym);
+				break;
+			}
 		}
 	}
 }
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index cee4e2f7c057..de0dd66dbb48 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -100,8 +100,6 @@ out_free_source:
 	return -1;
 }
 
-static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
-			  struct ins_operands *ops);
 
 static struct ins_ops s390_mov_ops = {
 	.parse	   = s390_mov__parse,
diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c
index 3afe8256eff2..44c857388897 100644
--- a/tools/perf/arch/s390/util/auxtrace.c
+++ b/tools/perf/arch/s390/util/auxtrace.c
@@ -30,6 +30,7 @@ cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused,
 		 struct auxtrace_info_event *auxtrace_info __maybe_unused,
 		 size_t priv_size __maybe_unused)
 {
+	auxtrace_info->type = PERF_AUXTRACE_S390_CPUMSF;
 	return 0;
 }
 
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index d233e2eb9592..aaabab5e2830 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -102,7 +102,7 @@ const char * const kvm_skip_events[] = {
 
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
 {
-	if (strstr(cpuid, "IBM/S390")) {
+	if (strstr(cpuid, "IBM")) {
 		kvm->exit_reasons = sie_exit_reasons;
 		kvm->exit_reasons_isa = "SIE";
 	} else
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
index 7fbca175099e..275dea7ff59a 100644
--- a/tools/perf/arch/sparc/Makefile
+++ b/tools/perf/arch/sparc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c
new file mode 100644
index 000000000000..2614c010c235
--- /dev/null
+++ b/tools/perf/arch/sparc/annotate/instructions.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static int is_branch_cond(const char *cond)
+{
+	if (cond[0] == '\0')
+		return 1;
+
+	if (cond[0] == 'a' && cond[1] == '\0')
+		return 1;
+
+	if (cond[0] == 'c' &&
+	    (cond[1] == 'c' || cond[1] == 's') &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'e' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 'q' && cond[2] == '\0')))
+		return 1;
+
+	if (cond[0] == 'g' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 't' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'l' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 't' && cond[2] == '\0') ||
+	     (cond[1] == 'u' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'n' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'z' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'b' &&
+	    cond[1] == 'p' &&
+	    cond[2] == 'o' &&
+	    cond[3] == 's' &&
+	    cond[4] == '\0')
+		return 1;
+
+	if (cond[0] == 'v' &&
+	    (cond[1] == 'c' || cond[1] == 's') &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'b' &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static int is_branch_reg_cond(const char *cond)
+{
+	if ((cond[0] == 'n' || cond[0] == 'l') &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'z' &&
+	    cond[1] == '\0')
+		return 1;
+
+	if ((cond[0] == 'g' || cond[0] == 'l') &&
+	    cond[1] == 'e' &&
+	    cond[2] == 'z' &&
+	    cond[3] == '\0')
+		return 1;
+
+	if (cond[0] == 'g' &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static int is_branch_float_cond(const char *cond)
+{
+	if (cond[0] == '\0')
+		return 1;
+
+	if ((cond[0] == 'a' || cond[0] == 'e' ||
+	     cond[0] == 'z' || cond[0] == 'g' ||
+	     cond[0] == 'l' || cond[0] == 'n' ||
+	     cond[0] == 'o' || cond[0] == 'u') &&
+	    cond[1] == '\0')
+		return 1;
+
+	if (((cond[0] == 'g' && cond[1] == 'e') ||
+	     (cond[0] == 'l' && (cond[1] == 'e' ||
+				 cond[1] == 'g')) ||
+	     (cond[0] == 'n' && (cond[1] == 'e' ||
+				 cond[1] == 'z')) ||
+	     (cond[0] == 'u' && (cond[1] == 'e' ||
+				 cond[1] == 'g' ||
+				 cond[1] == 'l'))) &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'u' &&
+	    (cond[1] == 'g' || cond[1] == 'l') &&
+	    cond[2] == 'e' &&
+	    cond[3] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = NULL;
+
+	if (!strcmp(name, "call") ||
+	    !strcmp(name, "jmp") ||
+	    !strcmp(name, "jmpl")) {
+		ops = &call_ops;
+	} else if (!strcmp(name, "ret") ||
+		   !strcmp(name, "retl") ||
+		   !strcmp(name, "return")) {
+		ops = &ret_ops;
+	} else if (!strcmp(name, "mov")) {
+		ops = &mov_ops;
+	} else {
+		if (name[0] == 'c' &&
+		    (name[1] == 'w' || name[1] == 'x'))
+			name += 2;
+
+		if (name[0] == 'b') {
+			const char *cond = name + 1;
+
+			if (cond[0] == 'r') {
+				if (is_branch_reg_cond(cond + 1))
+					ops = &jump_ops;
+			} else if (is_branch_cond(cond)) {
+				ops = &jump_ops;
+			}
+		} else if (name[0] == 'f' && name[1] == 'b') {
+			if (is_branch_float_cond(name + 2))
+				ops = &jump_ops;
+		}
+	}
+
+	if (ops)
+		arch__associate_ins_ops(arch, name, ops);
+
+	return ops;
+}
+
+static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	if (!arch->initialized) {
+		arch->initialized = true;
+		arch->associate_instruction_ops = sparc__associate_instruction_ops;
+		arch->objdump.comment_char = '#';
+	}
+
+	return 0;
+}
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 1a38e78117ce..8cc6642fce7a 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -19,9 +19,6 @@ systbl := $(sys)/syscalltbl.sh
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sys)/syscall_64.tbl $(systbl)
-	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
-        (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
-        || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true
 	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
 
 clean::
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 4dfe42666d0c..f0b1709a5ffb 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -341,6 +341,8 @@
 330	common	pkey_alloc		__x64_sys_pkey_alloc
 331	common	pkey_free		__x64_sys_pkey_free
 332	common	statx			__x64_sys_statx
+333	common	io_pgetevents		__x64_sys_io_pgetevents
+334	common	rseq			__x64_sys_rseq
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index c1bd979b957b..613709cfbbd0 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -9,6 +9,7 @@ struct test;
 int test__rdpmc(struct test *test __maybe_unused, int subtest);
 int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
 int test__insn_x86(struct test *test __maybe_unused, int subtest);
+int test__bp_modify(struct test *test, int subtest);
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 8e2c5a38c3b9..586849ff83a0 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -5,3 +5,4 @@ libperf-y += arch-tests.o
 libperf-y += rdpmc.o
 libperf-y += perf-time-to-tsc.o
 libperf-$(CONFIG_AUXTRACE) += insn-x86.o
+libperf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index cc1802ff5410..d47d3f8e3c8e 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -24,6 +24,12 @@ struct test arch_tests[] = {
 		.func = test__insn_x86,
 	},
 #endif
+#if defined(__x86_64__)
+	{
+		.desc = "x86 bp modify",
+		.func = test__bp_modify,
+	},
+#endif
 	{
 		.func = NULL,
 	},
diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c
new file mode 100644
index 000000000000..f53e4406709f
--- /dev/null
+++ b/tools/perf/arch/x86/tests/bp-modify.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <asm/ptrace.h>
+#include <errno.h>
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+static noinline int bp_1(void)
+{
+	pr_debug("in %s\n", __func__);
+	return 0;
+}
+
+static noinline int bp_2(void)
+{
+	pr_debug("in %s\n", __func__);
+	return 0;
+}
+
+static int spawn_child(void)
+{
+	int child = fork();
+
+	if (child == 0) {
+		/*
+		 * The child sets itself for as tracee and
+		 * waits in signal for parent to trace it,
+		 * then it calls bp_1 and quits.
+		 */
+		int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+
+		if (err) {
+			pr_debug("failed to PTRACE_TRACEME\n");
+			exit(1);
+		}
+
+		raise(SIGCONT);
+		bp_1();
+		exit(0);
+	}
+
+	return child;
+}
+
+/*
+ * This tests creates HW breakpoint, tries to
+ * change it and checks it was properly changed.
+ */
+static int bp_modify1(void)
+{
+	pid_t child;
+	int status;
+	unsigned long rip = 0, dr7 = 1;
+
+	child = spawn_child();
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 1\n");
+		return TEST_FAIL;
+	}
+
+	/*
+	 * The parent does following steps:
+	 *  - creates a new breakpoint (id 0) for bp_2 function
+	 *  - changes that breakponit to bp_1 function
+	 *  - waits for the breakpoint to hit and checks
+	 *    it has proper rip of bp_1 function
+	 *  - detaches the child
+	 */
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), bp_2)) {
+		pr_debug("failed to set breakpoint, 1st time: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), bp_1)) {
+		pr_debug("failed to set breakpoint, 2nd time: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[7]), dr7)) {
+		pr_debug("failed to set dr7: %s\n", strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+		goto out;
+	}
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 2\n");
+		return TEST_FAIL;
+	}
+
+	rip = ptrace(PTRACE_PEEKUSER, child,
+		     offsetof(struct user_regs_struct, rip), NULL);
+	if (rip == (unsigned long) -1) {
+		pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+	if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_DETACH: %s", strerror(errno));
+		return TEST_FAIL;
+	}
+
+	return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This tests creates HW breakpoint, tries to
+ * change it to bogus value and checks the original
+ * breakpoint is hit.
+ */
+static int bp_modify2(void)
+{
+	pid_t child;
+	int status;
+	unsigned long rip = 0, dr7 = 1;
+
+	child = spawn_child();
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 1\n");
+		return TEST_FAIL;
+	}
+
+	/*
+	 * The parent does following steps:
+	 *  - creates a new breakpoint (id 0) for bp_1 function
+	 *  - tries to change that breakpoint to (-1) address
+	 *  - waits for the breakpoint to hit and checks
+	 *    it has proper rip of bp_1 function
+	 *  - detaches the child
+	 */
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), bp_1)) {
+		pr_debug("failed to set breakpoint: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[7]), dr7)) {
+		pr_debug("failed to set dr7: %s\n", strerror(errno));
+		goto out;
+	}
+
+	if (!ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) {
+		pr_debug("failed, breakpoint set to bogus address\n");
+		goto out;
+	}
+
+	if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+		goto out;
+	}
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 2\n");
+		return TEST_FAIL;
+	}
+
+	rip = ptrace(PTRACE_PEEKUSER, child,
+		     offsetof(struct user_regs_struct, rip), NULL);
+	if (rip == (unsigned long) -1) {
+		pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+	if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_DETACH: %s", strerror(errno));
+		return TEST_FAIL;
+	}
+
+	return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+int test__bp_modify(struct test *test __maybe_unused,
+		    int subtest __maybe_unused)
+{
+	TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
+	TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2());
+
+	return 0;
+}
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 95036c7a59e8..7879df34569a 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
 
 	sp = (unsigned long) regs[PERF_REG_X86_SP];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index f95e6f46ef0d..844b8f335532 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -4,6 +4,8 @@ libperf-y += pmu.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
 libperf-y += group.o
+libperf-y += machine.o
+libperf-y += event.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
new file mode 100644
index 000000000000..675a0213044d
--- /dev/null
+++ b/tools/perf/arch/x86/util/event.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/machine.h"
+#include "../../util/tool.h"
+#include "../../util/map.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+
+#if defined(__x86_64__)
+
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int rc = 0;
+	struct map *pos;
+	struct map_groups *kmaps = &machine->kmaps;
+	struct maps *maps = &kmaps->maps;
+	union perf_event *event = zalloc(sizeof(event->mmap) +
+					 machine->id_hdr_size);
+
+	if (!event) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for extra kernel maps\n");
+		return -1;
+	}
+
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		struct kmap *kmap;
+		size_t size;
+
+		if (!__map__is_extra_kernel_map(pos))
+			continue;
+
+		kmap = map__kmap(pos);
+
+		size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+		       PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+		       machine->id_hdr_size;
+
+		memset(event, 0, size);
+
+		event->mmap.header.type = PERF_RECORD_MMAP;
+
+		/*
+		 * kernel uses 0 for user space maps, see kernel/perf_event.c
+		 * __perf_event_mmap
+		 */
+		if (machine__is_host(machine))
+			event->header.misc = PERF_RECORD_MISC_KERNEL;
+		else
+			event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+		event->mmap.header.size = size;
+
+		event->mmap.start = pos->start;
+		event->mmap.len   = pos->end - pos->start;
+		event->mmap.pgoff = pos->pgoff;
+		event->mmap.pid   = machine->pid;
+
+		strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+
+		if (perf_tool__process_synth_event(tool, event, machine,
+						   process) != 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	free(event);
+	return rc;
+}
+
+#endif
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
new file mode 100644
index 000000000000..4520ac53caa9
--- /dev/null
+++ b/tools/perf/arch/x86/util/machine.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "../../util/machine.h"
+#include "../../util/map.h"
+#include "../../util/symbol.h"
+#include "../../util/sane_ctype.h"
+
+#include <symbol/kallsyms.h>
+
+#if defined(__x86_64__)
+
+struct extra_kernel_map_info {
+	int cnt;
+	int max_cnt;
+	struct extra_kernel_map *maps;
+	bool get_entry_trampolines;
+	u64 entry_trampoline;
+};
+
+static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start,
+				u64 end, u64 pgoff, const char *name)
+{
+	if (mi->cnt >= mi->max_cnt) {
+		void *buf;
+		size_t sz;
+
+		mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32;
+		sz = sizeof(struct extra_kernel_map) * mi->max_cnt;
+		buf = realloc(mi->maps, sz);
+		if (!buf)
+			return -1;
+		mi->maps = buf;
+	}
+
+	mi->maps[mi->cnt].start = start;
+	mi->maps[mi->cnt].end   = end;
+	mi->maps[mi->cnt].pgoff = pgoff;
+	strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN);
+
+	mi->cnt += 1;
+
+	return 0;
+}
+
+static int find_extra_kernel_maps(void *arg, const char *name, char type,
+				  u64 start)
+{
+	struct extra_kernel_map_info *mi = arg;
+
+	if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL &&
+	    !strcmp(name, "_entry_trampoline")) {
+		mi->entry_trampoline = start;
+		return 0;
+	}
+
+	if (is_entry_trampoline(name)) {
+		u64 end = start + page_size;
+
+		return add_extra_kernel_map(mi, start, end, 0, name);
+	}
+
+	return 0;
+}
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+				      struct dso *kernel)
+{
+	struct extra_kernel_map_info mi = { .cnt = 0, };
+	char filename[PATH_MAX];
+	int ret;
+	int i;
+
+	machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
+	ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps);
+	if (ret)
+		goto out_free;
+
+	if (!mi.entry_trampoline)
+		goto out_free;
+
+	for (i = 0; i < mi.cnt; i++) {
+		struct extra_kernel_map *xm = &mi.maps[i];
+
+		xm->pgoff = mi.entry_trampoline;
+		ret = machine__create_extra_kernel_map(machine, kernel, xm);
+		if (ret)
+			goto out_free;
+	}
+
+	machine->trampolines_mapped = mi.cnt;
+out_free:
+	free(mi.maps);
+	return ret;
+}
+
+#endif
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 4b2caf6d48e7..fead6b3b4206 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
 		else if (rm[2].rm_so != rm[2].rm_eo)
 			prefix[0] = '+';
 		else
-			strncpy(prefix, "+0", 2);
+			scnprintf(prefix, sizeof(prefix), "+0");
 	}
 
 	/* Rename register */
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 63a74c32ddc5..e33ef5bc31c5 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <string.h>
 
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 
 #include "../../util/intel-pt.h"
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 06bae7023a51..950539f9a4f7 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -2,6 +2,7 @@
 #include <stdbool.h>
 #include <errno.h>
 
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 
 #include "../../perf.h"
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 60bf11943047..eafce1a130a1 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -7,6 +7,7 @@ perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 perf-y += futex-lock-pi.o
 
+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
 
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index b43f8d2a34ec..9ad015a1e202 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -6,6 +6,7 @@
 #define altinstr_replacement text
 #define globl p2align 4; .globl
 #define _ASM_EXTABLE_FAULT(x, y)
+#define _ASM_EXTABLE(x, y)
 
 #include "../../arch/x86/lib/memcpy_64.S"
 /*
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
new file mode 100644
index 000000000000..4130734dde84
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-lib.c
@@ -0,0 +1,24 @@
+/*
+ * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
+ * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
+ * happy.
+ */
+#include <linux/types.h>
+
+unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
+
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+	for (; len; --len, to++, from++) {
+		/*
+		 * Call the assembly routine back directly since
+		 * memcpy_mcsafe() may silently fallback to memcpy.
+		 */
+		unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+		if (rem)
+			break;
+	}
+	return len;
+}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 63eb49082774..44195514b19e 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -1098,7 +1098,7 @@ static void *worker_thread(void *__tdata)
 	u8 *global_data;
 	u8 *process_data;
 	u8 *thread_data;
-	u64 bytes_done;
+	u64 bytes_done, secs;
 	long work_done;
 	u32 l;
 	struct rusage rusage;
@@ -1254,7 +1254,8 @@ static void *worker_thread(void *__tdata)
 	timersub(&stop, &start0, &diff);
 	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
 	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
-	td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9;
+	secs = td->runtime_ns / NSEC_PER_SEC;
+	td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0;
 
 	getrusage(RUSAGE_THREAD, &rusage);
 	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 51709a961496..93d679eaf1f4 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -40,11 +40,11 @@
 struct perf_annotate {
 	struct perf_tool tool;
 	struct perf_session *session;
+	struct annotation_options opts;
 	bool	   use_tui, use_stdio, use_stdio2, use_gtk;
-	bool	   full_paths;
-	bool	   print_line;
 	bool	   skip_missing;
 	bool	   has_br_stack;
+	bool	   group_set;
 	const char *sym_hist_filter;
 	const char *cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -161,12 +161,12 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
 	hist__account_cycles(sample->branch_stack, al, sample, false);
 
 	bi = he->branch_info;
-	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
 
 	if (err)
 		goto out;
 
-	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
 
 out:
 	return err;
@@ -228,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 		 */
 		if (al->sym != NULL) {
 			rb_erase(&al->sym->rb_node,
-				 &al->map->dso->symbols[al->map->type]);
+				 &al->map->dso->symbols);
 			symbol__delete(al->sym);
 			dso__reset_find_symbol_cache(al->map->dso);
 		}
@@ -248,7 +248,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 	if (he == NULL)
 		return -ENOMEM;
 
-	ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
+	ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
 	hists__inc_nr_samples(hists, true);
 	return ret;
 }
@@ -283,15 +283,22 @@ out_put:
 	return ret;
 }
 
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
+{
+	if (event->feat.feat_id < HEADER_LAST_FEATURE)
+		return perf_event__process_feature(session, event);
+	return 0;
+}
+
 static int hist_entry__tty_annotate(struct hist_entry *he,
 				    struct perf_evsel *evsel,
 				    struct perf_annotate *ann)
 {
 	if (!ann->use_stdio2)
-		return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel,
-					    ann->print_line, ann->full_paths, 0, 0);
-	return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel,
-				     ann->print_line, ann->full_paths);
+		return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts);
+
+	return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts);
 }
 
 static void hists__find_annotations(struct hists *hists,
@@ -342,7 +349,7 @@ find_next:
 			/* skip missing symbols */
 			nd = rb_next(nd);
 		} else if (use_browser == 1) {
-			key = hist_entry__tui_annotate(he, evsel, NULL);
+			key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
 
 			switch (key) {
 			case -1:
@@ -389,8 +396,9 @@ static int __cmd_annotate(struct perf_annotate *ann)
 			goto out;
 	}
 
-	if (!objdump_path) {
-		ret = perf_env__lookup_objdump(&session->header.env);
+	if (!ann->opts.objdump_path) {
+		ret = perf_env__lookup_objdump(&session->header.env,
+					       &ann->opts.objdump_path);
 		if (ret)
 			goto out;
 	}
@@ -471,10 +479,11 @@ int cmd_annotate(int argc, const char **argv)
 			.attr	= perf_event__process_attr,
 			.build_id = perf_event__process_build_id,
 			.tracing_data   = perf_event__process_tracing_data,
-			.feature	= perf_event__process_feature,
+			.feature	= process_feature_event,
 			.ordered_events = true,
 			.ordering_requires_timestamps = true,
 		},
+		.opts = annotation__default_options,
 	};
 	struct perf_data data = {
 		.mode  = PERF_DATA_MODE_READ,
@@ -502,23 +511,26 @@ int cmd_annotate(int argc, const char **argv)
 		   "file", "vmlinux pathname"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
-	OPT_BOOLEAN('l', "print-line", &annotate.print_line,
+	OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
 		    "print matching source lines (may be slow)"),
-	OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
+	OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
 		    "Don't shorten the displayed pathnames"),
 	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
 		    "Skip symbols that cannot be annotated"),
+	OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
+			&annotate.group_set,
+			"Show event group information together"),
 	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
 	OPT_CALLBACK(0, "symfs", NULL, "directory",
 		     "Look for files with symbols relative to this directory",
 		     symbol__config_symfs),
-	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
+	OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
 		    "Interleave source code with assembly code (default)"),
-	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
+	OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
-	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+	OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
-	OPT_STRING(0, "objdump", &objdump_path, "path",
+	OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		    "Show event group information together"),
@@ -529,6 +541,10 @@ int cmd_annotate(int argc, const char **argv)
 	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
 			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
 			     stdio__config_color, "always"),
+	OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
+		     "Set percent type local/global-period/hits",
+		     annotate_parse_percent_type),
+
 	OPT_END()
 	};
 	int ret;
@@ -570,6 +586,9 @@ int cmd_annotate(int argc, const char **argv)
 	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
 						      HEADER_BRANCH_STACK);
 
+	if (annotate.group_set)
+		perf_evlist__force_leader(annotate.session->evlist);
+
 	ret = symbol__annotation_init();
 	if (ret < 0)
 		goto out_delete;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 41db2cba77eb..115110a4796a 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -25,6 +25,7 @@
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/time-utils.h"
+#include "util/probe-file.h"
 
 static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 {
@@ -239,6 +240,34 @@ out:
 	return err;
 }
 
+static int build_id_cache__purge_all(void)
+{
+	struct strlist *list;
+	struct str_node *pos;
+	int err = 0;
+	char *buf;
+
+	list = build_id_cache__list_all(false);
+	if (!list) {
+		pr_debug("Failed to get buildids: -%d\n", errno);
+		return -EINVAL;
+	}
+
+	strlist__for_each_entry(pos, list) {
+		buf = build_id_cache__origname(pos->s);
+		err = build_id_cache__remove_s(pos->s);
+		pr_debug("Removing %s (%s): %s\n", buf, pos->s,
+			 err ? "FAIL" : "Ok");
+		free(buf);
+		if (err)
+			break;
+	}
+	strlist__delete(list);
+
+	pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok");
+	return err;
+}
+
 static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
 {
 	char filename[PATH_MAX];
@@ -297,6 +326,26 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
 	return err;
 }
 
+static int build_id_cache__show_all(void)
+{
+	struct strlist *bidlist;
+	struct str_node *nd;
+	char *buf;
+
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		pr_debug("Failed to get buildids: -%d\n", errno);
+		return -1;
+	}
+	strlist__for_each_entry(nd, bidlist) {
+		buf = build_id_cache__origname(nd->s);
+		fprintf(stdout, "%s %s\n", nd->s, buf);
+		free(buf);
+	}
+	strlist__delete(bidlist);
+	return 0;
+}
+
 int cmd_buildid_cache(int argc, const char **argv)
 {
 	struct strlist *list;
@@ -304,6 +353,9 @@ int cmd_buildid_cache(int argc, const char **argv)
 	int ret = 0;
 	int ns_id = -1;
 	bool force = false;
+	bool list_files = false;
+	bool opts_flag = false;
+	bool purge_all = false;
 	char const *add_name_list_str = NULL,
 		   *remove_name_list_str = NULL,
 		   *purge_name_list_str = NULL,
@@ -327,6 +379,8 @@ int cmd_buildid_cache(int argc, const char **argv)
 		    "file(s) to remove"),
 	OPT_STRING('p', "purge", &purge_name_list_str, "file list",
 		    "file(s) to remove (remove old caches too)"),
+	OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"),
+	OPT_BOOLEAN('l', "list", &list_files, "list all cached files"),
 	OPT_STRING('M', "missing", &missing_filename, "file",
 		   "to find missing build ids in the cache"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -344,11 +398,20 @@ int cmd_buildid_cache(int argc, const char **argv)
 	argc = parse_options(argc, argv, buildid_cache_options,
 			     buildid_cache_usage, 0);
 
-	if (argc || (!add_name_list_str && !kcore_filename &&
-		     !remove_name_list_str && !purge_name_list_str &&
-		     !missing_filename && !update_name_list_str))
+	opts_flag = add_name_list_str || kcore_filename ||
+		remove_name_list_str || purge_name_list_str ||
+		missing_filename || update_name_list_str ||
+		purge_all;
+
+	if (argc || !(list_files || opts_flag))
 		usage_with_options(buildid_cache_usage, buildid_cache_options);
 
+	/* -l is exclusive. It can not be used with other options. */
+	if (list_files && opts_flag) {
+		usage_with_options_msg(buildid_cache_usage,
+			buildid_cache_options, "-l is exclusive.\n");
+	}
+
 	if (ns_id > 0)
 		nsi = nsinfo__new(ns_id);
 
@@ -366,6 +429,11 @@ int cmd_buildid_cache(int argc, const char **argv)
 
 	setup_pager();
 
+	if (list_files) {
+		ret = build_id_cache__show_all();
+		goto out;
+	}
+
 	if (add_name_list_str) {
 		list = strlist__new(add_name_list_str, NULL);
 		if (list) {
@@ -420,6 +488,13 @@ int cmd_buildid_cache(int argc, const char **argv)
 		}
 	}
 
+	if (purge_all) {
+		if (build_id_cache__purge_all()) {
+			pr_warning("Couldn't remove some caches. Error: %s.\n",
+				str_error_r(errno, sbuf, sizeof(sbuf)));
+		}
+	}
+
 	if (missing_filename)
 		ret = build_id_cache__fprintf_missing(session, stdout);
 
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2126bfbcb385..f3aa9d02a5ab 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -56,16 +56,16 @@ struct c2c_hist_entry {
 
 	struct compute_stats	 cstats;
 
+	unsigned long		 paddr;
+	unsigned long		 paddr_cnt;
+	bool			 paddr_zero;
+	char			*nodestr;
+
 	/*
 	 * must be at the end,
 	 * because of its callchain dynamic entry
 	 */
 	struct hist_entry	he;
-
-	unsigned long		 paddr;
-	unsigned long		 paddr_cnt;
-	bool			 paddr_zero;
-	char			*nodestr;
 };
 
 static char const *coalesce_default = "pid,iaddr";
@@ -1976,7 +1976,7 @@ static int filter_cb(struct hist_entry *he)
 	c2c_he = container_of(he, struct c2c_hist_entry, he);
 
 	if (c2c.show_src && !he->srcline)
-		he->srcline = hist_entry__get_srcline(he);
+		he->srcline = hist_entry__srcline(he);
 
 	calc_width(c2c_he);
 
@@ -2193,7 +2193,7 @@ static void print_cacheline(struct c2c_hists *c2c_hists,
 	fprintf(out, "%s\n", bf);
 	fprintf(out, "  -------------------------------------------------------------\n");
 
-	hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true);
+	hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, false);
 }
 
 static void print_pareto(FILE *out)
@@ -2268,7 +2268,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
 	fprintf(out, "=================================================\n");
 	fprintf(out, "#\n");
 
-	hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, false);
+	hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, true);
 
 	fprintf(out, "\n");
 	fprintf(out, "=================================================\n");
@@ -2349,6 +2349,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
 	" s             Toggle full length of symbol and source line columns \n"
 	" q             Return back to cacheline list \n";
 
+	if (!he)
+		return 0;
+
 	/* Display compact version first. */
 	c2c.symbol_full = false;
 
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index d660cb7b222b..39db2ee32d48 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -696,7 +696,7 @@ static void hists__process(struct hists *hists)
 	hists__output_resort(hists, NULL);
 
 	hists__fprintf(hists, !quiet, 0, 0, 0, stdout,
-		       symbol_conf.use_callchain);
+		       !symbol_conf.use_callchain);
 }
 
 static void data__fprintf(void)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40fe919bbcf3..eda41673c4f3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -86,12 +86,10 @@ static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
 }
 #endif
 
-static int perf_event__repipe_op2_synth(struct perf_tool *tool,
-					union perf_event *event,
-					struct perf_session *session
-					__maybe_unused)
+static int perf_event__repipe_op2_synth(struct perf_session *session,
+					union perf_event *event)
 {
-	return perf_event__repipe_synth(tool, event);
+	return perf_event__repipe_synth(session->tool, event);
 }
 
 static int perf_event__repipe_attr(struct perf_tool *tool,
@@ -133,10 +131,10 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
 	return 0;
 }
 
-static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
-				       union perf_event *event,
-				       struct perf_session *session)
+static s64 perf_event__repipe_auxtrace(struct perf_session *session,
+				       union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_inject *inject = container_of(tool, struct perf_inject,
 						  tool);
 	int ret;
@@ -174,9 +172,8 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 #else
 
 static s64
-perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
-			    union perf_event *event __maybe_unused,
-			    struct perf_session *session __maybe_unused)
+perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
+			    union perf_event *event __maybe_unused)
 {
 	pr_err("AUX area tracing not supported\n");
 	return -EINVAL;
@@ -362,26 +359,24 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
 	return err;
 }
 
-static int perf_event__repipe_tracing_data(struct perf_tool *tool,
-					   union perf_event *event,
-					   struct perf_session *session)
+static int perf_event__repipe_tracing_data(struct perf_session *session,
+					   union perf_event *event)
 {
 	int err;
 
-	perf_event__repipe_synth(tool, event);
-	err = perf_event__process_tracing_data(tool, event, session);
+	perf_event__repipe_synth(session->tool, event);
+	err = perf_event__process_tracing_data(session, event);
 
 	return err;
 }
 
-static int perf_event__repipe_id_index(struct perf_tool *tool,
-				       union perf_event *event,
-				       struct perf_session *session)
+static int perf_event__repipe_id_index(struct perf_session *session,
+				       union perf_event *event)
 {
 	int err;
 
-	perf_event__repipe_synth(tool, event);
-	err = perf_event__process_id_index(tool, event, session);
+	perf_event__repipe_synth(session->tool, event);
+	err = perf_event__process_id_index(session, event);
 
 	return err;
 }
@@ -440,9 +435,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 		goto repipe;
 	}
 
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
-
-	if (al.map != NULL) {
+	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
 		if (!al.map->dso->hit) {
 			al.map->dso->hit = 1;
 			if (map__load(al.map) >= 0) {
@@ -805,7 +798,8 @@ int cmd_inject(int argc, const char **argv)
 			   "kallsyms pathname"),
 		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
 		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
-				    NULL, "opts", "Instruction Tracing options",
+				    NULL, "opts", "Instruction Tracing options\n"
+				    ITRACE_HELP,
 				    itrace_parse_synth_opts),
 		OPT_BOOLEAN(0, "strip", &inject.strip,
 			    "strip non-synthesized events (use with --itrace)"),
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
index bcfb363112d3..90d1a2305b72 100644
--- a/tools/perf/builtin-kallsyms.c
+++ b/tools/perf/builtin-kallsyms.c
@@ -27,7 +27,7 @@ static int __cmd_kallsyms(int argc, const char **argv)
 
 	for (i = 0; i < argc; ++i) {
 		struct map *map;
-		struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map);
+		struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map);
 
 		if (symbol == NULL) {
 			printf("%s: not found\n", argv[i]);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ae11e4c3516a..b63bca4b0c2a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -729,7 +729,7 @@ static char *compact_gfp_string(unsigned long gfp_flags)
 static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
 			   unsigned int gfp_flags)
 {
-	struct pevent_record record = {
+	struct tep_record record = {
 		.cpu = sample->cpu,
 		.data = sample->raw_data,
 		.size = sample->raw_size,
@@ -747,7 +747,7 @@ static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
 	}
 
 	trace_seq_init(&seq);
-	pevent_event_info(&seq, evsel->tp_format, &record);
+	tep_event_info(&seq, evsel->tp_format, &record);
 
 	str = strtok_r(seq.buffer, " ", &pos);
 	while (str) {
@@ -1004,7 +1004,7 @@ static void __print_slab_result(struct rb_root *root,
 		if (is_caller) {
 			addr = data->call_site;
 			if (!raw_ip)
-				sym = machine__find_kernel_function(machine, addr, &map);
+				sym = machine__find_kernel_symbol(machine, addr, &map);
 		} else
 			addr = data->ptr;
 
@@ -1068,7 +1068,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite, &map);
+		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
 		if (sym)
 			caller = sym->name;
 		else
@@ -1110,7 +1110,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite, &map);
+		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
 		if (sym)
 			caller = sym->name;
 		else
@@ -1974,7 +1974,7 @@ int cmd_kmem(int argc, const char **argv)
 			goto out_delete;
 		}
 
-		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+		kmem_page_size = tep_get_page_size(evsel->tp_format->pevent);
 		symbol_conf.use_callchain = true;
 	}
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 72e2ca096bf5..2b1ef704169f 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1438,8 +1438,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 		goto out;
 	}
 
-	symbol_conf.nr_events = kvm->evlist->nr_entries;
-
 	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
 		usage_with_options(live_usage, live_options);
 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c0065923a525..99de91698de1 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -81,8 +81,7 @@ static int parse_probe_event(const char *str)
 		params.target_used = true;
 	}
 
-	if (params.nsi)
-		pev->nsi = nsinfo__get(params.nsi);
+	pev->nsi = nsinfo__get(params.nsi);
 
 	/* Parse a perf-probe command into event */
 	ret = parse_perf_probe_command(str, pev);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 22ebeb92ac51..488779bc4c8d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -106,9 +106,12 @@ static bool switch_output_time(struct record *rec)
 	       trigger_is_ready(&switch_output_trigger);
 }
 
-static int record__write(struct record *rec, void *bf, size_t size)
+static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
+			 void *bf, size_t size)
 {
-	if (perf_data__write(rec->session->data, bf, size) < 0) {
+	struct perf_data_file *file = &rec->session->data->file;
+
+	if (perf_data_file__write(file, bf, size) < 0) {
 		pr_err("failed to write perf data, error: %m\n");
 		return -1;
 	}
@@ -127,15 +130,15 @@ static int process_synthesized_event(struct perf_tool *tool,
 				     struct machine *machine __maybe_unused)
 {
 	struct record *rec = container_of(tool, struct record, tool);
-	return record__write(rec, event, event->header.size);
+	return record__write(rec, NULL, event, event->header.size);
 }
 
-static int record__pushfn(void *to, void *bf, size_t size)
+static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
 {
 	struct record *rec = to;
 
 	rec->samples++;
-	return record__write(rec, bf, size);
+	return record__write(rec, map, bf, size);
 }
 
 static volatile int done;
@@ -170,6 +173,7 @@ static void record__sig_exit(void)
 #ifdef HAVE_AUXTRACE_SUPPORT
 
 static int record__process_auxtrace(struct perf_tool *tool,
+				    struct perf_mmap *map,
 				    union perf_event *event, void *data1,
 				    size_t len1, void *data2, size_t len2)
 {
@@ -197,21 +201,21 @@ static int record__process_auxtrace(struct perf_tool *tool,
 	if (padding)
 		padding = 8 - padding;
 
-	record__write(rec, event, event->header.size);
-	record__write(rec, data1, len1);
+	record__write(rec, map, event, event->header.size);
+	record__write(rec, map, data1, len1);
 	if (len2)
-		record__write(rec, data2, len2);
-	record__write(rec, &pad, padding);
+		record__write(rec, map, data2, len2);
+	record__write(rec, map, &pad, padding);
 
 	return 0;
 }
 
 static int record__auxtrace_mmap_read(struct record *rec,
-				      struct auxtrace_mmap *mm)
+				      struct perf_mmap *map)
 {
 	int ret;
 
-	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
 				  record__process_auxtrace);
 	if (ret < 0)
 		return ret;
@@ -223,11 +227,11 @@ static int record__auxtrace_mmap_read(struct record *rec,
 }
 
 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
-					       struct auxtrace_mmap *mm)
+					       struct perf_mmap *map)
 {
 	int ret;
 
-	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
 					   record__process_auxtrace,
 					   rec->opts.auxtrace_snapshot_size);
 	if (ret < 0)
@@ -245,13 +249,12 @@ static int record__auxtrace_read_snapshot_all(struct record *rec)
 	int rc = 0;
 
 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
-		struct auxtrace_mmap *mm =
-				&rec->evlist->mmap[i].auxtrace_mmap;
+		struct perf_mmap *map = &rec->evlist->mmap[i];
 
-		if (!mm->base)
+		if (!map->auxtrace_mmap.base)
 			continue;
 
-		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
+		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
 			rc = -1;
 			goto out;
 		}
@@ -295,7 +298,7 @@ static int record__auxtrace_init(struct record *rec)
 
 static inline
 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
-			       struct auxtrace_mmap *mm __maybe_unused)
+			       struct perf_mmap *map __maybe_unused)
 {
 	return 0;
 }
@@ -388,7 +391,12 @@ try_again:
 					ui__warning("%s\n", msg);
 				goto try_again;
 			}
-
+			if ((errno == EINVAL || errno == EBADF) &&
+			    pos->leader != pos &&
+			    pos->weak_group) {
+			        pos = perf_evlist__reset_weak_group(evlist, pos);
+				goto try_again;
+			}
 			rc = -errno;
 			perf_evsel__open_strerror(pos, &opts->target,
 						  errno, msg, sizeof(msg));
@@ -529,17 +537,17 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 		return 0;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
-		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
+		struct perf_mmap *map = &maps[i];
 
-		if (maps[i].base) {
-			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
+		if (map->base) {
+			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
 				rc = -1;
 				goto out;
 			}
 		}
 
-		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
-		    record__auxtrace_mmap_read(rec, mm) != 0) {
+		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
+		    record__auxtrace_mmap_read(rec, map) != 0) {
 			rc = -1;
 			goto out;
 		}
@@ -550,7 +558,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 	 * at least one event.
 	 */
 	if (bytes_written != rec->bytes_written)
-		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
+		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
 
 	if (overwrite)
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
@@ -589,6 +597,9 @@ static void record__init_features(struct record *rec)
 	if (!rec->opts.full_auxtrace)
 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 
+	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
@@ -758,7 +769,7 @@ static int record__synthesize(struct record *rec, bool tail)
 		 * We need to synthesize events first, because some
 		 * features works on top of them (on report side).
 		 */
-		err = perf_event__synthesize_attrs(tool, session,
+		err = perf_event__synthesize_attrs(tool, rec->evlist,
 						   process_synthesized_event);
 		if (err < 0) {
 			pr_err("Couldn't synthesize attrs.\n");
@@ -894,6 +905,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 	record__init_features(rec);
 
+	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
 	if (forks) {
 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 						    argv, data->is_pipe,
@@ -1334,6 +1348,19 @@ static const struct clockid_map clockids[] = {
 	CLOCKID_END,
 };
 
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+	struct timespec res;
+
+	*res_ns = 0;
+	if (!clock_getres(clk_id, &res))
+		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+	else
+		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+	return 0;
+}
+
 static int parse_clockid(const struct option *opt, const char *str, int unset)
 {
 	struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1357,7 +1384,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 
 	/* if its a number, we're done */
 	if (sscanf(str, "%d", &opts->clockid) == 1)
-		return 0;
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
 
 	/* allow a "CLOCK_" prefix to the name */
 	if (!strncasecmp(str, "CLOCK_", 6))
@@ -1366,7 +1393,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 	for (cm = clockids; cm->name; cm++) {
 		if (!strcasecmp(str, cm->name)) {
 			opts->clockid = cm->clockid;
-			return 0;
+			return get_clockid_res(opts->clockid,
+					       &opts->clockid_res_ns);
 		}
 	}
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0f198f6d9b77..257c9c18cb7e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -71,6 +71,7 @@ struct report {
 	bool			group_set;
 	int			max_stack;
 	struct perf_read_values	show_threads_values;
+	struct annotation_options annotation_opts;
 	const char		*pretty_printing_style;
 	const char		*cpu_list;
 	const char		*symbol_filter_str;
@@ -136,26 +137,25 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
 
 	if (sort__mode == SORT_MODE__BRANCH) {
 		bi = he->branch_info;
-		err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+		err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
 		if (err)
 			goto out;
 
-		err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+		err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
 
 	} else if (rep->mem_mode) {
 		mi = he->mem_info;
-		err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx);
+		err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel);
 		if (err)
 			goto out;
 
-		err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
+		err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
 
 	} else if (symbol_conf.cumulate_callchain) {
 		if (single)
-			err = hist_entry__inc_addr_samples(he, sample, evsel->idx,
-							   al->addr);
+			err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
 	} else {
-		err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
+		err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
 	}
 
 out:
@@ -181,11 +181,11 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
 			     rep->nonany_branch_mode);
 
 	bi = he->branch_info;
-	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
 	if (err)
 		goto out;
 
-	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
 
 	branch_type_count(&rep->brtype_stat, &bi->flags,
 			  bi->from.addr, bi->to.addr);
@@ -194,30 +194,20 @@ out:
 	return err;
 }
 
-/*
- * Events in data file are not collect in groups, but we still want
- * the group display. Set the artificial group and set the leader's
- * forced_leader flag to notify the display code.
- */
 static void setup_forced_leader(struct report *report,
 				struct perf_evlist *evlist)
 {
-	if (report->group_set && !evlist->nr_groups) {
-		struct perf_evsel *leader = perf_evlist__first(evlist);
-
-		perf_evlist__set_leader(evlist);
-		leader->forced_leader = true;
-	}
+	if (report->group_set)
+		perf_evlist__force_leader(evlist);
 }
 
-static int process_feature_event(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session __maybe_unused)
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
 {
-	struct report *rep = container_of(tool, struct report, tool);
+	struct report *rep = container_of(session->tool, struct report, tool);
 
 	if (event->feat.feat_id < HEADER_LAST_FEATURE)
-		return perf_event__process_feature(tool, event, session);
+		return perf_event__process_feature(session, event);
 
 	if (event->feat.feat_id != HEADER_LAST_FEATURE) {
 		pr_err("failed: wrong feature ID: %" PRIu64 "\n",
@@ -226,7 +216,8 @@ static int process_feature_event(struct perf_tool *tool,
 	}
 
 	/*
-	 * All features are received, we can force the
+	 * (feat_id = HEADER_LAST_FEATURE) is the end marker which
+	 * means all features are received, now we can force the
 	 * group if needed.
 	 */
 	setup_forced_leader(rep, session->evlist);
@@ -486,8 +477,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 
 		hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
 		hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
-			       symbol_conf.use_callchain ||
-			       symbol_conf.show_branchflag_count);
+			       !(symbol_conf.use_callchain ||
+			         symbol_conf.show_branchflag_count));
 		fprintf(stdout, "\n\n");
 	}
 
@@ -523,12 +514,9 @@ static void report__warn_kptr_restrict(const struct report *rep)
 		    "As no suitable kallsyms nor vmlinux was found, kernel samples\n"
 		    "can't be resolved.";
 
-		if (kernel_map) {
-			const struct dso *kdso = kernel_map->dso;
-			if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) {
-				desc = "If some relocation was applied (e.g. "
-				       "kexec) symbols may be misresolved.";
-			}
+		if (kernel_map && map__has_symbols(kernel_map)) {
+			desc = "If some relocation was applied (e.g. "
+			       "kexec) symbols may be misresolved.";
 		}
 
 		ui__warning(
@@ -573,7 +561,7 @@ static int report__browse_hists(struct report *rep)
 		ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
 						    rep->min_percent,
 						    &session->header.env,
-						    true);
+						    true, &rep->annotation_opts);
 		/*
 		 * Usually "ret" is the last pressed key, and we only
 		 * care if the key notifies us to switch data file.
@@ -718,10 +706,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
 
 static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
 {
-	int printed = 0, i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += maps__fprintf_task(&mg->maps[i], indent, fp);
-	return printed;
+	return maps__fprintf_task(&mg->maps, indent, fp);
 }
 
 static void task__print_level(struct task *task, FILE *fp, int level)
@@ -961,12 +946,6 @@ parse_percent_limit(const struct option *opt, const char *str,
 	return 0;
 }
 
-#define CALLCHAIN_DEFAULT_OPT  "graph,0.5,caller,function,percent"
-
-const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
-				     CALLCHAIN_REPORT_HELP
-				     "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
-
 int cmd_report(int argc, const char **argv)
 {
 	struct perf_session *session;
@@ -975,6 +954,10 @@ int cmd_report(int argc, const char **argv)
 	bool has_br_stack = false;
 	int branch_mode = -1;
 	bool branch_call_mode = false;
+#define CALLCHAIN_DEFAULT_OPT  "graph,0.5,caller,function,percent"
+	const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+					     CALLCHAIN_REPORT_HELP
+					     "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
 	char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
 	const char * const report_usage[] = {
 		"perf report [<options>]",
@@ -997,6 +980,7 @@ int cmd_report(int argc, const char **argv)
 			.id_index	 = perf_event__process_id_index,
 			.auxtrace_info	 = perf_event__process_auxtrace_info,
 			.auxtrace	 = perf_event__process_auxtrace,
+			.event_update	 = perf_event__process_event_update,
 			.feature	 = process_feature_event,
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
@@ -1004,6 +988,7 @@ int cmd_report(int argc, const char **argv)
 		.max_stack		 = PERF_MAX_STACK_DEPTH,
 		.pretty_printing_style	 = "normal",
 		.socket_filter		 = -1,
+		.annotation_opts	 = annotation__default_options,
 	};
 	const struct option options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
@@ -1093,11 +1078,11 @@ int cmd_report(int argc, const char **argv)
 		   "list of cpus to profile"),
 	OPT_BOOLEAN('I', "show-info", &report.show_full_info,
 		    "Display extended information about perf.data file"),
-	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
+	OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src,
 		    "Interleave source code with assembly code (default)"),
-	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
+	OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
-	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+	OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
@@ -1108,7 +1093,7 @@ int cmd_report(int argc, const char **argv)
 		    parse_branch_mode),
 	OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
 		    "add last branch records to call history"),
-	OPT_STRING(0, "objdump", &objdump_path, "path",
+	OPT_STRING(0, "objdump", &report.annotation_opts.objdump_path, "path",
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
 		    "Disable symbol demangling"),
@@ -1120,7 +1105,7 @@ int cmd_report(int argc, const char **argv)
 	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
 		     "how to display percentage of filtered entries", parse_filter_percentage),
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
-			    "Instruction Tracing options",
+			    "Instruction Tracing options\n" ITRACE_HELP,
 			    itrace_parse_synth_opts),
 	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
 			"Show full source file name path for source lines"),
@@ -1139,6 +1124,9 @@ int cmd_report(int argc, const char **argv)
 		   "Time span of interest (start,stop)"),
 	OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
 		    "Show inline function"),
+	OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period",
+		     "Set percent type local/global-period/hits",
+		     annotate_parse_percent_type),
 	OPT_END()
 	};
 	struct perf_data data = {
@@ -1381,9 +1369,9 @@ repeat:
 	}
 
 	if (session->tevent.pevent &&
-	    pevent_set_function_resolver(session->tevent.pevent,
-					 machine__resolve_kernel_addr,
-					 &session->machines.host) < 0) {
+	    tep_set_function_resolver(session->tevent.pevent,
+				      machine__resolve_kernel_addr,
+				      &session->machines.host) < 0) {
 		pr_err("%s: failed to set libtraceevent function resolver\n",
 		       __func__);
 		return -1;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 4dfdee668b0c..cbf39dab19c1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2143,7 +2143,7 @@ static void save_task_callchain(struct perf_sched *sched,
 		return;
 	}
 
-	if (!symbol_conf.use_callchain || sample->callchain == NULL)
+	if (!sched->show_callchain || sample->callchain == NULL)
 		return;
 
 	if (thread__resolve_callchain(thread, cursor, evsel, sample,
@@ -2271,10 +2271,11 @@ static struct thread *get_idle_thread(int cpu)
 	return idle_threads[cpu];
 }
 
-static void save_idle_callchain(struct idle_thread_runtime *itr,
+static void save_idle_callchain(struct perf_sched *sched,
+				struct idle_thread_runtime *itr,
 				struct perf_sample *sample)
 {
-	if (!symbol_conf.use_callchain || sample->callchain == NULL)
+	if (!sched->show_callchain || sample->callchain == NULL)
 		return;
 
 	callchain_cursor__copy(&itr->cursor, &callchain_cursor);
@@ -2320,7 +2321,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
 
 			/* copy task callchain when entering to idle */
 			if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
-				save_idle_callchain(itr, sample);
+				save_idle_callchain(sched, itr, sample);
 		}
 	}
 
@@ -2849,7 +2850,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 			printf("    CPU %2d idle entire time window\n", i);
 	}
 
-	if (sched->idle_hist && symbol_conf.use_callchain) {
+	if (sched->idle_hist && sched->show_callchain) {
 		callchain_param.mode  = CHAIN_FOLDED;
 		callchain_param.value = CCVAL_PERIOD;
 
@@ -2933,8 +2934,7 @@ static int timehist_check_attr(struct perf_sched *sched,
 			return -1;
 		}
 
-		if (sched->show_callchain &&
-		    !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) {
+		if (sched->show_callchain && !evsel__has_callchain(evsel)) {
 			pr_info("Samples do not have callchains.\n");
 			sched->show_callchain = 0;
 			symbol_conf.use_callchain = 0;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e0a9845b6cbc..b5bc85bd0bbe 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -44,6 +44,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <subcmd/pager.h>
 
 #include "sane_ctype.h"
 
@@ -153,8 +154,8 @@ static struct {
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -165,8 +166,9 @@ static struct {
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+			      PERF_OUTPUT_BPF_OUTPUT,
 
 		.invalid_fields = PERF_OUTPUT_TRACE,
 	},
@@ -179,16 +181,28 @@ static struct {
 				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
 	},
 
+	[PERF_TYPE_HW_CACHE] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
+
 	[PERF_TYPE_RAW] = {
 		.user_set = false,
 
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
-			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
-			      PERF_OUTPUT_PHYS_ADDR,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+			      PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
+			      PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -199,8 +213,8 @@ static struct {
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -211,8 +225,8 @@ static struct {
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_SYNTH,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -393,9 +407,10 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 					PERF_OUTPUT_WEIGHT))
 		return -EINVAL;
 
-	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
+	if (PRINT_FIELD(SYM) &&
+		!(evsel->attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
 		pr_err("Display of symbols requested but neither sample IP nor "
-			   "sample address\nis selected. Hence, no addresses to convert "
+			   "sample address\navailable. Hence, no addresses to convert "
 		       "to symbols.\n");
 		return -EINVAL;
 	}
@@ -404,10 +419,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected.\n");
 		return -EINVAL;
 	}
-	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
-	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
-		pr_err("Display of DSO requested but no address to convert.  Select\n"
-		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
+	if (PRINT_FIELD(DSO) &&
+		!(evsel->attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+		pr_err("Display of DSO requested but no address to convert.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -516,7 +530,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
 
 		evlist__for_each_entry(session->evlist, evsel) {
 			not_pipe = true;
-			if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+			if (evsel__has_callchain(evsel)) {
 				use_callchain = true;
 				break;
 			}
@@ -531,21 +545,18 @@ static int perf_session__check_output_opt(struct perf_session *session)
 	 */
 	if (symbol_conf.use_callchain &&
 	    !output[PERF_TYPE_TRACEPOINT].user_set) {
-		struct perf_event_attr *attr;
-
 		j = PERF_TYPE_TRACEPOINT;
 
 		evlist__for_each_entry(session->evlist, evsel) {
 			if (evsel->attr.type != j)
 				continue;
 
-			attr = &evsel->attr;
-
-			if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
+			if (evsel__has_callchain(evsel)) {
 				output[j].fields |= PERF_OUTPUT_IP;
 				output[j].fields |= PERF_OUTPUT_SYM;
+				output[j].fields |= PERF_OUTPUT_SYMOFFSET;
 				output[j].fields |= PERF_OUTPUT_DSO;
-				set_print_ip_opts(attr);
+				set_print_ip_opts(&evsel->attr);
 				goto out;
 			}
 		}
@@ -608,7 +619,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
 	if (PRINT_FIELD(COMM)) {
 		if (latency_format)
 			printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
-		else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
+		else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain)
 			printed += fprintf(fp, "%s ", thread__comm_str(thread));
 		else
 			printed += fprintf(fp, "%16s ", thread__comm_str(thread));
@@ -717,8 +728,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 		if (PRINT_FIELD(DSO)) {
 			memset(&alf, 0, sizeof(alf));
 			memset(&alt, 0, sizeof(alt));
-			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
-			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+			thread__find_map(thread, sample->cpumode, from, &alf);
+			thread__find_map(thread, sample->cpumode, to, &alt);
 		}
 
 		printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -764,13 +775,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 		from = br->entries[i].from;
 		to   = br->entries[i].to;
 
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
-		if (alf.map)
-			alf.sym = map__find_symbol(alf.map, alf.addr);
-
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
-		if (alt.map)
-			alt.sym = map__find_symbol(alt.map, alt.addr);
+		thread__find_symbol(thread, sample->cpumode, from, &alf);
+		thread__find_symbol(thread, sample->cpumode, to, &alt);
 
 		printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
 		if (PRINT_FIELD(DSO)) {
@@ -814,12 +820,12 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
 		from = br->entries[i].from;
 		to   = br->entries[i].to;
 
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
-		if (alf.map && !alf.map->dso->adjust_symbols)
+		if (thread__find_map(thread, sample->cpumode, from, &alf) &&
+		    !alf.map->dso->adjust_symbols)
 			from = map__map_ip(alf.map, from);
 
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
-		if (alt.map && !alt.map->dso->adjust_symbols)
+		if (thread__find_map(thread, sample->cpumode, to, &alt) &&
+		    !alt.map->dso->adjust_symbols)
 			to = map__map_ip(alt.map, to);
 
 		printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -882,8 +888,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
 		return 0;
 	}
 
-	thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
-	if (!al.map || !al.map->dso) {
+	if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) {
 		pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
 		return 0;
 	}
@@ -908,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
 
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			    struct perf_insn *x, u8 *inbuf, int len,
-			    int insn, FILE *fp)
+			    int insn, FILE *fp, int *total_cycles)
 {
 	int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
 			      dump_insn(x, ip, inbuf, len, NULL),
@@ -917,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			      en->flags.in_tx ? " INTX" : "",
 			      en->flags.abort ? " ABORT" : "");
 	if (en->flags.cycles) {
-		printed += fprintf(fp, " %d cycles", en->flags.cycles);
+		*total_cycles += en->flags.cycles;
+		printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
 		if (insn)
 			printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
 	}
@@ -933,10 +939,8 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
 
 	memset(&al, 0, sizeof(al));
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
-	if (!al.map)
-		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
-				      addr, &al);
+	thread__find_map(thread, cpumode, addr, &al);
+
 	if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
 		return 0;
 
@@ -976,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	u8 buffer[MAXBB];
 	unsigned off;
 	struct symbol *lastsym = NULL;
+	int total_cycles = 0;
 
 	if (!(br && br->nr))
 		return 0;
@@ -996,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 		printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
 					   x.cpumode, x.cpu, &lastsym, attr, fp);
 		printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
-					    &x, buffer, len, 0, fp);
+					    &x, buffer, len, 0, fp, &total_cycles);
 	}
 
 	/* Print all blocks */
@@ -1024,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
 			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
 			if (ip == end) {
-				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+							    &total_cycles);
 				break;
 			} else {
 				printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
@@ -1102,6 +1108,35 @@ out:
 	return printed;
 }
 
+static const char *resolve_branch_sym(struct perf_sample *sample,
+				      struct perf_evsel *evsel,
+				      struct thread *thread,
+				      struct addr_location *al,
+				      u64 *ip)
+{
+	struct addr_location addr_al;
+	struct perf_event_attr *attr = &evsel->attr;
+	const char *name = NULL;
+
+	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+		if (sample_addr_correlates_sym(attr)) {
+			thread__resolve(thread, &addr_al, sample);
+			if (addr_al.sym)
+				name = addr_al.sym->name;
+			else
+				*ip = sample->addr;
+		} else {
+			*ip = sample->addr;
+		}
+	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+		if (al->sym)
+			name = al->sym->name;
+		else
+			*ip = sample->ip;
+	}
+	return name;
+}
+
 static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					   struct perf_evsel *evsel,
 					   struct thread *thread,
@@ -1109,10 +1144,10 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	size_t depth = thread_stack__depth(thread);
-	struct addr_location addr_al;
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
+	int dlen = 0;
 	u64 ip = 0;
 
 	/*
@@ -1122,21 +1157,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 	if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
 		depth += 1;
 
-	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
-		if (sample_addr_correlates_sym(attr)) {
-			thread__resolve(thread, &addr_al, sample);
-			if (addr_al.sym)
-				name = addr_al.sym->name;
-			else
-				ip = sample->addr;
-		} else {
-			ip = sample->addr;
-		}
-	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
-		if (al->sym)
-			name = al->sym->name;
-		else
-			ip = sample->ip;
+	name = resolve_branch_sym(sample, evsel, thread, al, &ip);
+
+	if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
+		dlen += fprintf(fp, "(");
+		dlen += map__fprintf_dsoname(al->map, fp);
+		dlen += fprintf(fp, ")\t");
 	}
 
 	if (name)
@@ -1157,7 +1183,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 	if (len < spacing)
 		len += fprintf(fp, "%*s", spacing - len, "");
 
-	return len;
+	return len + dlen;
 }
 
 static int perf_sample__fprintf_insn(struct perf_sample *sample,
@@ -1253,6 +1279,18 @@ static struct {
 	{0, NULL}
 };
 
+static const char *sample_flags_to_name(u32 flags)
+{
+	int i;
+
+	for (i = 0; sample_flags[i].name ; i++) {
+		if (sample_flags[i].flags == flags)
+			return sample_flags[i].name;
+	}
+
+	return NULL;
+}
+
 static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 {
 	const char *chars = PERF_IP_FLAG_CHARS;
@@ -1262,11 +1300,20 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 	char str[33];
 	int i, pos = 0;
 
-	for (i = 0; sample_flags[i].name ; i++) {
-		if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) {
-			name = sample_flags[i].name;
-			break;
-		}
+	name = sample_flags_to_name(flags & ~PERF_IP_FLAG_IN_TX);
+	if (name)
+		return fprintf(fp, "  %-15s%4s ", name, in_tx ? "(x)" : "");
+
+	if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_BEGIN));
+		if (name)
+			return fprintf(fp, "  tr strt %-7s%4s ", name, in_tx ? "(x)" : "");
+	}
+
+	if (flags & PERF_IP_FLAG_TRACE_END) {
+		name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_END));
+		if (name)
+			return fprintf(fp, "  tr end  %-7s%4s ", name, in_tx ? "(x)" : "");
 	}
 
 	for (i = 0; i < n; i++, flags >>= 1) {
@@ -1279,10 +1326,7 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 	}
 	str[pos] = 0;
 
-	if (name)
-		return fprintf(fp, "  %-7s%4s ", name, in_tx ? "(x)" : "");
-
-	return fprintf(fp, "  %-11s ", str);
+	return fprintf(fp, "  %-19s ", str);
 }
 
 struct printer_data {
@@ -1542,7 +1586,8 @@ struct metric_ctx {
 	FILE 			*fp;
 };
 
-static void script_print_metric(void *ctx, const char *color,
+static void script_print_metric(struct perf_stat_config *config __maybe_unused,
+				void *ctx, const char *color,
 			        const char *fmt,
 			        const char *unit, double val)
 {
@@ -1560,7 +1605,8 @@ static void script_print_metric(void *ctx, const char *color,
 	fprintf(mctx->fp, " %s\n", unit);
 }
 
-static void script_new_line(void *ctx)
+static void script_new_line(struct perf_stat_config *config __maybe_unused,
+			    void *ctx)
 {
 	struct metric_ctx *mctx = ctx;
 
@@ -1606,7 +1652,7 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	evsel_script(evsel)->val = val;
 	if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
 		for_each_group_member (ev2, evsel->leader) {
-			perf_stat__print_shadow_stats(ev2,
+			perf_stat__print_shadow_stats(&stat_config, ev2,
 						      evsel_script(ev2)->val,
 						      sample->cpu,
 						      &ctx,
@@ -1617,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	}
 }
 
+static bool show_event(struct perf_sample *sample,
+		       struct perf_evsel *evsel,
+		       struct thread *thread,
+		       struct addr_location *al)
+{
+	int depth = thread_stack__depth(thread);
+
+	if (!symbol_conf.graph_function)
+		return true;
+
+	if (thread->filter) {
+		if (depth <= thread->filter_entry_depth) {
+			thread->filter = false;
+			return false;
+		}
+		return true;
+	} else {
+		const char *s = symbol_conf.graph_function;
+		u64 ip;
+		const char *name = resolve_branch_sym(sample, evsel, thread, al,
+				&ip);
+		unsigned nlen;
+
+		if (!name)
+			return false;
+		nlen = strlen(name);
+		while (*s) {
+			unsigned len = strcspn(s, ",");
+			if (nlen == len && !strncmp(name, s, len)) {
+				thread->filter = true;
+				thread->filter_entry_depth = depth;
+				return true;
+			}
+			s += len;
+			if (*s == ',')
+				s++;
+		}
+		return false;
+	}
+}
+
 static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al,
@@ -1631,6 +1718,9 @@ static void process_event(struct perf_script *script,
 	if (output[type].fields == 0)
 		return;
 
+	if (!show_event(sample, evsel, thread, al))
+		return;
+
 	++es->samples;
 
 	perf_sample__fprintf_start(sample, thread, evsel,
@@ -1708,6 +1798,9 @@ static void process_event(struct perf_script *script,
 
 	if (PRINT_FIELD(METRIC))
 		perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+
+	if (verbose)
+		fflush(fp);
 }
 
 static struct scripting_ops	*scripting_ops;
@@ -1832,6 +1925,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	struct perf_evlist *evlist;
 	struct perf_evsel *evsel, *pos;
 	int err;
+	static struct perf_evsel_script *es;
 
 	err = perf_event__process_attr(tool, event, pevlist);
 	if (err)
@@ -1840,6 +1934,19 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	evlist = *pevlist;
 	evsel = perf_evlist__last(*pevlist);
 
+	if (!evsel->priv) {
+		if (scr->per_event_dump) {
+			evsel->priv = perf_evsel_script__new(evsel,
+						scr->session->data);
+		} else {
+			es = zalloc(sizeof(*es));
+			if (!es)
+				return -ENOMEM;
+			es->fp = stdout;
+			evsel->priv = es;
+		}
+	}
+
 	if (evsel->attr.type >= PERF_TYPE_MAX &&
 	    evsel->attr.type != PERF_TYPE_SYNTH)
 		return 0;
@@ -2473,6 +2580,8 @@ parse:
 						output[j].fields &= ~all_output_options[i].field;
 					else
 						output[j].fields |= all_output_options[i].field;
+					output[j].user_set = true;
+					output[j].wildcard_set = true;
 				}
 			}
 		} else {
@@ -2483,7 +2592,8 @@ parse:
 				rc = -EINVAL;
 				goto out;
 			}
-			output[type].fields |= all_output_options[i].field;
+			output[type].user_set = true;
+			output[type].wildcard_set = true;
 		}
 	}
 
@@ -2947,9 +3057,8 @@ static void script__setup_sample_type(struct perf_script *script)
 	}
 }
 
-static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
-				    union perf_event *event,
-				    struct perf_session *session)
+static int process_stat_round_event(struct perf_session *session,
+				    union perf_event *event)
 {
 	struct stat_round_event *round = &event->stat_round;
 	struct perf_evsel *counter;
@@ -2963,9 +3072,8 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
-				     union perf_event *event,
-				     struct perf_session *session __maybe_unused)
+static int process_stat_config_event(struct perf_session *session __maybe_unused,
+				     union perf_event *event)
 {
 	perf_event__read_stat_config(&stat_config, &event->stat_config);
 	return 0;
@@ -2991,10 +3099,10 @@ static int set_maps(struct perf_script *script)
 }
 
 static
-int process_thread_map_event(struct perf_tool *tool,
-			     union perf_event *event,
-			     struct perf_session *session __maybe_unused)
+int process_thread_map_event(struct perf_session *session,
+			     union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_script *script = container_of(tool, struct perf_script, tool);
 
 	if (script->threads) {
@@ -3010,10 +3118,10 @@ int process_thread_map_event(struct perf_tool *tool,
 }
 
 static
-int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
-			  union perf_event *event,
-			  struct perf_session *session __maybe_unused)
+int process_cpu_map_event(struct perf_session *session,
+			  union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_script *script = container_of(tool, struct perf_script, tool);
 
 	if (script->cpus) {
@@ -3028,12 +3136,21 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
 	return set_maps(script);
 }
 
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
+{
+	if (event->feat.feat_id < HEADER_LAST_FEATURE)
+		return perf_event__process_feature(session, event);
+	return 0;
+}
+
 #ifdef HAVE_AUXTRACE_SUPPORT
-static int perf_script__process_auxtrace_info(struct perf_tool *tool,
-					      union perf_event *event,
-					      struct perf_session *session)
+static int perf_script__process_auxtrace_info(struct perf_session *session,
+					      union perf_event *event)
 {
-	int ret = perf_event__process_auxtrace_info(tool, event, session);
+	struct perf_tool *tool = session->tool;
+
+	int ret = perf_event__process_auxtrace_info(session, event);
 
 	if (ret == 0) {
 		struct perf_script *script = container_of(tool, struct perf_script, tool);
@@ -3047,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_tool *tool,
 #define perf_script__process_auxtrace_info 0
 #endif
 
+static int parse_insn_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "+insn,-event,-period", 0);
+	itrace_parse_synth_opts(opt, "i0ns", 0);
+	nanosecs = true;
+	return 0;
+}
+
+static int parse_xed(const struct option *opt __maybe_unused,
+		     const char *str __maybe_unused,
+		     int unset __maybe_unused)
+{
+	force_pager("xed -F insn: -A -64 | less");
+	return 0;
+}
+
+static int parse_call_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
+	itrace_parse_synth_opts(opt, "cewp", 0);
+	nanosecs = true;
+	return 0;
+}
+
+static int parse_callret_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
+	itrace_parse_synth_opts(opt, "crewp", 0);
+	nanosecs = true;
+	return 0;
+}
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -3056,7 +3211,10 @@ int cmd_script(int argc, const char **argv)
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
 	struct perf_session *session;
-	struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+	struct itrace_synth_opts itrace_synth_opts = {
+		.set = false,
+		.default_no_sample = true,
+	};
 	char *script_path = NULL;
 	const char **__argv;
 	int i, j, err = 0;
@@ -3072,7 +3230,7 @@ int cmd_script(int argc, const char **argv)
 			.attr		 = process_attr,
 			.event_update   = perf_event__process_event_update,
 			.tracing_data	 = perf_event__process_tracing_data,
-			.feature	 = perf_event__process_feature,
+			.feature	 = process_feature_event,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
 			.auxtrace_info	 = perf_script__process_auxtrace_info,
@@ -3123,13 +3281,24 @@ int cmd_script(int argc, const char **argv)
 		     "+field to add and -field to remove."
 		     "Valid types: hw,sw,trace,raw,synth. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
-		     "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
+		     "addr,symoff,srcline,period,iregs,uregs,brstack,"
+		     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
+		     "callindent,insn,insnlen,synth,phys_addr,metric,misc",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
+	OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode instructions from itrace", parse_insn_trace),
+	OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
+			"Run xed disassembler on output", parse_xed),
+	OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode calls from from itrace", parse_call_trace),
+	OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode calls and returns from itrace", parse_callret_trace),
+	OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
+			"Only print symbols and callees with --call-trace/--call-ret-trace"),
 	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
 		   "Stop display of callgraph at these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@@ -3167,7 +3336,7 @@ int cmd_script(int argc, const char **argv)
 	OPT_BOOLEAN(0, "ns", &nanosecs,
 		    "Use 9 decimal places when displaying time"),
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
-			    "Instruction Tracing options",
+			    "Instruction Tracing options\n" ITRACE_HELP,
 			    itrace_parse_synth_opts),
 	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
 			"Show full source file name path for source lines"),
@@ -3363,8 +3532,10 @@ int cmd_script(int argc, const char **argv)
 		exit(-1);
 	}
 
-	if (!script_name)
+	if (!script_name) {
 		setup_pager();
+		use_browser = 0;
+	}
 
 	session = perf_session__new(&data, false, &script.tool);
 	if (session == NULL)
@@ -3385,7 +3556,8 @@ int cmd_script(int argc, const char **argv)
 	script.session = session;
 	script__setup_sample_type(&script);
 
-	if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+	if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
+	    symbol_conf.graph_function)
 		itrace_synth_opts.thread_stack = true;
 
 	session->itrace_synth_opts = &itrace_synth_opts;
@@ -3403,9 +3575,9 @@ int cmd_script(int argc, const char **argv)
 		symbol_conf.use_callchain = false;
 
 	if (session->tevent.pevent &&
-	    pevent_set_function_resolver(session->tevent.pevent,
-					 machine__resolve_kernel_addr,
-					 &session->machines.host) < 0) {
+	    tep_set_function_resolver(session->tevent.pevent,
+				      machine__resolve_kernel_addr,
+				      &session->machines.host) < 0) {
 		pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
 		err = -1;
 		goto out_delete;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f17dc601b0f3..a635abfa77b6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,7 @@
 #include "util/tool.h"
 #include "util/string2.h"
 #include "util/metricgroup.h"
+#include "util/top.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
@@ -80,12 +81,13 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
 
 #include "sane_ctype.h"
 
 #define DEFAULT_SEPARATOR	" "
-#define CNTR_NOT_SUPPORTED	"<not supported>"
-#define CNTR_NOT_COUNTED	"<not counted>"
 #define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
@@ -133,46 +135,30 @@ static const char *smi_cost_attrs = {
 
 static struct perf_evlist	*evsel_list;
 
-static struct rblist		 metric_events;
-
 static struct target target = {
 	.uid	= UINT_MAX,
 };
 
-typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
+#define METRIC_ONLY_LEN 20
 
-static int			run_count			=  1;
-static bool			no_inherit			= false;
 static volatile pid_t		child_pid			= -1;
-static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
 static bool			smi_cost			= false;
 static bool			smi_reset			= false;
-static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
-static const char		*csv_sep			= NULL;
-static bool			csv_output			= false;
 static bool			group				= false;
 static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
-static unsigned int		initial_delay			= 0;
-static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
-static bool			metric_only			= false;
 static bool			force_metric_only		= false;
-static bool			no_merge			= false;
 static struct timespec		ref_time;
-static struct cpu_map		*aggr_map;
-static aggr_get_id_t		aggr_get_id;
 static bool			append_file;
 static bool			interval_count;
 static const char		*output_name;
 static int			output_fd;
-static int			print_free_counters_hint;
-static int			print_mixed_hw_group_error;
 
 struct perf_stat {
 	bool			 record;
@@ -192,15 +178,15 @@ static struct perf_stat		perf_stat;
 static volatile int done = 0;
 
 static struct perf_stat_config stat_config = {
-	.aggr_mode	= AGGR_GLOBAL,
-	.scale		= true,
+	.aggr_mode		= AGGR_GLOBAL,
+	.scale			= true,
+	.unit_width		= 4, /* strlen("unit") */
+	.run_count		= 1,
+	.metric_only_len	= METRIC_ONLY_LEN,
+	.walltime_nsecs_stats	= &walltime_nsecs_stats,
+	.big_num		= true,
 };
 
-static bool is_duration_time(struct perf_evsel *evsel)
-{
-	return !strcmp(evsel->name, "duration_time");
-}
-
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
 				 struct timespec *b)
 {
@@ -224,78 +210,6 @@ static void perf_stat__reset_stats(void)
 		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
 }
 
-static int create_perf_stat_counter(struct perf_evsel *evsel)
-{
-	struct perf_event_attr *attr = &evsel->attr;
-	struct perf_evsel *leader = evsel->leader;
-
-	if (stat_config.scale) {
-		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-				    PERF_FORMAT_TOTAL_TIME_RUNNING;
-	}
-
-	/*
-	 * The event is part of non trivial group, let's enable
-	 * the group read (for leader) and ID retrieval for all
-	 * members.
-	 */
-	if (leader->nr_members > 1)
-		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
-
-	attr->inherit = !no_inherit;
-
-	/*
-	 * Some events get initialized with sample_(period/type) set,
-	 * like tracepoints. Clear it up for counting.
-	 */
-	attr->sample_period = 0;
-
-	/*
-	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
-	 * while avoiding that older tools show confusing messages.
-	 *
-	 * However for pipe sessions we need to keep it zero,
-	 * because script's perf_evsel__check_attr is triggered
-	 * by attr->sample_type != 0, and we can't run it on
-	 * stat sessions.
-	 */
-	if (!(STAT_RECORD && perf_stat.data.is_pipe))
-		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
-
-	/*
-	 * Disabling all counters initially, they will be enabled
-	 * either manually by us or by kernel via enable_on_exec
-	 * set later.
-	 */
-	if (perf_evsel__is_group_leader(evsel)) {
-		attr->disabled = 1;
-
-		/*
-		 * In case of initial_delay we enable tracee
-		 * events manually.
-		 */
-		if (target__none(&target) && !initial_delay)
-			attr->enable_on_exec = 1;
-	}
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
-
-	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
-}
-
-/*
- * Does the counter have nsecs as a unit?
- */
-static inline int nsec_counter(struct perf_evsel *evsel)
-{
-	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
-	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
-		return 1;
-
-	return 0;
-}
-
 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
 				     union perf_event *event,
 				     struct perf_sample *sample __maybe_unused,
@@ -428,15 +342,15 @@ static void process_interval(void)
 
 static void enable_counters(void)
 {
-	if (initial_delay)
-		usleep(initial_delay * USEC_PER_MSEC);
+	if (stat_config.initial_delay)
+		usleep(stat_config.initial_delay * USEC_PER_MSEC);
 
 	/*
 	 * We need to enable counters only if:
 	 * - we don't have tracee (attaching to task or cpu)
 	 * - we have initial delay configured
 	 */
-	if (!target__none(&target) || initial_delay)
+	if (!target__none(&target) || stat_config.initial_delay)
 		perf_evlist__enable(evsel_list);
 }
 
@@ -464,112 +378,34 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
 	workload_exec_errno = info->si_value.sival_int;
 }
 
-static int perf_stat_synthesize_config(bool is_pipe)
-{
-	int err;
-
-	if (is_pipe) {
-		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			return err;
-		}
-	}
-
-	err = perf_event__synthesize_extra_attr(NULL,
-						evsel_list,
-						process_synthesized_event,
-						is_pipe);
-
-	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
-						process_synthesized_event,
-						NULL);
-	if (err < 0) {
-		pr_err("Couldn't synthesize thread map.\n");
-		return err;
-	}
-
-	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
-					     process_synthesized_event, NULL);
-	if (err < 0) {
-		pr_err("Couldn't synthesize thread map.\n");
-		return err;
-	}
-
-	err = perf_event__synthesize_stat_config(NULL, &stat_config,
-						 process_synthesized_event, NULL);
-	if (err < 0) {
-		pr_err("Couldn't synthesize config.\n");
-		return err;
-	}
-
-	return 0;
-}
-
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
-static int __store_counter_ids(struct perf_evsel *counter)
-{
-	int cpu, thread;
-
-	for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
-		for (thread = 0; thread < xyarray__max_y(counter->fd);
-		     thread++) {
-			int fd = FD(counter, cpu, thread);
-
-			if (perf_evlist__id_add_fd(evsel_list, counter,
-						   cpu, thread, fd) < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-static int store_counter_ids(struct perf_evsel *counter)
-{
-	struct cpu_map *cpus = counter->cpus;
-	struct thread_map *threads = counter->threads;
-
-	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
-		return -ENOMEM;
-
-	return __store_counter_ids(counter);
-}
-
 static bool perf_evsel__should_store_id(struct perf_evsel *counter)
 {
 	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
 }
 
-static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
+static bool is_target_alive(struct target *_target,
+			    struct thread_map *threads)
 {
-	struct perf_evsel *c2, *leader;
-	bool is_open = true;
+	struct stat st;
+	int i;
 
-	leader = evsel->leader;
-	pr_debug("Weak group for %s/%d failed\n",
-			leader->name, leader->nr_members);
+	if (!target__has_task(_target))
+		return true;
 
-	/*
-	 * for_each_group_member doesn't work here because it doesn't
-	 * include the first entry.
-	 */
-	evlist__for_each_entry(evsel_list, c2) {
-		if (c2 == evsel)
-			is_open = false;
-		if (c2->leader == leader) {
-			if (is_open)
-				perf_evsel__close(c2);
-			c2->leader = c2;
-			c2->nr_members = 0;
-		}
+	for (i = 0; i < threads->nr; i++) {
+		char path[PATH_MAX];
+
+		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
+			  threads->map[i].pid);
+
+		if (!stat(path, &st))
+			return true;
 	}
-	return leader;
+
+	return false;
 }
 
-static int __run_perf_stat(int argc, const char **argv)
+static int __run_perf_stat(int argc, const char **argv, int run_idx)
 {
 	int interval = stat_config.interval;
 	int times = stat_config.times;
@@ -609,13 +445,13 @@ static int __run_perf_stat(int argc, const char **argv)
 
 	evlist__for_each_entry(evsel_list, counter) {
 try_again:
-		if (create_perf_stat_counter(counter) < 0) {
+		if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
 
 			/* Weak group failed. Reset the group. */
 			if ((errno == EINVAL || errno == EBADF) &&
 			    counter->leader != counter &&
 			    counter->weak_group) {
-				counter = perf_evsel__reset_weak_group(counter);
+				counter = perf_evlist__reset_weak_group(evsel_list, counter);
 				goto try_again;
 			}
 
@@ -664,11 +500,11 @@ try_again:
 		counter->supported = true;
 
 		l = strlen(counter->unit);
-		if (l > unit_width)
-			unit_width = l;
+		if (l > stat_config.unit_width)
+			stat_config.unit_width = l;
 
 		if (perf_evsel__should_store_id(counter) &&
-		    store_counter_ids(counter))
+		    perf_evsel__store_ids(counter, evsel_list))
 			return -1;
 	}
 
@@ -699,7 +535,8 @@ try_again:
 		if (err < 0)
 			return err;
 
-		err = perf_stat_synthesize_config(is_pipe);
+		err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
+						  process_synthesized_event, is_pipe);
 		if (err < 0)
 			return err;
 	}
@@ -724,7 +561,7 @@ try_again:
 					break;
 			}
 		}
-		waitpid(child_pid, &status, 0);
+		wait4(child_pid, &status, 0, &stat_config.ru_data);
 
 		if (workload_exec_errno) {
 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -738,6 +575,8 @@ try_again:
 		enable_counters();
 		while (!done) {
 			nanosleep(&ts, NULL);
+			if (!is_target_alive(&target, evsel_list->threads))
+				break;
 			if (timeout)
 				break;
 			if (interval) {
@@ -752,6 +591,9 @@ try_again:
 
 	t1 = rdclock();
 
+	if (stat_config.walltime_run_table)
+		stat_config.walltime_run[run_idx] = t1 - t0;
+
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
 	/*
@@ -766,7 +608,7 @@ try_again:
 	return WEXITSTATUS(status);
 }
 
-static int run_perf_stat(int argc, const char **argv)
+static int run_perf_stat(int argc, const char **argv, int run_idx)
 {
 	int ret;
 
@@ -779,7 +621,7 @@ static int run_perf_stat(int argc, const char **argv)
 	if (sync_run)
 		sync();
 
-	ret = __run_perf_stat(argc, argv);
+	ret = __run_perf_stat(argc, argv, run_idx);
 	if (ret)
 		return ret;
 
@@ -792,1076 +634,14 @@ static int run_perf_stat(int argc, const char **argv)
 	return ret;
 }
 
-static void print_running(u64 run, u64 ena)
-{
-	if (csv_output) {
-		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
-					csv_sep,
-					run,
-					csv_sep,
-					ena ? 100.0 * run / ena : 100.0);
-	} else if (run != ena) {
-		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
-	}
-}
-
-static void print_noise_pct(double total, double avg)
-{
-	double pct = rel_stddev_stats(total, avg);
-
-	if (csv_output)
-		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
-	else if (pct)
-		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
-}
-
-static void print_noise(struct perf_evsel *evsel, double avg)
-{
-	struct perf_stat_evsel *ps;
-
-	if (run_count == 1)
-		return;
-
-	ps = evsel->stats;
-	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
-}
-
-static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
-{
-	switch (stat_config.aggr_mode) {
-	case AGGR_CORE:
-		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
-			cpu_map__id_to_socket(id),
-			csv_output ? 0 : -8,
-			cpu_map__id_to_cpu(id),
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-		break;
-	case AGGR_SOCKET:
-		fprintf(stat_config.output, "S%*d%s%*d%s",
-			csv_output ? 0 : -5,
-			id,
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-			break;
-	case AGGR_NONE:
-		fprintf(stat_config.output, "CPU%*d%s",
-			csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[id], csv_sep);
-		break;
-	case AGGR_THREAD:
-		fprintf(stat_config.output, "%*s-%*d%s",
-			csv_output ? 0 : 16,
-			thread_map__comm(evsel->threads, id),
-			csv_output ? 0 : -8,
-			thread_map__pid(evsel->threads, id),
-			csv_sep);
-		break;
-	case AGGR_GLOBAL:
-	case AGGR_UNSET:
-	default:
-		break;
-	}
-}
-
-struct outstate {
-	FILE *fh;
-	bool newline;
-	const char *prefix;
-	int  nfields;
-	int  id, nr;
-	struct perf_evsel *evsel;
-};
-
-#define METRIC_LEN  35
-
-static void new_line_std(void *ctx)
-{
-	struct outstate *os = ctx;
-
-	os->newline = true;
-}
-
-static void do_new_line_std(struct outstate *os)
-{
-	fputc('\n', os->fh);
-	fputs(os->prefix, os->fh);
-	aggr_printout(os->evsel, os->id, os->nr);
-	if (stat_config.aggr_mode == AGGR_NONE)
-		fprintf(os->fh, "        ");
-	fprintf(os->fh, "                                                 ");
-}
-
-static void print_metric_std(void *ctx, const char *color, const char *fmt,
-			     const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	int n;
-	bool newline = os->newline;
-
-	os->newline = false;
-
-	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%-*s", METRIC_LEN, "");
-		return;
-	}
-
-	if (newline)
-		do_new_line_std(os);
-
-	n = fprintf(out, " # ");
-	if (color)
-		n += color_fprintf(out, color, fmt, val);
-	else
-		n += fprintf(out, fmt, val);
-	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
-}
-
-static void new_line_csv(void *ctx)
-{
-	struct outstate *os = ctx;
-	int i;
-
-	fputc('\n', os->fh);
-	if (os->prefix)
-		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
-	aggr_printout(os->evsel, os->id, os->nr);
-	for (i = 0; i < os->nfields; i++)
-		fputs(csv_sep, os->fh);
-}
-
-static void print_metric_csv(void *ctx,
-			     const char *color __maybe_unused,
-			     const char *fmt, const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[64], *vals, *ends;
-
-	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%s%s", csv_sep, csv_sep);
-		return;
-	}
-	snprintf(buf, sizeof(buf), fmt, val);
-	ends = vals = ltrim(buf);
-	while (isdigit(*ends) || *ends == '.')
-		ends++;
-	*ends = 0;
-	while (isspace(*unit))
-		unit++;
-	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
-}
-
-#define METRIC_ONLY_LEN 20
-
-/* Filter out some columns that don't work well in metrics only mode */
-
-static bool valid_only_metric(const char *unit)
-{
-	if (!unit)
-		return false;
-	if (strstr(unit, "/sec") ||
-	    strstr(unit, "hz") ||
-	    strstr(unit, "Hz") ||
-	    strstr(unit, "CPUs utilized"))
-		return false;
-	return true;
-}
-
-static const char *fixunit(char *buf, struct perf_evsel *evsel,
-			   const char *unit)
-{
-	if (!strncmp(unit, "of all", 6)) {
-		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
-			 unit);
-		return buf;
-	}
-	return unit;
-}
-
-static void print_metric_only(void *ctx, const char *color, const char *fmt,
-			      const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	int n;
-	char buf[1024];
-	unsigned mlen = METRIC_ONLY_LEN;
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(buf, os->evsel, unit);
-	if (color)
-		n = color_fprintf(out, color, fmt, val);
-	else
-		n = fprintf(out, fmt, val);
-	if (n > METRIC_ONLY_LEN)
-		n = METRIC_ONLY_LEN;
-	if (mlen < strlen(unit))
-		mlen = strlen(unit) + 1;
-	fprintf(out, "%*s", mlen - n, "");
-}
-
-static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
-				  const char *fmt,
-				  const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[64], *vals, *ends;
-	char tbuf[1024];
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(tbuf, os->evsel, unit);
-	snprintf(buf, sizeof buf, fmt, val);
-	ends = vals = ltrim(buf);
-	while (isdigit(*ends) || *ends == '.')
-		ends++;
-	*ends = 0;
-	fprintf(out, "%s%s", vals, csv_sep);
-}
-
-static void new_line_metric(void *ctx __maybe_unused)
-{
-}
-
-static void print_metric_header(void *ctx, const char *color __maybe_unused,
-				const char *fmt __maybe_unused,
-				const char *unit, double val __maybe_unused)
-{
-	struct outstate *os = ctx;
-	char tbuf[1024];
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(tbuf, os->evsel, unit);
-	if (csv_output)
-		fprintf(os->fh, "%s%s", unit, csv_sep);
-	else
-		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
-}
-
-static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
-{
-	FILE *output = stat_config.output;
-	double msecs = avg / NSEC_PER_MSEC;
-	const char *fmt_v, *fmt_n;
-	char name[25];
-
-	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
-	fmt_n = csv_output ? "%s" : "%-25s";
-
-	aggr_printout(evsel, id, nr);
-
-	scnprintf(name, sizeof(name), "%s%s",
-		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
-
-	fprintf(output, fmt_v, msecs, csv_sep);
-
-	if (csv_output)
-		fprintf(output, "%s%s", evsel->unit, csv_sep);
-	else
-		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
-
-	fprintf(output, fmt_n, name);
-
-	if (evsel->cgrp)
-		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
-}
-
-static int first_shadow_cpu(struct perf_evsel *evsel, int id)
-{
-	int i;
-
-	if (!aggr_get_id)
-		return 0;
-
-	if (stat_config.aggr_mode == AGGR_NONE)
-		return id;
-
-	if (stat_config.aggr_mode == AGGR_GLOBAL)
-		return 0;
-
-	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
-		int cpu2 = perf_evsel__cpus(evsel)->map[i];
-
-		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
-			return cpu2;
-	}
-	return 0;
-}
-
-static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
-{
-	FILE *output = stat_config.output;
-	double sc =  evsel->scale;
-	const char *fmt;
-
-	if (csv_output) {
-		fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
-	} else {
-		if (big_num)
-			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
-		else
-			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
-	}
-
-	aggr_printout(evsel, id, nr);
-
-	fprintf(output, fmt, avg, csv_sep);
-
-	if (evsel->unit)
-		fprintf(output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			evsel->unit, csv_sep);
-
-	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
-
-	if (evsel->cgrp)
-		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
-}
-
-static bool is_mixed_hw_group(struct perf_evsel *counter)
-{
-	struct perf_evlist *evlist = counter->evlist;
-	u32 pmu_type = counter->attr.type;
-	struct perf_evsel *pos;
-
-	if (counter->nr_members < 2)
-		return false;
-
-	evlist__for_each_entry(evlist, pos) {
-		/* software events can be part of any hardware group */
-		if (pos->attr.type == PERF_TYPE_SOFTWARE)
-			continue;
-		if (pmu_type == PERF_TYPE_SOFTWARE) {
-			pmu_type = pos->attr.type;
-			continue;
-		}
-		if (pmu_type != pos->attr.type)
-			return true;
-	}
-
-	return false;
-}
-
-static void printout(int id, int nr, struct perf_evsel *counter, double uval,
-		     char *prefix, u64 run, u64 ena, double noise,
-		     struct runtime_stat *st)
-{
-	struct perf_stat_output_ctx out;
-	struct outstate os = {
-		.fh = stat_config.output,
-		.prefix = prefix ? prefix : "",
-		.id = id,
-		.nr = nr,
-		.evsel = counter,
-	};
-	print_metric_t pm = print_metric_std;
-	void (*nl)(void *);
-
-	if (metric_only) {
-		nl = new_line_metric;
-		if (csv_output)
-			pm = print_metric_only_csv;
-		else
-			pm = print_metric_only;
-	} else
-		nl = new_line_std;
-
-	if (csv_output && !metric_only) {
-		static int aggr_fields[] = {
-			[AGGR_GLOBAL] = 0,
-			[AGGR_THREAD] = 1,
-			[AGGR_NONE] = 1,
-			[AGGR_SOCKET] = 2,
-			[AGGR_CORE] = 2,
-		};
-
-		pm = print_metric_csv;
-		nl = new_line_csv;
-		os.nfields = 3;
-		os.nfields += aggr_fields[stat_config.aggr_mode];
-		if (counter->cgrp)
-			os.nfields++;
-	}
-	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
-		if (metric_only) {
-			pm(&os, NULL, "", "", 0);
-			return;
-		}
-		aggr_printout(counter, id, nr);
-
-		fprintf(stat_config.output, "%*s%s",
-			csv_output ? 0 : 18,
-			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-			csv_sep);
-
-		if (counter->supported) {
-			print_free_counters_hint = 1;
-			if (is_mixed_hw_group(counter))
-				print_mixed_hw_group_error = 1;
-		}
-
-		fprintf(stat_config.output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			counter->unit, csv_sep);
-
-		fprintf(stat_config.output, "%*s",
-			csv_output ? 0 : -25,
-			perf_evsel__name(counter));
-
-		if (counter->cgrp)
-			fprintf(stat_config.output, "%s%s",
-				csv_sep, counter->cgrp->name);
-
-		if (!csv_output)
-			pm(&os, NULL, NULL, "", 0);
-		print_noise(counter, noise);
-		print_running(run, ena);
-		if (csv_output)
-			pm(&os, NULL, NULL, "", 0);
-		return;
-	}
-
-	if (metric_only)
-		/* nothing */;
-	else if (nsec_counter(counter))
-		nsec_printout(id, nr, counter, uval);
-	else
-		abs_printout(id, nr, counter, uval);
-
-	out.print_metric = pm;
-	out.new_line = nl;
-	out.ctx = &os;
-	out.force_header = false;
-
-	if (csv_output && !metric_only) {
-		print_noise(counter, noise);
-		print_running(run, ena);
-	}
-
-	perf_stat__print_shadow_stats(counter, uval,
-				first_shadow_cpu(counter, id),
-				&out, &metric_events, st);
-	if (!csv_output && !metric_only) {
-		print_noise(counter, noise);
-		print_running(run, ena);
-	}
-}
-
-static void aggr_update_shadow(void)
-{
-	int cpu, s2, id, s;
-	u64 val;
-	struct perf_evsel *counter;
-
-	for (s = 0; s < aggr_map->nr; s++) {
-		id = aggr_map->map[s];
-		evlist__for_each_entry(evsel_list, counter) {
-			val = 0;
-			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-				s2 = aggr_get_id(evsel_list->cpus, cpu);
-				if (s2 != id)
-					continue;
-				val += perf_counts(counter->counts, cpu, 0)->val;
-			}
-			perf_stat__update_shadow_stats(counter, val,
-					first_shadow_cpu(counter, id),
-					&rt_stat);
-		}
-	}
-}
-
-static void uniquify_event_name(struct perf_evsel *counter)
-{
-	char *new_name;
-	char *config;
-
-	if (counter->uniquified_name ||
-	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
-					   strlen(counter->pmu_name)))
-		return;
-
-	config = strchr(counter->name, '/');
-	if (config) {
-		if (asprintf(&new_name,
-			     "%s%s", counter->pmu_name, config) > 0) {
-			free(counter->name);
-			counter->name = new_name;
-		}
-	} else {
-		if (asprintf(&new_name,
-			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
-			free(counter->name);
-			counter->name = new_name;
-		}
-	}
-
-	counter->uniquified_name = true;
-}
-
-static void collect_all_aliases(struct perf_evsel *counter,
-			    void (*cb)(struct perf_evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	struct perf_evsel *alias;
-
-	alias = list_prepare_entry(counter, &(evsel_list->entries), node);
-	list_for_each_entry_continue (alias, &evsel_list->entries, node) {
-		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
-		    alias->scale != counter->scale ||
-		    alias->cgrp != counter->cgrp ||
-		    strcmp(alias->unit, counter->unit) ||
-		    nsec_counter(alias) != nsec_counter(counter))
-			break;
-		alias->merged_stat = true;
-		cb(alias, data, false);
-	}
-}
-
-static bool collect_data(struct perf_evsel *counter,
-			    void (*cb)(struct perf_evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	if (counter->merged_stat)
-		return false;
-	cb(counter, data, true);
-	if (no_merge)
-		uniquify_event_name(counter);
-	else if (counter->auto_merge_stats)
-		collect_all_aliases(counter, cb, data);
-	return true;
-}
-
-struct aggr_data {
-	u64 ena, run, val;
-	int id;
-	int nr;
-	int cpu;
-};
-
-static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
-{
-	struct aggr_data *ad = data;
-	int cpu, s2;
-
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		struct perf_counts_values *counts;
-
-		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
-		if (s2 != ad->id)
-			continue;
-		if (first)
-			ad->nr++;
-		counts = perf_counts(counter->counts, cpu, 0);
-		/*
-		 * When any result is bad, make them all to give
-		 * consistent output in interval mode.
-		 */
-		if (counts->ena == 0 || counts->run == 0 ||
-		    counter->counts->scaled == -1) {
-			ad->ena = 0;
-			ad->run = 0;
-			break;
-		}
-		ad->val += counts->val;
-		ad->ena += counts->ena;
-		ad->run += counts->run;
-	}
-}
-
-static void print_aggr(char *prefix)
-{
-	FILE *output = stat_config.output;
-	struct perf_evsel *counter;
-	int s, id, nr;
-	double uval;
-	u64 ena, run, val;
-	bool first;
-
-	if (!(aggr_map || aggr_get_id))
-		return;
-
-	aggr_update_shadow();
-
-	/*
-	 * With metric_only everything is on a single line.
-	 * Without each counter has its own line.
-	 */
-	for (s = 0; s < aggr_map->nr; s++) {
-		struct aggr_data ad;
-		if (prefix && metric_only)
-			fprintf(output, "%s", prefix);
-
-		ad.id = id = aggr_map->map[s];
-		first = true;
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-
-			ad.val = ad.ena = ad.run = 0;
-			ad.nr = 0;
-			if (!collect_data(counter, aggr_cb, &ad))
-				continue;
-			nr = ad.nr;
-			ena = ad.ena;
-			run = ad.run;
-			val = ad.val;
-			if (first && metric_only) {
-				first = false;
-				aggr_printout(counter, id, nr);
-			}
-			if (prefix && !metric_only)
-				fprintf(output, "%s", prefix);
-
-			uval = val * counter->scale;
-			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
-			if (!metric_only)
-				fputc('\n', output);
-		}
-		if (metric_only)
-			fputc('\n', output);
-	}
-}
-
-static int cmp_val(const void *a, const void *b)
-{
-	return ((struct perf_aggr_thread_value *)b)->val -
-		((struct perf_aggr_thread_value *)a)->val;
-}
-
-static struct perf_aggr_thread_value *sort_aggr_thread(
-					struct perf_evsel *counter,
-					int nthreads, int ncpus,
-					int *ret)
-{
-	int cpu, thread, i = 0;
-	double uval;
-	struct perf_aggr_thread_value *buf;
-
-	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
-	if (!buf)
-		return NULL;
-
-	for (thread = 0; thread < nthreads; thread++) {
-		u64 ena = 0, run = 0, val = 0;
-
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			val += perf_counts(counter->counts, cpu, thread)->val;
-			ena += perf_counts(counter->counts, cpu, thread)->ena;
-			run += perf_counts(counter->counts, cpu, thread)->run;
-		}
-
-		uval = val * counter->scale;
-
-		/*
-		 * Skip value 0 when enabling --per-thread globally,
-		 * otherwise too many 0 output.
-		 */
-		if (uval == 0.0 && target__has_per_thread(&target))
-			continue;
-
-		buf[i].counter = counter;
-		buf[i].id = thread;
-		buf[i].uval = uval;
-		buf[i].val = val;
-		buf[i].run = run;
-		buf[i].ena = ena;
-		i++;
-	}
-
-	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
-
-	if (ret)
-		*ret = i;
-
-	return buf;
-}
-
-static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	int nthreads = thread_map__nr(counter->threads);
-	int ncpus = cpu_map__nr(counter->cpus);
-	int thread, sorted_threads, id;
-	struct perf_aggr_thread_value *buf;
-
-	buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads);
-	if (!buf) {
-		perror("cannot sort aggr thread");
-		return;
-	}
-
-	for (thread = 0; thread < sorted_threads; thread++) {
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		id = buf[thread].id;
-		if (stat_config.stats)
-			printout(id, 0, buf[thread].counter, buf[thread].uval,
-				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &stat_config.stats[id]);
-		else
-			printout(id, 0, buf[thread].counter, buf[thread].uval,
-				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &rt_stat);
-		fputc('\n', output);
-	}
-
-	free(buf);
-}
-
-struct caggr_data {
-	double avg, avg_enabled, avg_running;
-};
-
-static void counter_aggr_cb(struct perf_evsel *counter, void *data,
-			    bool first __maybe_unused)
-{
-	struct caggr_data *cd = data;
-	struct perf_stat_evsel *ps = counter->stats;
-
-	cd->avg += avg_stats(&ps->res_stats[0]);
-	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
-	cd->avg_running += avg_stats(&ps->res_stats[2]);
-}
-
-/*
- * Print out the results of a single counter:
- * aggregated counts in system-wide mode
- */
-static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	double uval;
-	struct caggr_data cd = { .avg = 0.0 };
-
-	if (!collect_data(counter, counter_aggr_cb, &cd))
-		return;
-
-	if (prefix && !metric_only)
-		fprintf(output, "%s", prefix);
-
-	uval = cd.avg * counter->scale;
-	printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
-		 cd.avg, &rt_stat);
-	if (!metric_only)
-		fprintf(output, "\n");
-}
-
-static void counter_cb(struct perf_evsel *counter, void *data,
-		       bool first __maybe_unused)
-{
-	struct aggr_data *ad = data;
-
-	ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
-	ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
-	ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
-}
-
-/*
- * Print out the results of a single counter:
- * does not use aggregated count in system-wide
- */
-static void print_counter(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	u64 ena, run, val;
-	double uval;
-	int cpu;
-
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		struct aggr_data ad = { .cpu = cpu };
-
-		if (!collect_data(counter, counter_cb, &ad))
-			return;
-		val = ad.val;
-		ena = ad.ena;
-		run = ad.run;
-
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		uval = val * counter->scale;
-		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
-			 &rt_stat);
-
-		fputc('\n', output);
-	}
-}
-
-static void print_no_aggr_metric(char *prefix)
-{
-	int cpu;
-	int nrcpus = 0;
-	struct perf_evsel *counter;
-	u64 ena, run, val;
-	double uval;
-
-	nrcpus = evsel_list->cpus->nr;
-	for (cpu = 0; cpu < nrcpus; cpu++) {
-		bool first = true;
-
-		if (prefix)
-			fputs(prefix, stat_config.output);
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			if (first) {
-				aggr_printout(counter, cpu, 0);
-				first = false;
-			}
-			val = perf_counts(counter->counts, cpu, 0)->val;
-			ena = perf_counts(counter->counts, cpu, 0)->ena;
-			run = perf_counts(counter->counts, cpu, 0)->run;
-
-			uval = val * counter->scale;
-			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
-		}
-		fputc('\n', stat_config.output);
-	}
-}
-
-static int aggr_header_lens[] = {
-	[AGGR_CORE] = 18,
-	[AGGR_SOCKET] = 12,
-	[AGGR_NONE] = 6,
-	[AGGR_THREAD] = 24,
-	[AGGR_GLOBAL] = 0,
-};
-
-static const char *aggr_header_csv[] = {
-	[AGGR_CORE] 	= 	"core,cpus,",
-	[AGGR_SOCKET] 	= 	"socket,cpus",
-	[AGGR_NONE] 	= 	"cpu,",
-	[AGGR_THREAD] 	= 	"comm-pid,",
-	[AGGR_GLOBAL] 	=	""
-};
-
-static void print_metric_headers(const char *prefix, bool no_indent)
-{
-	struct perf_stat_output_ctx out;
-	struct perf_evsel *counter;
-	struct outstate os = {
-		.fh = stat_config.output
-	};
-
-	if (prefix)
-		fprintf(stat_config.output, "%s", prefix);
-
-	if (!csv_output && !no_indent)
-		fprintf(stat_config.output, "%*s",
-			aggr_header_lens[stat_config.aggr_mode], "");
-	if (csv_output) {
-		if (stat_config.interval)
-			fputs("time,", stat_config.output);
-		fputs(aggr_header_csv[stat_config.aggr_mode],
-			stat_config.output);
-	}
-
-	/* Print metrics headers only */
-	evlist__for_each_entry(evsel_list, counter) {
-		if (is_duration_time(counter))
-			continue;
-		os.evsel = counter;
-		out.ctx = &os;
-		out.print_metric = print_metric_header;
-		out.new_line = new_line_metric;
-		out.force_header = true;
-		os.evsel = counter;
-		perf_stat__print_shadow_stats(counter, 0,
-					      0,
-					      &out,
-					      &metric_events,
-					      &rt_stat);
-	}
-	fputc('\n', stat_config.output);
-}
-
-static void print_interval(char *prefix, struct timespec *ts)
-{
-	FILE *output = stat_config.output;
-	static int num_print_interval;
-
-	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
-
-	if (num_print_interval == 0 && !csv_output) {
-		switch (stat_config.aggr_mode) {
-		case AGGR_SOCKET:
-			fprintf(output, "#           time socket cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_CORE:
-			fprintf(output, "#           time core         cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_NONE:
-			fprintf(output, "#           time CPU");
-			if (!metric_only)
-				fprintf(output, "                counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_THREAD:
-			fprintf(output, "#           time             comm-pid");
-			if (!metric_only)
-				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_GLOBAL:
-		default:
-			fprintf(output, "#           time");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-		case AGGR_UNSET:
-			break;
-		}
-	}
-
-	if (num_print_interval == 0 && metric_only)
-		print_metric_headers(" ", true);
-	if (++num_print_interval == 25)
-		num_print_interval = 0;
-}
-
-static void print_header(int argc, const char **argv)
-{
-	FILE *output = stat_config.output;
-	int i;
-
-	fflush(stdout);
-
-	if (!csv_output) {
-		fprintf(output, "\n");
-		fprintf(output, " Performance counter stats for ");
-		if (target.system_wide)
-			fprintf(output, "\'system wide");
-		else if (target.cpu_list)
-			fprintf(output, "\'CPU(s) %s", target.cpu_list);
-		else if (!target__has_task(&target)) {
-			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
-			for (i = 1; argv && (i < argc); i++)
-				fprintf(output, " %s", argv[i]);
-		} else if (target.pid)
-			fprintf(output, "process id \'%s", target.pid);
-		else
-			fprintf(output, "thread id \'%s", target.tid);
-
-		fprintf(output, "\'");
-		if (run_count > 1)
-			fprintf(output, " (%d runs)", run_count);
-		fprintf(output, ":\n\n");
-	}
-}
-
-static void print_footer(void)
-{
-	FILE *output = stat_config.output;
-	int n;
-
-	if (!null_run)
-		fprintf(output, "\n");
-	fprintf(output, " %17.9f seconds time elapsed",
-			avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
-	if (run_count > 1) {
-		fprintf(output, "                                        ");
-		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
-				avg_stats(&walltime_nsecs_stats));
-	}
-	fprintf(output, "\n\n");
-
-	if (print_free_counters_hint &&
-	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
-	    n > 0)
-		fprintf(output,
-"Some events weren't counted. Try disabling the NMI watchdog:\n"
-"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
-"	perf stat ...\n"
-"	echo 1 > /proc/sys/kernel/nmi_watchdog\n");
-
-	if (print_mixed_hw_group_error)
-		fprintf(output,
-			"The events in group usually have to be from "
-			"the same PMU. Try reorganizing the group.\n");
-}
-
 static void print_counters(struct timespec *ts, int argc, const char **argv)
 {
-	int interval = stat_config.interval;
-	struct perf_evsel *counter;
-	char buf[64], *prefix = NULL;
-
 	/* Do not print anything if we record to the pipe. */
 	if (STAT_RECORD && perf_stat.data.is_pipe)
 		return;
 
-	if (interval)
-		print_interval(prefix = buf, ts);
-	else
-		print_header(argc, argv);
-
-	if (metric_only) {
-		static int num_print_iv;
-
-		if (num_print_iv == 0 && !interval)
-			print_metric_headers(prefix, false);
-		if (num_print_iv++ == 25)
-			num_print_iv = 0;
-		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
-			fprintf(stat_config.output, "%s", prefix);
-	}
-
-	switch (stat_config.aggr_mode) {
-	case AGGR_CORE:
-	case AGGR_SOCKET:
-		print_aggr(prefix);
-		break;
-	case AGGR_THREAD:
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			print_aggr_thread(counter, prefix);
-		}
-		break;
-	case AGGR_GLOBAL:
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			print_counter_aggr(counter, prefix);
-		}
-		if (metric_only)
-			fputc('\n', stat_config.output);
-		break;
-	case AGGR_NONE:
-		if (metric_only)
-			print_no_aggr_metric(prefix);
-		else {
-			evlist__for_each_entry(evsel_list, counter) {
-				if (is_duration_time(counter))
-					continue;
-				print_counter(counter, prefix);
-			}
-		}
-		break;
-	case AGGR_UNSET:
-	default:
-		break;
-	}
-
-	if (!interval && !csv_output)
-		print_footer();
-
-	fflush(stat_config.output);
+	perf_evlist__print_counters(evsel_list, &stat_config, &target,
+				    ts, argc, argv);
 }
 
 static volatile int signr = -1;
@@ -1918,7 +698,7 @@ static int enable_metric_only(const struct option *opt __maybe_unused,
 			      const char *s __maybe_unused, int unset)
 {
 	force_metric_only = true;
-	metric_only = !unset;
+	stat_config.metric_only = !unset;
 	return 0;
 }
 
@@ -1926,7 +706,7 @@ static int parse_metric_groups(const struct option *opt,
 			       const char *str,
 			       int unset __maybe_unused)
 {
-	return metricgroup__parse_groups(opt, str, &metric_events);
+	return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
 }
 
 static const struct option stat_options[] = {
@@ -1937,7 +717,7 @@ static const struct option stat_options[] = {
 		     parse_events_option),
 	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
 		     "event filter", parse_filter),
-	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_STRING('p', "pid", &target.pid, "pid",
 		   "stat events on existing process id"),
@@ -1950,9 +730,11 @@ static const struct option stat_options[] = {
 	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
-	OPT_INTEGER('r', "repeat", &run_count,
+	OPT_INTEGER('r', "repeat", &stat_config.run_count,
 		    "repeat command and print average + stddev (max: 100, forever: 0)"),
-	OPT_BOOLEAN('n', "null", &null_run,
+	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
+		    "display details about each run (only with -r option)"),
+	OPT_BOOLEAN('n', "null", &stat_config.null_run,
 		    "null run - dont start any counters"),
 	OPT_INCR('d', "detailed", &detailed_run,
 		    "detailed run - start a lot of events"),
@@ -1965,8 +747,8 @@ static const struct option stat_options[] = {
 		    "list of cpus to monitor in system-wide"),
 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		    "disable CPU count aggregation", AGGR_NONE),
-	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
-	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
 		     "monitor event in cgroup name only", parse_cgroups),
@@ -1983,6 +765,8 @@ static const struct option stat_options[] = {
 		    "(overhead is possible for values <= 100ms)"),
 	OPT_INTEGER(0, "interval-count", &stat_config.times,
 		    "print counts for fixed number of times"),
+	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
+		    "clear screen in between new interval"),
 	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
 		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
 	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
@@ -1991,9 +775,9 @@ static const struct option stat_options[] = {
 		     "aggregate counts per physical processor core", AGGR_CORE),
 	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 		     "aggregate counts per thread", AGGR_THREAD),
-	OPT_UINTEGER('D', "delay", &initial_delay,
+	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
 		     "ms to wait before starting measurement after program start"),
-	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
@@ -2005,12 +789,14 @@ static const struct option stat_options[] = {
 	OPT_END()
 };
 
-static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
+				 struct cpu_map *map, int cpu)
 {
 	return cpu_map__get_socket(map, cpu, NULL);
 }
 
-static int perf_stat__get_core(struct cpu_map *map, int cpu)
+static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
+			       struct cpu_map *map, int cpu)
 {
 	return cpu_map__get_core(map, cpu, NULL);
 }
@@ -2027,9 +813,8 @@ static int cpu_map__get_max(struct cpu_map *map)
 	return max;
 }
 
-static struct cpu_map *cpus_aggr_map;
-
-static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
+static int perf_stat__get_aggr(struct perf_stat_config *config,
+			       aggr_get_id_t get_id, struct cpu_map *map, int idx)
 {
 	int cpu;
 
@@ -2038,20 +823,22 @@ static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int id
 
 	cpu = map->map[idx];
 
-	if (cpus_aggr_map->map[cpu] == -1)
-		cpus_aggr_map->map[cpu] = get_id(map, idx);
+	if (config->cpus_aggr_map->map[cpu] == -1)
+		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
 
-	return cpus_aggr_map->map[cpu];
+	return config->cpus_aggr_map->map[cpu];
 }
 
-static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
+static int perf_stat__get_socket_cached(struct perf_stat_config *config,
+					struct cpu_map *map, int idx)
 {
-	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
+	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
 }
 
-static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
+static int perf_stat__get_core_cached(struct perf_stat_config *config,
+				      struct cpu_map *map, int idx)
 {
-	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
+	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
 }
 
 static int perf_stat_init_aggr_mode(void)
@@ -2060,18 +847,18 @@ static int perf_stat_init_aggr_mode(void)
 
 	switch (stat_config.aggr_mode) {
 	case AGGR_SOCKET:
-		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
+		if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_socket_cached;
+		stat_config.aggr_get_id = perf_stat__get_socket_cached;
 		break;
 	case AGGR_CORE:
-		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
+		if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_core_cached;
+		stat_config.aggr_get_id = perf_stat__get_core_cached;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
@@ -2087,16 +874,16 @@ static int perf_stat_init_aggr_mode(void)
 	 * the aggregation translate cpumap.
 	 */
 	nr = cpu_map__get_max(evsel_list->cpus);
-	cpus_aggr_map = cpu_map__empty_new(nr + 1);
-	return cpus_aggr_map ? 0 : -ENOMEM;
+	stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1);
+	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
 }
 
 static void perf_stat__exit_aggr_mode(void)
 {
-	cpu_map__put(aggr_map);
-	cpu_map__put(cpus_aggr_map);
-	aggr_map = NULL;
-	cpus_aggr_map = NULL;
+	cpu_map__put(stat_config.aggr_map);
+	cpu_map__put(stat_config.cpus_aggr_map);
+	stat_config.aggr_map = NULL;
+	stat_config.cpus_aggr_map = NULL;
 }
 
 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
@@ -2154,12 +941,14 @@ static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
 	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
 }
 
-static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
+				      struct cpu_map *map, int idx)
 {
 	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
 }
 
-static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
+				    struct cpu_map *map, int idx)
 {
 	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
 }
@@ -2170,18 +959,18 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
 
 	switch (stat_config.aggr_mode) {
 	case AGGR_SOCKET:
-		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+		if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_socket_file;
+		stat_config.aggr_get_id = perf_stat__get_socket_file;
 		break;
 	case AGGR_CORE:
-		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+		if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_core_file;
+		stat_config.aggr_get_id = perf_stat__get_core_file;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
@@ -2362,13 +1151,24 @@ static int add_default_attributes(void)
 	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
 };
+	struct parse_events_error errinfo;
 
 	/* Set attrs if no event is selected and !null_run: */
-	if (null_run)
+	if (stat_config.null_run)
 		return 0;
 
 	if (transaction_run) {
-		struct parse_events_error errinfo;
+		/* Handle -T as -M transaction. Once platform specific metrics
+		 * support has been added to the json files, all archictures
+		 * will use this approach. To determine transaction support
+		 * on an architecture test for such a metric name.
+		 */
+		if (metricgroup__has_metric("transaction")) {
+			struct option opt = { .value = &evsel_list };
+
+			return metricgroup__parse_groups(&opt, "transaction",
+							 &stat_config.metric_events);
+		}
 
 		if (pmu_have_event("cpu", "cycles-ct") &&
 		    pmu_have_event("cpu", "el-start"))
@@ -2380,6 +1180,7 @@ static int add_default_attributes(void)
 					   &errinfo);
 		if (err) {
 			fprintf(stderr, "Cannot set up transaction events\n");
+			parse_events_print_error(&errinfo, transaction_attrs);
 			return -1;
 		}
 		return 0;
@@ -2404,11 +1205,12 @@ static int add_default_attributes(void)
 		if (pmu_have_event("msr", "aperf") &&
 		    pmu_have_event("msr", "smi")) {
 			if (!force_metric_only)
-				metric_only = true;
-			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+				stat_config.metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
 		} else {
 			fprintf(stderr, "To measure SMI cost, it needs "
 				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			parse_events_print_error(&errinfo, smi_cost_attrs);
 			return -1;
 		}
 		if (err) {
@@ -2434,7 +1236,7 @@ static int add_default_attributes(void)
 		}
 
 		if (!force_metric_only)
-			metric_only = true;
+			stat_config.metric_only = true;
 		if (topdown_filter_events(topdown_attrs, &str,
 				arch_topdown_check_group(&warn)) < 0) {
 			pr_err("Out of memory\n");
@@ -2443,12 +1245,13 @@ static int add_default_attributes(void)
 		if (topdown_attrs[0] && str) {
 			if (warn)
 				arch_topdown_group_warn();
-			err = parse_events(evsel_list, str, NULL);
+			err = parse_events(evsel_list, str, &errinfo);
 			if (err) {
 				fprintf(stderr,
 					"Cannot set up top down events %s: %d\n",
 					str, err);
 				free(str);
+				parse_events_print_error(&errinfo, str);
 				return -1;
 			}
 		} else {
@@ -2530,7 +1333,7 @@ static int __cmd_record(int argc, const char **argv)
 	if (output_name)
 		data->file.path = output_name;
 
-	if (run_count != 1 || forever) {
+	if (stat_config.run_count != 1 || forever) {
 		pr_err("Cannot use -r option with perf stat record.\n");
 		return -1;
 	}
@@ -2549,9 +1352,8 @@ static int __cmd_record(int argc, const char **argv)
 	return argc;
 }
 
-static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
-				    union perf_event *event,
-				    struct perf_session *session)
+static int process_stat_round_event(struct perf_session *session,
+				    union perf_event *event)
 {
 	struct stat_round_event *stat_round = &event->stat_round;
 	struct perf_evsel *counter;
@@ -2576,10 +1378,10 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
 }
 
 static
-int process_stat_config_event(struct perf_tool *tool,
-			      union perf_event *event,
-			      struct perf_session *session __maybe_unused)
+int process_stat_config_event(struct perf_session *session,
+			      union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 
 	perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -2619,10 +1421,10 @@ static int set_maps(struct perf_stat *st)
 }
 
 static
-int process_thread_map_event(struct perf_tool *tool,
-			     union perf_event *event,
-			     struct perf_session *session __maybe_unused)
+int process_thread_map_event(struct perf_session *session,
+			     union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 
 	if (st->threads) {
@@ -2638,10 +1440,10 @@ int process_thread_map_event(struct perf_tool *tool,
 }
 
 static
-int process_cpu_map_event(struct perf_tool *tool,
-			  union perf_event *event,
-			  struct perf_session *session __maybe_unused)
+int process_cpu_map_event(struct perf_session *session,
+			  union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 	struct cpu_map *cpus;
 
@@ -2803,12 +1605,12 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__collect_metric_expr(evsel_list);
 	perf_stat__init_shadow_stats();
 
-	if (csv_sep) {
-		csv_output = true;
-		if (!strcmp(csv_sep, "\\t"))
-			csv_sep = "\t";
+	if (stat_config.csv_sep) {
+		stat_config.csv_output = true;
+		if (!strcmp(stat_config.csv_sep, "\\t"))
+			stat_config.csv_sep = "\t";
 	} else
-		csv_sep = DEFAULT_SEPARATOR;
+		stat_config.csv_sep = DEFAULT_SEPARATOR;
 
 	if (argc && !strncmp(argv[0], "rec", 3)) {
 		argc = __cmd_record(argc, argv);
@@ -2833,16 +1635,23 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
-	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
 		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
 		goto out;
 	}
 
-	if (metric_only && run_count > 1) {
+	if (stat_config.metric_only && stat_config.run_count > 1) {
 		fprintf(stderr, "--metric-only is not supported with -r\n");
 		goto out;
 	}
 
+	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
+		fprintf(stderr, "--table is only supported with -r\n");
+		parse_options_usage(stat_usage, stat_options, "r", 1);
+		parse_options_usage(NULL, stat_options, "table", 0);
+		goto out;
+	}
+
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
@@ -2874,7 +1683,7 @@ int cmd_stat(int argc, const char **argv)
 	/*
 	 * let the spreadsheet do the pretty-printing
 	 */
-	if (csv_output) {
+	if (stat_config.csv_output) {
 		/* User explicitly passed -B? */
 		if (big_num_opt == 1) {
 			fprintf(stderr, "-B option not supported with -x\n");
@@ -2882,19 +1691,34 @@ int cmd_stat(int argc, const char **argv)
 			parse_options_usage(NULL, stat_options, "x", 1);
 			goto out;
 		} else /* Nope, so disable big number formatting */
-			big_num = false;
+			stat_config.big_num = false;
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
-		big_num = false;
+		stat_config.big_num = false;
 
 	setup_system_wide(argc);
 
-	if (run_count < 0) {
+	/*
+	 * Display user/system times only for single
+	 * run and when there's specified tracee.
+	 */
+	if ((stat_config.run_count == 1) && target__none(&target))
+		stat_config.ru_display = true;
+
+	if (stat_config.run_count < 0) {
 		pr_err("Run count must be a positive number\n");
 		parse_options_usage(stat_usage, stat_options, "r", 1);
 		goto out;
-	} else if (run_count == 0) {
+	} else if (stat_config.run_count == 0) {
 		forever = true;
-		run_count = 1;
+		stat_config.run_count = 1;
+	}
+
+	if (stat_config.walltime_run_table) {
+		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
+		if (!stat_config.walltime_run) {
+			pr_err("failed to setup -r option");
+			goto out;
+		}
 	}
 
 	if ((stat_config.aggr_mode == AGGR_THREAD) &&
@@ -2994,6 +1818,17 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 
 	/*
+	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+	 * while avoiding that older tools show confusing messages.
+	 *
+	 * However for pipe sessions we need to keep it zero,
+	 * because script's perf_evsel__check_attr is triggered
+	 * by attr->sample_type != 0, and we can't run it on
+	 * stat sessions.
+	 */
+	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
+
+	/*
 	 * We dont want to block the signals - that would cause
 	 * child tasks to inherit that and Ctrl-C would not work.
 	 * What we want is for Ctrl-C to work in the exec()-ed
@@ -3007,12 +1842,12 @@ int cmd_stat(int argc, const char **argv)
 	signal(SIGABRT, skip_signal);
 
 	status = 0;
-	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
-		if (run_count != 1 && verbose > 0)
+	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
+		if (stat_config.run_count != 1 && verbose > 0)
 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
 				run_idx + 1);
 
-		status = run_perf_stat(argc, argv);
+		status = run_perf_stat(argc, argv, run_idx);
 		if (forever && status != -1) {
 			print_counters(NULL, argc, argv);
 			perf_stat__reset_stats();
@@ -3060,6 +1895,8 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	free(stat_config.walltime_run);
+
 	if (smi_cost && smi_reset)
 		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 813698a9b8c7..a827919c6263 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -533,12 +533,8 @@ static const char *cat_backtrace(union perf_event *event,
 		}
 
 		tal.filtered = 0;
-		thread__find_addr_location(al.thread, cpumode,
-					   MAP__FUNCTION, ip, &tal);
-
-		if (tal.sym)
-			fprintf(f, "..... %016" PRIx64 " %s\n", ip,
-				tal.sym->name);
+		if (thread__find_symbol(al.thread, cpumode, ip, &tal))
+			fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
 		else
 			fprintf(f, "..... %016" PRIx64 "\n", ip);
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f39bd60d2708..aa0c73e57924 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -123,14 +123,9 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 	}
 
 	notes = symbol__annotation(sym);
-	if (notes->src != NULL) {
-		pthread_mutex_lock(&notes->lock);
-		goto out_assign;
-	}
-
 	pthread_mutex_lock(&notes->lock);
 
-	if (symbol__alloc_hist(sym) < 0) {
+	if (!symbol__hists(sym, top->evlist->nr_entries)) {
 		pthread_mutex_unlock(&notes->lock);
 		pr_err("Not enough memory for annotating '%s' symbol!\n",
 		       sym->name);
@@ -138,9 +133,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__annotate(sym, map, evsel, 0, NULL);
+	err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL);
 	if (err == 0) {
-out_assign:
 		top->sym_filter_entry = he;
 	} else {
 		char msg[BUFSIZ];
@@ -188,7 +182,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
 static void perf_top__record_precise_ip(struct perf_top *top,
 					struct hist_entry *he,
 					struct perf_sample *sample,
-					int counter, u64 ip)
+					struct perf_evsel *evsel, u64 ip)
 {
 	struct annotation *notes;
 	struct symbol *sym = he->ms.sym;
@@ -204,7 +198,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
 	if (pthread_mutex_trylock(&notes->lock))
 		return;
 
-	err = hist_entry__inc_addr_samples(he, sample, counter, ip);
+	err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
 
 	pthread_mutex_unlock(&notes->lock);
 
@@ -249,10 +243,9 @@ static void perf_top__show_details(struct perf_top *top)
 		goto out_unlock;
 
 	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
-	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
+	printf("  Events  Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
 
-	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
-				       0, top->sym_pcnt_filter, top->print_entries, 4);
+	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts);
 
 	if (top->evlist->enabled) {
 		if (top->zero)
@@ -314,7 +307,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
 	hists__output_recalc_col_len(hists, top->print_entries - printed);
 	putchar('\n');
 	hists__fprintf(hists, false, top->print_entries - printed, win_width,
-		       top->min_percent, stdout, symbol_conf.use_callchain);
+		       top->min_percent, stdout, !symbol_conf.use_callchain);
 }
 
 static void prompt_integer(int *target, const char *msg)
@@ -412,7 +405,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
 
 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
 
-	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
+	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt);
 	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
 	fprintf(stdout, "\t[S]     stop annotation.\n");
 
@@ -515,7 +508,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 			prompt_integer(&top->count_filter, "Enter display event count filter");
 			break;
 		case 'F':
-			prompt_percent(&top->sym_pcnt_filter,
+			prompt_percent(&top->annotation_opts.min_pcnt,
 				       "Enter details display event filter (percent)");
 			break;
 		case 'K':
@@ -613,7 +606,8 @@ static void *display_thread_tui(void *arg)
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
 				      top->min_percent,
 				      &top->session->header.env,
-				      !top->record_opts.overwrite);
+				      !top->record_opts.overwrite,
+				      &top->annotation_opts);
 
 	done = 1;
 	return NULL;
@@ -691,7 +685,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
 	struct perf_evsel *evsel = iter->evsel;
 
 	if (perf_hpp_list.sym && single)
-		perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr);
+		perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
 
 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
 		     !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
@@ -742,7 +736,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
-			  al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+			  al.map && map__has_symbols(al.map) ?
 			  " modules" : "");
 			if (use_browser <= 0)
 				sleep(5);
@@ -750,7 +744,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		machine->kptr_restrict_warned = true;
 	}
 
-	if (al.sym == NULL) {
+	if (al.sym == NULL && al.map != NULL) {
 		const char *msg = "Kernel samples will not be resolved.\n";
 		/*
 		 * As we do lazy loading of symtabs we only will know if the
@@ -764,8 +758,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		 * invalid --vmlinux ;-)
 		 */
 		if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
-		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
-		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
+		    __map__is_kernel(al.map) && map__has_symbols(al.map)) {
 			if (symbol_conf.vmlinux_name) {
 				char serr[256];
 				dso__strerror_load(al.map->dso, serr, sizeof(serr));
@@ -1084,8 +1077,9 @@ static int __cmd_top(struct perf_top *top)
 	if (top->session == NULL)
 		return -1;
 
-	if (!objdump_path) {
-		ret = perf_env__lookup_objdump(&top->session->header.env);
+	if (!top->annotation_opts.objdump_path) {
+		ret = perf_env__lookup_objdump(&top->session->header.env,
+					       &top->annotation_opts.objdump_path);
 		if (ret)
 			goto out_delete;
 	}
@@ -1140,11 +1134,6 @@ static int __cmd_top(struct perf_top *top)
         if (!target__none(&opts->target))
                 perf_evlist__enable(top->evlist);
 
-	/* Wait for a minimal set of events before starting the snapshot */
-	perf_evlist__poll(top->evlist, 100);
-
-	perf_top__mmap_read(top);
-
 	ret = -1;
 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
 							    display_thread), top)) {
@@ -1162,6 +1151,11 @@ static int __cmd_top(struct perf_top *top)
 		}
 	}
 
+	/* Wait for a minimal set of events before starting the snapshot */
+	perf_evlist__poll(top->evlist, 100);
+
+	perf_top__mmap_read(top);
+
 	while (!done) {
 		u64 hits = top->samples;
 
@@ -1263,10 +1257,17 @@ int cmd_top(int argc, const char **argv)
 				.uses_mmap   = true,
 			},
 			.proc_map_timeout    = 500,
-			.overwrite	= 1,
+			/*
+			 * FIXME: This will lose PERF_RECORD_MMAP and other metadata
+			 * when we pause, fix that and reenable. Probably using a
+			 * separate evlist with a dummy event, i.e. a non-overwrite
+			 * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
+			 * stays in overwrite mode. -acme
+			 * */
+			.overwrite	= 0,
 		},
-		.max_stack	     = sysctl_perf_event_max_stack,
-		.sym_pcnt_filter     = 5,
+		.max_stack	     = sysctl__max_stack(),
+		.annotation_opts     = annotation__default_options,
 		.nr_threads_synthesize = UINT_MAX,
 	};
 	struct record_opts *opts = &top.record_opts;
@@ -1348,15 +1349,15 @@ int cmd_top(int argc, const char **argv)
 		   "only consider symbols in these comms"),
 	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
-	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
+	OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src,
 		    "Interleave source code with assembly code (default)"),
-	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
+	OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
 		    "Enable kernel symbol demangling"),
-	OPT_STRING(0, "objdump", &objdump_path, "path",
+	OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
 		    "objdump binary to use for disassembly and annotations"),
-	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+	OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
 	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
 	OPT_CALLBACK(0, "percent-limit", &top, "percent",
@@ -1378,6 +1379,8 @@ int cmd_top(int argc, const char **argv)
 		    "Show raw trace event output (do not use print fmt or plugins)"),
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
+	OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
+		    "Use a backward ring buffer, default: no"),
 	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
 	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
 			"number of thread to run event synthesize"),
@@ -1392,6 +1395,9 @@ int cmd_top(int argc, const char **argv)
 	if (status < 0)
 		return status;
 
+	top.annotation_opts.min_pcnt = 5;
+	top.annotation_opts.context  = 4;
+
 	top.evlist = perf_evlist__new();
 	if (top.evlist == NULL)
 		return -ENOMEM;
@@ -1423,6 +1429,9 @@ int cmd_top(int argc, const char **argv)
 		}
 	}
 
+	if (opts->branch_stack && callchain_param.enabled)
+		symbol_conf.show_branchflag_count = true;
+
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
 	perf_hpp_list.need_collapse = 1;
@@ -1469,8 +1478,6 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	symbol_conf.nr_events = top.evlist->nr_entries;
-
 	if (top.delay_secs < 1)
 		top.delay_secs = 1;
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 3ad17ee89403..835619476370 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -77,7 +77,8 @@ struct trace {
 		struct syscall  *table;
 		struct {
 			struct perf_evsel *sys_enter,
-					  *sys_exit;
+					  *sys_exit,
+					  *augmented;
 		}		events;
 	} syscalls;
 	struct record_opts	opts;
@@ -88,6 +89,8 @@ struct trace {
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
+	unsigned long		nr_events_printed;
+	unsigned long		max_events;
 	struct strlist		*ev_qualifier;
 	struct {
 		size_t		nr;
@@ -105,6 +108,7 @@ struct trace {
 	} stats;
 	unsigned int		max_stack;
 	unsigned int		min_stack;
+	bool			raw_augmented_syscalls;
 	bool			not_ev_qualifier;
 	bool			live;
 	bool			full_time;
@@ -121,7 +125,6 @@ struct trace {
 	bool			force;
 	bool			vfs_getname;
 	int			trace_pgfaults;
-	int			open_id;
 };
 
 struct tp_field {
@@ -157,13 +160,11 @@ TP_UINT_FIELD__SWAPPED(16);
 TP_UINT_FIELD__SWAPPED(32);
 TP_UINT_FIELD__SWAPPED(64);
 
-static int tp_field__init_uint(struct tp_field *field,
-			       struct format_field *format_field,
-			       bool needs_swap)
+static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
 {
-	field->offset = format_field->offset;
+	field->offset = offset;
 
-	switch (format_field->size) {
+	switch (size) {
 	case 1:
 		field->integer = tp_field__u8;
 		break;
@@ -183,18 +184,28 @@ static int tp_field__init_uint(struct tp_field *field,
 	return 0;
 }
 
+static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
+{
+	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
+}
+
 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 {
 	return sample->raw_data + field->offset;
 }
 
-static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+static int __tp_field__init_ptr(struct tp_field *field, int offset)
 {
-	field->offset = format_field->offset;
+	field->offset = offset;
 	field->pointer = tp_field__ptr;
 	return 0;
 }
 
+static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
+{
+	return __tp_field__init_ptr(field, format_field->offset);
+}
+
 struct syscall_tp {
 	struct tp_field id;
 	union {
@@ -206,7 +217,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 					  struct tp_field *field,
 					  const char *name)
 {
-	struct format_field *format_field = perf_evsel__field(evsel, name);
+	struct tep_format_field *format_field = perf_evsel__field(evsel, name);
 
 	if (format_field == NULL)
 		return -1;
@@ -222,7 +233,7 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 					 struct tp_field *field,
 					 const char *name)
 {
-	struct format_field *format_field = perf_evsel__field(evsel, name);
+	struct tep_format_field *format_field = perf_evsel__field(evsel, name);
 
 	if (format_field == NULL)
 		return -1;
@@ -240,7 +251,54 @@ static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 	perf_evsel__delete(evsel);
 }
 
-static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
+static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
+{
+	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
+
+	if (evsel->priv != NULL) {
+		if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
+			goto out_delete;
+		return 0;
+	}
+
+	return -ENOMEM;
+out_delete:
+	zfree(&evsel->priv);
+	return -ENOENT;
+}
+
+static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
+{
+	struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
+
+	if (evsel->priv != NULL) {       /* field, sizeof_field, offsetof_field */
+		if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
+			goto out_delete;
+
+		return 0;
+	}
+
+	return -ENOMEM;
+out_delete:
+	zfree(&evsel->priv);
+	return -EINVAL;
+}
+
+static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
+{
+	struct syscall_tp *sc = evsel->priv;
+
+	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
+}
+
+static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel)
+{
+	struct syscall_tp *sc = evsel->priv;
+
+	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
+}
+
+static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
 {
 	evsel->priv = malloc(sizeof(struct syscall_tp));
 	if (evsel->priv != NULL) {
@@ -258,7 +316,7 @@ out_delete:
 	return -ENOENT;
 }
 
-static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
+static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 {
 	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 
@@ -269,7 +327,7 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void
 	if (IS_ERR(evsel))
 		return NULL;
 
-	if (perf_evsel__init_syscall_tp(evsel, handler))
+	if (perf_evsel__init_raw_syscall_tp(evsel, handler))
 		goto out_delete;
 
 	return evsel;
@@ -291,7 +349,7 @@ size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const cha
 {
 	int idx = val - sa->offset;
 
-	if (idx < 0 || idx >= sa->nr_entries)
+	if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
 		return scnprintf(bf, size, intfmt, val);
 
 	return scnprintf(bf, size, "%s", sa->entries[idx]);
@@ -450,16 +508,6 @@ static const char *clockid[] = {
 };
 static DEFINE_STRARRAY(clockid);
 
-static const char *socket_families[] = {
-	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
-	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
-	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
-	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
-	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
-	"ALG", "NFC", "VSOCK",
-};
-static DEFINE_STRARRAY(socket_families);
-
 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 						 struct syscall_arg *arg)
 {
@@ -567,6 +615,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 
 struct syscall_arg_fmt {
 	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
 	void	   *parm;
 	const char *name;
 	bool	   show_zero;
@@ -583,6 +632,8 @@ static struct syscall_fmt {
 } syscall_fmts[] = {
 	{ .name	    = "access",
 	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
+	{ .name	    = "bind",
+	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
 	{ .name	    = "bpf",
 	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
 	{ .name	    = "brk",	    .hexret = true,
@@ -597,6 +648,8 @@ static struct syscall_fmt {
 		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
 	{ .name	    = "close",
 	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
+	{ .name	    = "connect",
+	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
 	{ .name	    = "epoll_ctl",
 	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
 	{ .name	    = "eventfd2",
@@ -674,6 +727,10 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
 		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
+	{ .name	    = "mount",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
 	{ .name	    = "mprotect",
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
@@ -753,7 +810,8 @@ static struct syscall_fmt {
 	{ .name	    = "sendmsg",
 	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 	{ .name	    = "sendto",
-	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
+		   [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
 	{ .name	    = "set_tid_address", .errpid = true, },
 	{ .name	    = "setitimer",
 	  .arg = { [0] = STRARRAY(which, itimers), }, },
@@ -761,10 +819,12 @@ static struct syscall_fmt {
 	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
 	{ .name	    = "socket",
 	  .arg = { [0] = STRARRAY(family, socket_families),
-		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
+		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
 	{ .name	    = "socketpair",
 	  .arg = { [0] = STRARRAY(family, socket_families),
-		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
+		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+		   [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
 	{ .name	    = "stat", .alias = "newstat", },
 	{ .name	    = "statx",
 	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
@@ -780,6 +840,8 @@ static struct syscall_fmt {
 	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 	{ .name	    = "tkill",
 	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name     = "umount2", .alias = "umount",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
 	{ .name	    = "uname", .alias = "newuname", },
 	{ .name	    = "unlinkat",
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -803,12 +865,31 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 }
 
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+	int i, nmemb = ARRAY_SIZE(syscall_fmts);
+
+	for (i = 0; i < nmemb; ++i) {
+		if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
+			return &syscall_fmts[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * is_exit: is this "exit" or "exit_group"?
+ * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
+ * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
+ */
 struct syscall {
-	struct event_format *tp_format;
+	struct tep_event_format *tp_format;
 	int		    nr_args;
-	struct format_field *args;
-	const char	    *name;
+	int		    args_size;
 	bool		    is_exit;
+	bool		    is_open;
+	struct tep_format_field *args;
+	const char	    *name;
 	struct syscall_fmt  *fmt;
 	struct syscall_arg_fmt *arg_fmt;
 };
@@ -1040,11 +1121,21 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf,
 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
 }
 
+static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
+{
+	struct augmented_arg *augmented_arg = arg->augmented.args;
+
+	return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value);
+}
+
 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 					      struct syscall_arg *arg)
 {
 	unsigned long ptr = arg->val;
 
+	if (arg->augmented.args)
+		return syscall_arg__scnprintf_augmented_string(arg, bf, size);
+
 	if (!arg->trace->vfs_getname)
 		return scnprintf(bf, size, "%#x", ptr);
 
@@ -1087,11 +1178,9 @@ static void sig_handler(int sig)
 	interrupted = sig == SIGINT;
 }
 
-static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
-					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
 {
-	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
-	printed += fprintf_duration(duration, duration_calculated, fp);
+	size_t printed = 0;
 
 	if (trace->multiple_threads) {
 		if (trace->show_comm)
@@ -1102,6 +1191,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 	return printed;
 }
 
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
+					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+{
+	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
+	printed += fprintf_duration(duration, duration_calculated, fp);
+	return printed + trace__fprintf_comm_tid(trace, thread, fp);
+}
+
 static int trace__process_event(struct trace *trace, struct machine *machine,
 				union perf_event *event, struct perf_sample *sample)
 {
@@ -1203,10 +1300,12 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
 
 static int syscall__set_arg_fmts(struct syscall *sc)
 {
-	struct format_field *field;
+	struct tep_format_field *field, *last_field = NULL;
 	int idx = 0, len;
 
 	for (field = sc->args; field; field = field->next, ++idx) {
+		last_field = field;
+
 		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
 			continue;
 
@@ -1215,7 +1314,7 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 			  strcmp(field->name, "path") == 0 ||
 			  strcmp(field->name, "pathname") == 0))
 			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
-		else if (field->flags & FIELD_IS_POINTER)
+		else if (field->flags & TEP_FIELD_IS_POINTER)
 			sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
 		else if (strcmp(field->type, "pid_t") == 0)
 			sc->arg_fmt[idx].scnprintf = SCA_PID;
@@ -1237,6 +1336,9 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 		}
 	}
 
+	if (last_field)
+		sc->args_size = last_field->offset + last_field->size;
+
 	return 0;
 }
 
@@ -1297,6 +1399,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	}
 
 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
+	sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
 
 	return syscall__set_arg_fmts(sc);
 }
@@ -1403,6 +1506,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
 	return scnprintf(bf, size, "arg%d: ", arg->idx);
 }
 
+/*
+ * Check if the value is in fact zero, i.e. mask whatever needs masking, such
+ * as mount 'flags' argument that needs ignoring some magic flag, see comment
+ * in tools/perf/trace/beauty/mount_flags.c
+ */
+static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+{
+	if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
+		return sc->arg_fmt[arg->idx].mask_val(arg, val);
+
+	return val;
+}
+
 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
 				     struct syscall_arg *arg, unsigned long val)
 {
@@ -1416,14 +1532,18 @@ static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
 }
 
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
-				      unsigned char *args, struct trace *trace,
-				      struct thread *thread)
+				      unsigned char *args, void *augmented_args, int augmented_args_size,
+				      struct trace *trace, struct thread *thread)
 {
 	size_t printed = 0;
 	unsigned long val;
 	u8 bit = 1;
 	struct syscall_arg arg = {
 		.args	= args,
+		.augmented = {
+			.size = augmented_args_size,
+			.args = augmented_args,
+		},
 		.idx	= 0,
 		.mask	= 0,
 		.trace  = trace,
@@ -1439,7 +1559,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 	ttrace->ret_scnprintf = NULL;
 
 	if (sc->args != NULL) {
-		struct format_field *field;
+		struct tep_format_field *field;
 
 		for (field = sc->args; field;
 		     field = field->next, ++arg.idx, bit <<= 1) {
@@ -1447,6 +1567,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 				continue;
 
 			val = syscall_arg__val(&arg, arg.idx);
+			/*
+			 * Some syscall args need some mask, most don't and
+			 * return val untouched.
+			 */
+			val = syscall__mask_val(sc, &arg, val);
 
 			/*
  			 * Suppress this argument if its value is zero and
@@ -1578,6 +1703,8 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
 	ttrace->entry_pending = false;
 
+	++trace->nr_events_printed;
+
 	return printed;
 }
 
@@ -1598,6 +1725,32 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
 	return printed;
 }
 
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
+{
+	void *augmented_args = NULL;
+	/*
+	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
+	 * and there we get all 6 syscall args plus the tracepoint common
+	 * fields (sizeof(long)) and the syscall_nr (another long). So we check
+	 * if that is the case and if so don't look after the sc->args_size,
+	 * but always after the full raw_syscalls:sys_enter payload, which is
+	 * fixed.
+	 *
+	 * We'll revisit this later to pass s->args_size to the BPF augmenter
+	 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
+	 * copies only what we need for each syscall, like what happens when we
+	 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
+	 * traffic to just what is needed for each syscall.
+	 */
+	int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
+
+	*augmented_args_size = sample->raw_size - args_size;
+	if (*augmented_args_size > 0)
+		augmented_args = sample->raw_data + args_size;
+
+	return augmented_args;
+}
+
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
@@ -1607,6 +1760,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	size_t printed = 0;
 	struct thread *thread;
 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+	int augmented_args_size = 0;
+	void *augmented_args = NULL;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
@@ -1630,13 +1785,24 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
 	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
 		trace__printf_interrupted_entry(trace);
-
+	/*
+	 * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
+	 * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
+	 * this breaks syscall__augmented_args() check for augmented args, as we calculate
+	 * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
+	 * so when handling, say the openat syscall, we end up getting 6 args for the
+	 * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
+	 * thinking that the extra 2 u64 args are the augmented filename, so just check
+	 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
+	 */
+	if (evsel != trace->syscalls.events.sys_enter)
+		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
 	ttrace->entry_time = sample->time;
 	msg = ttrace->entry_str;
 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
 
 	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
-					   args, trace, thread);
+					   args, augmented_args, augmented_args_size, trace, thread);
 
 	if (sc->is_exit) {
 		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
@@ -1659,6 +1825,39 @@ out_put:
 	return err;
 }
 
+static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
+				    struct perf_sample *sample)
+{
+	struct thread_trace *ttrace;
+	struct thread *thread;
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+	struct syscall *sc = trace__syscall_info(trace, evsel, id);
+	char msg[1024];
+	void *args, *augmented_args = NULL;
+	int augmented_args_size;
+
+	if (sc == NULL)
+		return -1;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	ttrace = thread__trace(thread, trace->output);
+	/*
+	 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
+	 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
+	 */
+	if (ttrace == NULL)
+		goto out_put;
+
+	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
+	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
+	syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
+	fprintf(trace->output, "%s", msg);
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
 				    struct perf_sample *sample,
 				    struct callchain_cursor *cursor)
@@ -1667,12 +1866,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
 	int max_stack = evsel->attr.sample_max_stack ?
 			evsel->attr.sample_max_stack :
 			trace->max_stack;
+	int err;
 
-	if (machine__resolve(trace->host, &al, sample) < 0 ||
-	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+	if (machine__resolve(trace->host, &al, sample) < 0)
 		return -1;
 
-	return 0;
+	err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
+	addr_location__put(&al);
+	return err;
 }
 
 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
@@ -1720,7 +1921,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
+	if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
 		ttrace->filename.pending_open = false;
 		++trace->stats.vfs_getname;
@@ -1797,6 +1998,13 @@ errno_print: {
 
 	fputc('\n', trace->output);
 
+	/*
+	 * We only consider an 'event' for the sake of --max-events a non-filtered
+	 * sys_enter + sys_exit and other tracepoint events.
+	 */
+	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
+		interrupted = true;
+
 	if (callchain_ret > 0)
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
@@ -1929,13 +2137,25 @@ static void bpf_output__fprintf(struct trace *trace,
 {
 	binary__fprintf(sample->raw_data, sample->raw_size, 8,
 			bpf_output__printer, NULL, trace->output);
+	++trace->nr_events_printed;
 }
 
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
+	struct thread *thread;
 	int callchain_ret = 0;
+	/*
+	 * Check if we called perf_evsel__disable(evsel) due to, for instance,
+	 * this event's max_events having been hit and this is an entry coming
+	 * from the ring buffer that we should discard, since the max events
+	 * have already been considered/printed.
+	 */
+	if (evsel->disabled)
+		return 0;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 
 	if (sample->callchain) {
 		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
@@ -1952,16 +2172,47 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 	if (trace->trace_syscalls)
 		fprintf(trace->output, "(         ): ");
 
+	if (thread)
+		trace__fprintf_comm_tid(trace, thread, trace->output);
+
+	if (evsel == trace->syscalls.events.augmented) {
+		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+		struct syscall *sc = trace__syscall_info(trace, evsel, id);
+
+		if (sc) {
+			fprintf(trace->output, "%s(", sc->name);
+			trace__fprintf_sys_enter(trace, evsel, sample);
+			fputc(')', trace->output);
+			goto newline;
+		}
+
+		/*
+		 * XXX: Not having the associated syscall info or not finding/adding
+		 * 	the thread should never happen, but if it does...
+		 * 	fall thru and print it as a bpf_output event.
+		 */
+	}
+
 	fprintf(trace->output, "%s:", evsel->name);
 
 	if (perf_evsel__is_bpf_output(evsel)) {
 		bpf_output__fprintf(trace, sample);
 	} else if (evsel->tp_format) {
-		event_format__fprintf(evsel->tp_format, sample->cpu,
-				      sample->raw_data, sample->raw_size,
-				      trace->output);
+		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
+		    trace__fprintf_sys_enter(trace, evsel, sample)) {
+			event_format__fprintf(evsel->tp_format, sample->cpu,
+					      sample->raw_data, sample->raw_size,
+					      trace->output);
+			++trace->nr_events_printed;
+
+			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
+				perf_evsel__disable(evsel);
+				perf_evsel__close(evsel);
+			}
+		}
 	}
 
+newline:
 	fprintf(trace->output, "\n");
 
 	if (callchain_ret > 0)
@@ -1969,6 +2220,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 	else if (callchain_ret < 0)
 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
+	thread__put(thread);
 	return 0;
 }
 
@@ -2024,8 +2276,7 @@ static int trace__pgfault(struct trace *trace,
 	if (trace->summary_only)
 		goto out;
 
-	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
-			      sample->ip, &al);
+	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
 
 	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
@@ -2037,12 +2288,10 @@ static int trace__pgfault(struct trace *trace,
 
 	fprintf(trace->output, "] => ");
 
-	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
-				   sample->addr, &al);
+	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
 
 	if (!al.map) {
-		thread__find_addr_location(thread, sample->cpumode,
-					   MAP__FUNCTION, sample->addr, &al);
+		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
 
 		if (al.map)
 			map_type = 'x';
@@ -2058,6 +2307,8 @@ static int trace__pgfault(struct trace *trace,
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+
+	++trace->nr_events_printed;
 out:
 	err = 0;
 out_put:
@@ -2235,6 +2486,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		tracepoint_handler handler = evsel->handler;
 		handler(trace, evsel, event, sample);
 	}
+
+	if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
+		interrupted = true;
 }
 
 static int trace__add_syscall_newtp(struct trace *trace)
@@ -2243,14 +2497,14 @@ static int trace__add_syscall_newtp(struct trace *trace)
 	struct perf_evlist *evlist = trace->evlist;
 	struct perf_evsel *sys_enter, *sys_exit;
 
-	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
+	sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
 	if (sys_enter == NULL)
 		goto out;
 
 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
 		goto out_delete_sys_enter;
 
-	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
+	sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
 	if (sys_exit == NULL)
 		goto out_delete_sys_enter;
 
@@ -2494,7 +2748,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	 * to override an explicitely set --max-stack global setting.
 	 */
 	evlist__for_each_entry(evlist, evsel) {
-		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+		if (evsel__has_callchain(evsel) &&
 		    evsel->attr.sample_max_stack == 0)
 			evsel->attr.sample_max_stack = trace->max_stack;
 	}
@@ -2539,7 +2793,7 @@ next_event:
 		int timeout = done ? 100 : -1;
 
 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
-			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
 				draining = true;
 
 			goto again;
@@ -2672,7 +2926,7 @@ static int trace__replay(struct trace *trace)
 							     "syscalls:sys_enter");
 
 	if (evsel &&
-	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+	    (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
 		goto out;
@@ -2684,7 +2938,7 @@ static int trace__replay(struct trace *trace)
 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
 							     "syscalls:sys_exit");
 	if (evsel &&
-	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+	    (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
 		goto out;
@@ -2924,6 +3178,36 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
 		evsel->handler = handler;
 }
 
+static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->priv || !evsel->tp_format)
+			continue;
+
+		if (strcmp(evsel->tp_format->system, "syscalls"))
+			continue;
+
+		if (perf_evsel__init_syscall_tp(evsel))
+			return -1;
+
+		if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
+			struct syscall_tp *sc = evsel->priv;
+
+			if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
+				return -1;
+		} else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
+			struct syscall_tp *sc = evsel->priv;
+
+			if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * XXX: Hackish, just splitting the combined -e+--event (syscalls
  * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
@@ -2941,6 +3225,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 	int len = strlen(str) + 1, err = -1, list, idx;
 	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
 	char group_name[PATH_MAX];
+	struct syscall_fmt *fmt;
 
 	if (strace_groups_dir == NULL)
 		return -1;
@@ -2958,12 +3243,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
 		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
 			list = 1;
+			goto do_concat;
+		}
+
+		fmt = syscall_fmt__find_by_alias(s);
+		if (fmt != NULL) {
+			list = 1;
+			s = fmt->name;
 		} else {
 			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
 			if (access(group_name, R_OK) == 0)
 				list = 1;
 		}
-
+do_concat:
 		if (lists[list]) {
 			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
 		} else {
@@ -2993,6 +3285,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 
 		if (trace__validate_ev_qualifier(trace))
 			goto out;
+		trace->trace_syscalls = true;
 	}
 
 	err = 0;
@@ -3048,9 +3341,10 @@ int cmd_trace(int argc, const char **argv)
 		},
 		.output = stderr,
 		.show_comm = true,
-		.trace_syscalls = true,
+		.trace_syscalls = false,
 		.kernel_syscallchains = false,
 		.max_stack = UINT_MAX,
+		.max_events = ULONG_MAX,
 	};
 	const char *output_name = NULL;
 	const struct option trace_options[] = {
@@ -3103,6 +3397,8 @@ int cmd_trace(int argc, const char **argv)
 		     &record_parse_callchain_opt),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
 		    "Show the kernel callchains on the syscall exit path"),
+	OPT_ULONG(0, "max-events", &trace.max_events,
+		"Set the maximum number of events to print, exit after that is reached. "),
 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
 		     "Set the minimum stack depth when parsing the callchain, "
 		     "anything below the specified depth will be ignored."),
@@ -3123,8 +3419,9 @@ int cmd_trace(int argc, const char **argv)
 	};
 	bool __maybe_unused max_stack_user_set = true;
 	bool mmap_pages_user_set = true;
+	struct perf_evsel *evsel;
 	const char * const trace_subcommands[] = { "record", NULL };
-	int err;
+	int err = -1;
 	char bf[BUFSIZ];
 
 	signal(SIGSEGV, sighandler_dump_stack);
@@ -3147,6 +3444,16 @@ int cmd_trace(int argc, const char **argv)
 				       "cgroup monitoring only available in system-wide mode");
 	}
 
+	evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
+	if (IS_ERR(evsel)) {
+		bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
+		pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
+		goto out;
+	}
+
+	if (evsel)
+		trace.syscalls.events.augmented = evsel;
+
 	err = bpf__setup_stdout(trace.evlist);
 	if (err) {
 		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
@@ -3165,7 +3472,7 @@ int cmd_trace(int argc, const char **argv)
 		mmap_pages_user_set = false;
 
 	if (trace.max_stack == UINT_MAX) {
-		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
+		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
 		max_stack_user_set = false;
 	}
 
@@ -3182,8 +3489,49 @@ int cmd_trace(int argc, const char **argv)
 		symbol_conf.use_callchain = true;
 	}
 
-	if (trace.evlist->nr_entries > 0)
+	if (trace.evlist->nr_entries > 0) {
 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
+		if (evlist__set_syscall_tp_fields(trace.evlist)) {
+			perror("failed to set syscalls:* tracepoint fields");
+			goto out;
+		}
+	}
+
+	/*
+	 * If we are augmenting syscalls, then combine what we put in the
+	 * __augmented_syscalls__ BPF map with what is in the
+	 * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
+	 * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
+	 *
+	 * We'll switch to look at two BPF maps, one for sys_enter and the
+	 * other for sys_exit when we start augmenting the sys_exit paths with
+	 * buffers that are being copied from kernel to userspace, think 'read'
+	 * syscall.
+	 */
+	if (trace.syscalls.events.augmented) {
+		evsel = trace.syscalls.events.augmented;
+
+		if (perf_evsel__init_augmented_syscall_tp(evsel) ||
+		    perf_evsel__init_augmented_syscall_tp_args(evsel))
+			goto out;
+		evsel->handler = trace__sys_enter;
+
+		evlist__for_each_entry(trace.evlist, evsel) {
+			bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+
+			if (raw_syscalls_sys_exit) {
+				trace.raw_augmented_syscalls = true;
+				goto init_augmented_syscall_tp;
+			}
+
+			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+init_augmented_syscall_tp:
+				perf_evsel__init_augmented_syscall_tp(evsel);
+				perf_evsel__init_augmented_syscall_tp_ret(evsel);
+				evsel->handler = trace__sys_exit;
+			}
+		}
+	}
 
 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
 		return trace__record(&trace, argc-1, &argv[1]);
@@ -3194,13 +3542,7 @@ int cmd_trace(int argc, const char **argv)
 
 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
-		pr_err("Please specify something to trace.\n");
-		return -1;
-	}
-
-	if (!trace.trace_syscalls && trace.ev_qualifier) {
-		pr_err("The -e option can't be used with --no-syscalls.\n");
-		goto out;
+		trace.trace_syscalls = true;
 	}
 
 	if (output_name != NULL) {
@@ -3211,8 +3553,6 @@ int cmd_trace(int argc, const char **argv)
 		}
 	}
 
-	trace.open_id = syscalltbl__id(trace.sctbl, "open");
-
 	err = target__validate(&trace.opts.target);
 	if (err) {
 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 9aff89bc7535..9531f7bd7d9b 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -5,14 +5,17 @@ HEADERS='
 include/uapi/drm/drm.h
 include/uapi/drm/i915_drm.h
 include/uapi/linux/fcntl.h
+include/uapi/linux/fs.h
 include/uapi/linux/kcmp.h
 include/uapi/linux/kvm.h
+include/uapi/linux/in.h
 include/uapi/linux/perf_event.h
 include/uapi/linux/prctl.h
 include/uapi/linux/sched.h
 include/uapi/linux/stat.h
 include/uapi/linux/vhost.h
 include/uapi/sound/asound.h
+include/linux/bits.h
 include/linux/hash.h
 include/uapi/linux/hw_breakpoint.h
 arch/x86/include/asm/disabled-features.h
@@ -35,6 +38,7 @@ arch/s390/include/uapi/asm/ptrace.h
 arch/s390/include/uapi/asm/sie.h
 arch/arm/include/uapi/asm/kvm.h
 arch/arm64/include/uapi/asm/kvm.h
+arch/arm64/include/uapi/asm/unistd.h
 arch/alpha/include/uapi/asm/errno.h
 arch/mips/include/asm/errno.h
 arch/mips/include/uapi/asm/errno.h
@@ -53,37 +57,53 @@ include/uapi/asm-generic/errno.h
 include/uapi/asm-generic/errno-base.h
 include/uapi/asm-generic/ioctls.h
 include/uapi/asm-generic/mman-common.h
+include/uapi/asm-generic/unistd.h
 '
 
-check () {
-  file=$1
+check_2 () {
+  file1=$1
+  file2=$2
 
   shift
-  opts=
-  while [ -n "$*" ]; do
-    opts="$opts \"$1\""
-    shift
-  done
+  shift
 
-  cmd="diff $opts ../$file ../../$file > /dev/null"
+  cmd="diff $* $file1 $file2 > /dev/null"
 
-  test -f ../../$file &&
-  eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2
+  test -f $file2 && {
+    eval $cmd || {
+      echo "Warning: Kernel ABI header at '$file1' differs from latest version at '$file2'" >&2
+      echo diff -u $file1 $file2
+    }
+  }
 }
 
+check () {
+  file=$1
+
+  shift
+
+  check_2 tools/$file $file $*
+}
 
 # Check if we have the kernel headers (tools/perf/../../include), else
 # we're probably on a detached tarball, so no point in trying to check
 # differences.
 test -d ../../include || exit 0
 
+cd ../..
+
 # simple diff check
 for i in $HEADERS; do
   check $i -B
 done
 
 # diff with extra ignore lines
-check arch/x86/lib/memcpy_64.S        -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
-check arch/x86/lib/memset_64.S        -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
-check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>"
-check include/uapi/linux/mman.h       -I "^#include <\(uapi/\)*asm/mman.h>"
+check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
+
+# diff non-symmetric files
+check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+
+cd tools/perf
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 2d0caf20ff3a..bc6c585f74fc 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -30,3 +30,4 @@ perf-test			mainporcelain common
 perf-timechart			mainporcelain common
 perf-top			mainporcelain common
 perf-trace			mainporcelain audit
+perf-version			mainporcelain common
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
new file mode 100644
index 000000000000..b9c203219691
--- /dev/null
+++ b/tools/perf/examples/bpf/5sec.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+    Description:
+
+    . Disable strace like syscall tracing (--no-syscalls), or try tracing
+      just some (-e *sleep).
+
+    . Attach a filter function to a kernel function, returning when it should
+      be considered, i.e. appear on the output.
+
+    . Run it system wide, so that any sleep of >= 5 seconds and < than 6
+      seconds gets caught.
+
+    . Ask for callgraphs using DWARF info, so that userspace can be unwound
+
+    . While this is running, run something like "sleep 5s".
+
+    . If we decide to add tv_nsec as well, then it becomes:
+
+      int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
+
+      I.e. add where it comes from (rqtp->tv_nsec) and where it will be
+      accessible in the function body (nsec)
+
+    # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
+         0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
+                                           hrtimer_nanosleep ([kernel.kallsyms])
+                                           __x64_sys_nanosleep ([kernel.kallsyms])
+                                           do_syscall_64 ([kernel.kallsyms])
+                                           entry_SYSCALL_64 ([kernel.kallsyms])
+                                           __GI___nanosleep (/usr/lib64/libc-2.26.so)
+                                           rpl_nanosleep (/usr/bin/sleep)
+                                           xnanosleep (/usr/bin/sleep)
+                                           main (/usr/bin/sleep)
+                                           __libc_start_main (/usr/lib64/libc-2.26.so)
+                                           _start (/usr/bin/sleep)
+    ^C#
+
+   Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
+*/
+
+#include <bpf.h>
+
+int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
+{
+	return sec == 5;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
new file mode 100644
index 000000000000..90a19336310b
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * This exactly matches what is marshalled into the raw_syscall:sys_enter
+ * payload expected by the 'perf trace' beautifiers.
+ *
+ * For now it just uses the existing tracepoint augmentation code in 'perf
+ * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
+ * code that will combine entry/exit in a strace like way.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+/* bpf-output associated map */
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = __NR_CPUS__,
+};
+
+struct syscall_enter_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	unsigned long	   args[6];
+};
+
+struct syscall_exit_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   ret;
+};
+
+struct augmented_filename {
+	unsigned int	size;
+	int		reserved;
+	char		value[256];
+};
+
+#define SYS_OPEN 2
+#define SYS_OPENAT 257
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+	struct {
+		struct syscall_enter_args args;
+		struct augmented_filename filename;
+	} augmented_args;
+	unsigned int len = sizeof(augmented_args);
+	const void *filename_arg = NULL;
+
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+	/*
+	 * Yonghong and Edward Cree sayz:
+	 *
+	 * https://www.spinics.net/lists/netdev/msg531645.html
+	 *
+	 * >>   R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
+	 * >> 10: (bf) r1 = r6
+	 * >> 11: (07) r1 += 16
+	 * >> 12: (05) goto pc+2
+	 * >> 15: (79) r3 = *(u64 *)(r1 +0)
+	 * >> dereference of modified ctx ptr R1 off=16 disallowed
+	 * > Aha, we at least got a different error message this time.
+	 * > And indeed llvm has done that optimisation, rather than the more obvious
+	 * > 11: r3 = *(u64 *)(r1 +16)
+	 * > because it wants to have lots of reads share a single insn.  You may be able
+	 * > to defeat that optimisation by adding compiler barriers, idk.  Maybe someone
+	 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
+	 * > when it's generating for bpf backend and not do that).  -O0?  ¯\_(ツ)_/¯
+	 *
+	 * The optimization mostly likes below:
+	 *
+	 *	br1:
+	 * 	...
+	 *	r1 += 16
+	 *	goto merge
+	 *	br2:
+	 *	...
+	 *	r1 += 20
+	 *	goto merge
+	 *	merge:
+	 *	*(u64 *)(r1 + 0)
+	 *
+	 * The compiler tries to merge common loads. There is no easy way to
+	 * stop this compiler optimization without turning off a lot of other
+	 * optimizations. The easiest way is to add barriers:
+	 *
+	 * 	 __asm__ __volatile__("": : :"memory")
+	 *
+	 * 	 after the ctx memory access to prevent their down stream merging.
+	 */
+	switch (augmented_args.args.syscall_nr) {
+	case SYS_OPEN:	 filename_arg = (const void *)args->args[0];
+			__asm__ __volatile__("": : :"memory");
+			 break;
+	case SYS_OPENAT: filename_arg = (const void *)args->args[1];
+			 break;
+	}
+
+	if (filename_arg != NULL) {
+		augmented_args.filename.reserved = 0;
+		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
+							      sizeof(augmented_args.filename.value),
+							      filename_arg);
+		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
+			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
+			len &= sizeof(augmented_args.filename.value) - 1;
+		}
+	} else {
+		len = sizeof(augmented_args.args);
+	}
+
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
+	return 0;
+}
+
+SEC("raw_syscalls:sys_exit")
+int sys_exit(struct syscall_exit_args *args)
+{
+	return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
new file mode 100644
index 000000000000..2ae44813ef2d
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment syscalls with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * This matches what is marshalled into the raw_syscall:sys_enter payload
+ * expected by the 'perf trace' beautifiers, and can be used by them, that will
+ * check if perf_sample->raw_data is more than what is expected for each
+ * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the
+ * contents of pointer arguments.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(u32),
+       .max_entries = __NR_CPUS__,
+};
+
+struct syscall_exit_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   ret;
+};
+
+struct augmented_filename {
+	unsigned int	size;
+	int		reserved;
+	char		value[256];
+};
+
+#define augmented_filename_syscall(syscall)							\
+struct augmented_enter_##syscall##_args {			 				\
+	struct syscall_enter_##syscall##_args	args;				 		\
+	struct augmented_filename		filename;				 	\
+};												\
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
+{												\
+	struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; 	\
+	unsigned int len = sizeof(augmented_args);						\
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
+	augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 		\
+						      sizeof(augmented_args.filename.value), 	\
+						      args->filename_ptr); 			\
+	if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {		\
+		len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;	\
+		len &= sizeof(augmented_args.filename.value) - 1;				\
+	}											\
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
+			  &augmented_args, len);						\
+	return 0;										\
+}												\
+int syscall_exit(syscall)(struct syscall_exit_args *args)					\
+{												\
+       return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */	\
+}
+
+struct syscall_enter_openat_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   dfd;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall(openat);
+
+struct syscall_enter_open_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall(open);
+
+struct syscall_enter_inotify_add_watch_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	char		   *filename_ptr;
+	long		   mask;
+};
+
+augmented_filename_syscall(inotify_add_watch);
+
+struct statbuf;
+
+struct syscall_enter_newstat_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	char		   *filename_ptr;
+	struct stat	   *statbuf;
+};
+
+augmented_filename_syscall(newstat);
+
+#ifndef _K_SS_MAXSIZE
+#define _K_SS_MAXSIZE 128
+#endif
+
+#define augmented_sockaddr_syscall(syscall)						\
+struct augmented_enter_##syscall##_args {			 				\
+	struct syscall_enter_##syscall##_args	args;				 		\
+	struct sockaddr_storage			addr;						\
+};												\
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
+{												\
+	struct augmented_enter_##syscall##_args augmented_args;				 	\
+	unsigned long addrlen = sizeof(augmented_args.addr);					\
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
+/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */		\
+/*	if (addrlen > augmented_args.args.addrlen)				     */		\
+/*		addrlen = augmented_args.args.addrlen;				     */		\
+/*										     */		\
+	probe_read(&augmented_args.addr, addrlen, args->addr_ptr); 				\
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
+			  &augmented_args, 							\
+			  sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);	\
+	return 0;										\
+}												\
+int syscall_exit(syscall)(struct syscall_exit_args *args)					\
+{												\
+       return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */	\
+}
+
+struct sockaddr;
+
+struct syscall_enter_bind_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	struct sockaddr	   *addr_ptr;
+	unsigned long	   addrlen;
+};
+
+augmented_sockaddr_syscall(bind);
+
+struct syscall_enter_connect_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	struct sockaddr	   *addr_ptr;
+	unsigned long	   addrlen;
+};
+
+augmented_sockaddr_syscall(connect);
+
+struct syscall_enter_sendto_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	void		   *buff;
+	long		   len;
+	unsigned long	   flags;
+	struct sockaddr	   *addr_ptr;
+	long		   addr_len;
+};
+
+augmented_sockaddr_syscall(sendto);
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
new file mode 100644
index 000000000000..3776d26db9e7
--- /dev/null
+++ b/tools/perf/examples/bpf/empty.c
@@ -0,0 +1,3 @@
+#include <bpf.h>
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
new file mode 100644
index 000000000000..b59e8812ee8c
--- /dev/null
+++ b/tools/perf/examples/bpf/etcsnoop.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the filename syscalls with the contents of the filename pointer argument
+ * filtering only those that do not start with /etc/.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * This matches what is marshalled into the raw_syscall:sys_enter payload
+ * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
+ * which will be done as that feature is implemented in the next csets, for now
+ * it will appear in a dump done by the default tracepoint handler in 'perf trace',
+ * that uses bpf_output__fprintf() to just dump those contents, as done with
+ * the bpf-output event associated with the __bpf_output__ map declared in
+ * tools/perf/include/bpf/stdio.h.
+ */
+
+#include <stdio.h>
+
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(u32),
+       .max_entries = __NR_CPUS__,
+};
+
+struct augmented_filename {
+	int	size;
+	int	reserved;
+	char	value[64];
+};
+
+#define augmented_filename_syscall_enter(syscall) 						\
+struct augmented_enter_##syscall##_args {			 				\
+	struct syscall_enter_##syscall##_args	args;				 		\
+	struct augmented_filename		filename;				 	\
+};												\
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
+{												\
+	char etc[6] = "/etc/";									\
+	struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; 	\
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
+	augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 		\
+						      sizeof(augmented_args.filename.value), 	\
+						      args->filename_ptr); 			\
+	if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0)			\
+		return 0;									\
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
+			  &augmented_args, 							\
+			  (sizeof(augmented_args) - sizeof(augmented_args.filename.value) +	\
+			   augmented_args.filename.size));					\
+	return 0;										\
+}
+
+struct syscall_enter_openat_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   dfd;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall_enter(openat);
+
+struct syscall_enter_open_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall_enter(open);
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c
new file mode 100644
index 000000000000..cf3c2fdc7f79
--- /dev/null
+++ b/tools/perf/examples/bpf/hello.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+int syscall_enter(openat)(void *args)
+{
+	puts("Hello, world\n");
+	return 0;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c
new file mode 100644
index 000000000000..9cd124b09392
--- /dev/null
+++ b/tools/perf/examples/bpf/sys_enter_openat.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hook into 'openat' syscall entry tracepoint
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/sys_enter_openat.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * The syscall_enter_openat_args can be used to get the syscall fields
+ * and use them for filtering calls, i.e. use in expressions for
+ * the return value.
+ */
+
+#include <bpf.h>
+
+struct syscall_enter_openat_args {
+	unsigned long long unused;
+	long		   syscall_nr;
+	long		   dfd;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+int syscall_enter(openat)(struct syscall_enter_openat_args *args)
+{
+	return 1;
+}
+
+license(GPL);
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
new file mode 100644
index 000000000000..52b6d87fe822
--- /dev/null
+++ b/tools/perf/include/bpf/bpf.h
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _PERF_BPF_H
+#define _PERF_BPF_H
+
+#include <uapi/linux/bpf.h>
+
+/*
+ * A helper structure used by eBPF C program to describe map attributes to
+ * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h:
+ */
+struct bpf_map {
+        unsigned int type;
+        unsigned int key_size;
+        unsigned int value_size;
+        unsigned int max_entries;
+        unsigned int map_flags;
+        unsigned int inner_map_idx;
+        unsigned int numa_node;
+};
+
+#define SEC(NAME) __attribute__((section(NAME),  used))
+
+#define probe(function, vars) \
+	SEC(#function "=" #function " " #vars) function
+
+#define syscall_enter(name) \
+	SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name
+
+#define syscall_exit(name) \
+	SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name
+
+#define license(name) \
+char _license[] SEC("license") = #name; \
+int _version SEC("version") = LINUX_VERSION_CODE;
+
+static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read;
+static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str;
+
+#endif /* _PERF_BPF_H */
diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h
new file mode 100644
index 000000000000..7f844568dab8
--- /dev/null
+++ b/tools/perf/include/bpf/linux/socket.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_SOCKET_H
+#define _UAPI_LINUX_SOCKET_H
+
+/*
+ * Desired design of maximum size and alignment (see RFC2553)
+ */
+#define _K_SS_MAXSIZE	128	/* Implementation specific max size */
+#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
+				/* Implementation specific desired alignment */
+
+typedef unsigned short __kernel_sa_family_t;
+
+struct __kernel_sockaddr_storage {
+	__kernel_sa_family_t	ss_family;		/* address family */
+	/* Following field(s) are implementation specific */
+	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
+				/* space to achieve desired size, */
+				/* _SS_MAXSIZE value minus size of ss_family */
+} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
+
+#define sockaddr_storage __kernel_sockaddr_storage
+
+#endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
new file mode 100644
index 000000000000..2899cb7bfed8
--- /dev/null
+++ b/tools/perf/include/bpf/stdio.h
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf.h>
+
+struct bpf_map SEC("maps") __bpf_stdout__ = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(u32),
+       .max_entries = __NR_CPUS__,
+};
+
+static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) =
+	(void *)BPF_FUNC_perf_event_output;
+
+#define puts(from) \
+	({ const int __len = sizeof(from); \
+	   char __from[__len] = from; \
+	   perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
+			  &__from, __len & (sizeof(from) - 1)); })
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index 0c6d1002b524..f7eb63cbbc65 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -35,6 +35,7 @@
 #include <sys/mman.h>
 #include <syscall.h> /* for gettid() */
 #include <err.h>
+#include <linux/kernel.h>
 
 #include "jvmti_agent.h"
 #include "../util/jitdump.h"
@@ -124,7 +125,7 @@ perf_get_timestamp(void)
 }
 
 static int
-debug_cache_init(void)
+create_jit_cache_dir(void)
 {
 	char str[32];
 	char *base, *p;
@@ -143,8 +144,13 @@ debug_cache_init(void)
 
 	strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
-
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because %s/.debug/"
+			" is too long, please check the cwd, JITDUMPDIR, and"
+			" HOME variables", base);
+		return -1;
+	}
 	ret = mkdir(jit_path, 0755);
 	if (ret == -1) {
 		if (errno != EEXIST) {
@@ -153,20 +159,32 @@ debug_cache_init(void)
 		}
 	}
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because"
+			" %s/.debug/jit is too long, please check the cwd,"
+			" JITDUMPDIR, and HOME variables", base);
+		return -1;
+	}
 	ret = mkdir(jit_path, 0755);
 	if (ret == -1) {
 		if (errno != EEXIST) {
-			warn("cannot create jit cache dir %s", jit_path);
+			warn("jvmti: cannot create jit cache dir %s", jit_path);
 			return -1;
 		}
 	}
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
-
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because"
+			" %s/.debug/jit/%s.XXXXXXXX is too long, please check"
+			" the cwd, JITDUMPDIR, and HOME variables",
+			base, str);
+		return -1;
+	}
 	p = mkdtemp(jit_path);
 	if (p != jit_path) {
-		warn("cannot create jit cache dir %s", jit_path);
+		warn("jvmti: cannot create jit cache dir %s", jit_path);
 		return -1;
 	}
 
@@ -227,7 +245,7 @@ void *jvmti_open(void)
 {
 	char dump_path[PATH_MAX];
 	struct jitheader header;
-	int fd;
+	int fd, ret;
 	FILE *fp;
 
 	init_arch_timestamp();
@@ -244,12 +262,22 @@ void *jvmti_open(void)
 
 	memset(&header, 0, sizeof(header));
 
-	debug_cache_init();
+	/*
+	 * jitdump file dir
+	 */
+	if (create_jit_cache_dir() < 0)
+		return NULL;
 
 	/*
 	 * jitdump file name
 	 */
-	snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jitdump file full path because"
+			" %s/jit-%i.dump is too long, please check the cwd,"
+			" JITDUMPDIR, and HOME variables", jit_path, getpid());
+		return NULL;
+	}
 
 	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
 	if (fd == -1)
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20a08cb32332..a11cb006f968 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -12,7 +12,6 @@
 #include "util/env.h"
 #include <subcmd/exec-cmd.h>
 #include "util/config.h"
-#include "util/quote.h"
 #include <subcmd/run-command.h>
 #include "util/parse-events.h"
 #include <subcmd/parse-options.h>
@@ -238,7 +237,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 			(*argc)--;
 		} else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
 			tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
-			fprintf(stderr, "dir: %s\n", tracing_path);
+			fprintf(stderr, "dir: %s\n", tracing_path_mount());
 			if (envchanged)
 				*envchanged = 1;
 		} else if (!strcmp(cmd, "--list-cmds")) {
@@ -421,22 +420,11 @@ void pthread__unblock_sigwinch(void)
 	pthread_sigmask(SIG_UNBLOCK, &set, NULL);
 }
 
-#ifdef _SC_LEVEL1_DCACHE_LINESIZE
-#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
-#else
-static void cache_line_size(int *cacheline_sizep)
-{
-	if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
-		pr_debug("cannot determine cache line size");
-}
-#endif
-
 int main(int argc, const char **argv)
 {
 	int err;
 	const char *cmd;
 	char sbuf[STRERR_BUFSIZE];
-	int value;
 
 	/* libsubcmd init */
 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -444,13 +432,6 @@ int main(int argc, const char **argv)
 
 	/* The page_size is placed in util object. */
 	page_size = sysconf(_SC_PAGE_SIZE);
-	cache_line_size(&cacheline_size);
-
-	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
-		sysctl_perf_event_max_stack = value;
-
-	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
-		sysctl_perf_event_max_contexts_per_stack = value;
 
 	cmd = extract_argv0_path(argv[0]);
 	if (!cmd)
@@ -458,15 +439,11 @@ int main(int argc, const char **argv)
 
 	srandom(time(NULL));
 
-	perf_config__init();
 	err = perf_config(perf_default_config, NULL);
 	if (err)
 		return err;
 	set_buildid_dir(NULL);
 
-	/* get debugfs/tracefs mount point from /proc/mounts */
-	tracing_path_mount();
-
 	/*
 	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
 	 *
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index a1a97956136f..0ed4a34c74c4 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -5,6 +5,7 @@
 #include <time.h>
 #include <stdbool.h>
 #include <linux/types.h>
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 
 extern bool test_attr__enabled;
@@ -24,7 +25,9 @@ static inline unsigned long long rdclock(void)
 	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
 }
 
+#ifndef MAX_NR_CPUS
 #define MAX_NR_CPUS			1024
+#endif
 
 extern const char *input_name;
 extern bool perf_host, perf_guest;
@@ -78,6 +81,7 @@ struct record_opts {
 	unsigned     initial_delay;
 	bool         use_clockid;
 	clockid_t    clockid;
+	u64          clockid_res_ns;
 	unsigned int proc_map_timeout;
 };
 
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 17783913d330..215ba30b8534 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -1,7 +1,7 @@
 hostprogs := jevents
 
 jevents-y	+= json.o jsmn.o jevents.o
-CHOSTFLAGS_jevents.o	= -I$(srctree)/tools/include
+HOSTCFLAGS_jevents.o	= -I$(srctree)/tools/include
 pmu-events-y	+= pmu-events.o
 JDIR		=  pmu-events/arch/$(SRCARCH)
 JSON		=  $(shell [ -d $(JDIR) ] &&				\
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
new file mode 100644
index 000000000000..abc98b018446
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
@@ -0,0 +1,23 @@
+[
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC",
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC",
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC",
+    },
+    {
+        "PublicDescription": "Mispredicted or not predicted branch speculatively executed",
+        "EventCode": "0x10",
+        "EventName": "BR_MIS_PRED",
+        "BriefDescription": "Branch mispredicted"
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed",
+        "EventCode": "0x12",
+        "EventName": "BR_PRED",
+        "BriefDescription": "Predictable branch"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
new file mode 100644
index 000000000000..687b2629e1d1
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
@@ -0,0 +1,26 @@
+[
+    {
+        "ArchStdEvent": "BUS_ACCESS_RD",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_WR",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_SHARED",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_NOT_SHARED",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_NORMAL",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_PERIPH",
+    },
+    {
+        "PublicDescription": "Bus access",
+        "EventCode": "0x19",
+        "EventName": "BUS_ACCESS",
+        "BriefDescription": "Bus access"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
new file mode 100644
index 000000000000..df9201434cb6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
@@ -0,0 +1,191 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL",
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache refill",
+        "EventCode": "0x01",
+        "EventName": "L1I_CACHE_REFILL",
+        "BriefDescription": "L1I cache refill"
+    },
+    {
+        "PublicDescription": "Level 1 instruction TLB refill",
+        "EventCode": "0x02",
+        "EventName": "L1I_TLB_REFILL",
+        "BriefDescription": "L1I TLB refill"
+    },
+    {
+        "PublicDescription": "Level 1 data cache refill",
+        "EventCode": "0x03",
+        "EventName": "L1D_CACHE_REFILL",
+        "BriefDescription": "L1D cache refill"
+    },
+    {
+        "PublicDescription": "Level 1 data cache access",
+        "EventCode": "0x04",
+        "EventName": "L1D_CACHE_ACCESS",
+        "BriefDescription": "L1D cache access"
+    },
+    {
+        "PublicDescription": "Level 1 data TLB refill",
+        "EventCode": "0x05",
+        "EventName": "L1D_TLB_REFILL",
+        "BriefDescription": "L1D TLB refill"
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache access",
+        "EventCode": "0x14",
+        "EventName": "L1I_CACHE_ACCESS",
+        "BriefDescription": "L1I cache access"
+    },
+    {
+        "PublicDescription": "Level 2 data cache access",
+        "EventCode": "0x16",
+        "EventName": "L2D_CACHE_ACCESS",
+        "BriefDescription": "L2D cache access"
+    },
+    {
+        "PublicDescription": "Level 2 data refill",
+        "EventCode": "0x17",
+        "EventName": "L2D_CACHE_REFILL",
+        "BriefDescription": "L2D cache refill"
+    },
+    {
+        "PublicDescription": "Level 2 data cache, Write-Back",
+        "EventCode": "0x18",
+        "EventName": "L2D_CACHE_WB",
+        "BriefDescription": "L2D cache Write-Back"
+    },
+    {
+        "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB",
+        "EventCode": "0x25",
+        "EventName": "L1D_TLB_ACCESS",
+        "BriefDescription": "L1D TLB access"
+    },
+    {
+        "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB",
+        "EventCode": "0x26",
+        "EventName": "L1I_TLB_ACCESS",
+        "BriefDescription": "L1I TLB access"
+    },
+    {
+        "PublicDescription": "Level 2 access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count",
+        "EventCode": "0x34",
+        "EventName": "L2D_TLB_ACCESS",
+        "BriefDescription": "L2D TLB access"
+    },
+    {
+        "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count",
+        "EventCode": "0x35",
+        "EventName": "L2I_TLB_ACCESS",
+        "BriefDescription": "L2D TLB access"
+    },
+    {
+        "PublicDescription": "Branch target buffer misprediction",
+        "EventCode": "0x102",
+        "EventName": "BTB_MIS_PRED",
+        "BriefDescription": "BTB misprediction"
+    },
+    {
+        "PublicDescription": "ITB miss",
+        "EventCode": "0x103",
+        "EventName": "ITB_MISS",
+        "BriefDescription": "ITB miss"
+    },
+    {
+        "PublicDescription": "DTB miss",
+        "EventCode": "0x104",
+        "EventName": "DTB_MISS",
+        "BriefDescription": "DTB miss"
+    },
+    {
+        "PublicDescription": "Level 1 data cache late miss",
+        "EventCode": "0x105",
+        "EventName": "L1D_CACHE_LATE_MISS",
+        "BriefDescription": "L1D cache late miss"
+    },
+    {
+        "PublicDescription": "Level 1 data cache prefetch request",
+        "EventCode": "0x106",
+        "EventName": "L1D_CACHE_PREFETCH",
+        "BriefDescription": "L1D cache prefetch"
+    },
+    {
+        "PublicDescription": "Level 2 data cache prefetch request",
+        "EventCode": "0x107",
+        "EventName": "L2D_CACHE_PREFETCH",
+        "BriefDescription": "L2D cache prefetch"
+    },
+    {
+        "PublicDescription": "Level 1 stage 2 TLB refill",
+        "EventCode": "0x111",
+        "EventName": "L1_STAGE2_TLB_REFILL",
+        "BriefDescription": "L1 stage 2 TLB refill"
+    },
+    {
+        "PublicDescription": "Page walk cache level-0 stage-1 hit",
+        "EventCode": "0x112",
+        "EventName": "PAGE_WALK_L0_STAGE1_HIT",
+        "BriefDescription": "Page walk, L0 stage-1 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-1 stage-1 hit",
+        "EventCode": "0x113",
+        "EventName": "PAGE_WALK_L1_STAGE1_HIT",
+        "BriefDescription": "Page walk, L1 stage-1 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-2 stage-1 hit",
+        "EventCode": "0x114",
+        "EventName": "PAGE_WALK_L2_STAGE1_HIT",
+        "BriefDescription": "Page walk, L2 stage-1 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-1 stage-2 hit",
+        "EventCode": "0x115",
+        "EventName": "PAGE_WALK_L1_STAGE2_HIT",
+        "BriefDescription": "Page walk, L1 stage-2 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-2 stage-2 hit",
+        "EventCode": "0x116",
+        "EventName": "PAGE_WALK_L2_STAGE2_HIT",
+        "BriefDescription": "Page walk, L2 stage-2 hit"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
new file mode 100644
index 000000000000..38cd1f1a70dc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
@@ -0,0 +1,20 @@
+[
+    {
+        "PublicDescription": "The number of core clock cycles",
+        "EventCode": "0x11",
+        "EventName": "CPU_CYCLES",
+        "BriefDescription": "Clock cycles"
+    },
+    {
+        "PublicDescription": "FSU clocking gated off cycle",
+        "EventCode": "0x101",
+        "EventName": "FSU_CLOCK_OFF_CYCLES",
+        "BriefDescription": "FSU clocking gated off cycle"
+    },
+    {
+        "PublicDescription": "Wait state cycle",
+        "EventCode": "0x110",
+        "EventName": "Wait_CYCLES",
+        "BriefDescription": "Wait state cycle"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
new file mode 100644
index 000000000000..3720dc28a15f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
@@ -0,0 +1,50 @@
+[
+    {
+        "ArchStdEvent": "EXC_UNDEF",
+    },
+    {
+        "ArchStdEvent": "EXC_SVC",
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ",
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ",
+    },
+    {
+        "ArchStdEvent": "EXC_HVC",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ",
+    },
+    {
+        "PublicDescription": "Exception taken",
+        "EventCode": "0x09",
+        "EventName": "EXC_TAKEN",
+        "BriefDescription": "Exception taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition check pass, exception return",
+        "EventCode": "0x0a",
+        "EventName": "EXC_RETURN",
+        "BriefDescription": "Exception return"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
new file mode 100644
index 000000000000..82cf753e6472
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
@@ -0,0 +1,89 @@
+[
+    {
+        "ArchStdEvent": "LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "ST_SPEC",
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC",
+    },
+    {
+        "ArchStdEvent": "DP_SPEC",
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC",
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC",
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC",
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC",
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC",
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC",
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC",
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC",
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, software increment",
+        "EventCode": "0x00",
+        "EventName": "SW_INCR",
+        "BriefDescription": "Software increment"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed",
+        "EventCode": "0x08",
+        "EventName": "INST_RETIRED",
+        "BriefDescription": "Instruction retired"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR",
+        "EventCode": "0x0b",
+        "EventName": "CID_WRITE_RETIRED",
+        "BriefDescription": "Write to CONTEXTIDR"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed",
+        "EventCode": "0x1b",
+        "EventName": "INST_SPEC",
+        "BriefDescription": "Speculatively executed"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed (condition check pass), write to TTBR",
+        "EventCode": "0x1c",
+        "EventName": "TTBR_WRITE_RETIRED",
+        "BriefDescription": "Instruction executed, TTBR write"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches",
+        "EventCode": "0x21",
+        "EventName": "BR_RETIRED",
+        "BriefDescription": "Branch retired"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush",
+        "EventCode": "0x22",
+        "EventName": "BR_MISPRED_RETIRED",
+        "BriefDescription": "Mispredicted branch retired"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed, NOP",
+        "EventCode": "0x100",
+        "EventName": "NOP_SPEC",
+        "BriefDescription": "Speculatively executed, NOP"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json
new file mode 100644
index 000000000000..2aecc5c2347d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "LDREX_SPEC",
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC",
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC",
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC",
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
new file mode 100644
index 000000000000..08508697b318
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
@@ -0,0 +1,29 @@
+[
+    {
+        "ArchStdEvent": "MEM_ACCESS_RD",
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_WR",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_ST_SPEC",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LDST_SPEC",
+    },
+    {
+        "PublicDescription": "Data memory access",
+        "EventCode": "0x13",
+        "EventName": "MEM_ACCESS",
+        "BriefDescription": "Memory access"
+    },
+    {
+        "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
+        "EventCode": "0x1a",
+        "EventName": "MEM_ERROR",
+        "BriefDescription": "Memory error"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json
new file mode 100644
index 000000000000..e2087de586bf
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json
@@ -0,0 +1,50 @@
+[
+    {
+        "PublicDescription": "Decode starved for instruction cycle",
+        "EventCode": "0x108",
+        "EventName": "DECODE_STALL",
+        "BriefDescription": "Decode starved"
+    },
+    {
+        "PublicDescription": "Op dispatch stalled cycle",
+        "EventCode": "0x109",
+        "EventName": "DISPATCH_STALL",
+        "BriefDescription": "Dispatch stalled"
+    },
+    {
+        "PublicDescription": "IXA Op non-issue",
+        "EventCode": "0x10a",
+        "EventName": "IXA_STALL",
+        "BriefDescription": "IXA stalled"
+    },
+    {
+        "PublicDescription": "IXB Op non-issue",
+        "EventCode": "0x10b",
+        "EventName": "IXB_STALL",
+        "BriefDescription": "IXB stalled"
+    },
+    {
+        "PublicDescription": "BX Op non-issue",
+        "EventCode": "0x10c",
+        "EventName": "BX_STALL",
+        "BriefDescription": "BX stalled"
+    },
+    {
+        "PublicDescription": "LX Op non-issue",
+        "EventCode": "0x10d",
+        "EventName": "LX_STALL",
+        "BriefDescription": "LX stalled"
+    },
+    {
+        "PublicDescription": "SX Op non-issue",
+        "EventCode": "0x10e",
+        "EventName": "SX_STALL",
+        "BriefDescription": "SX stalled"
+    },
+    {
+        "PublicDescription": "FX Op non-issue",
+        "EventCode": "0x10f",
+        "EventName": "FX_STALL",
+        "BriefDescription": "FX stalled"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
index bc03c06c3918..752e47eb6977 100644
--- a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
+++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
@@ -12,6 +12,21 @@
         "ArchStdEvent": "L1D_CACHE_REFILL_WR",
     },
     {
+        "ArchStdEvent": "L1D_CACHE_REFILL_INNER",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_OUTER",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL",
+    },
+    {
         "ArchStdEvent": "L1D_TLB_REFILL_RD",
     },
     {
@@ -24,9 +39,75 @@
         "ArchStdEvent": "L1D_TLB_WR",
     },
     {
+        "ArchStdEvent": "L2D_TLB_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_WR",
+    },
+    {
         "ArchStdEvent": "BUS_ACCESS_RD",
-   },
-   {
+    },
+    {
         "ArchStdEvent": "BUS_ACCESS_WR",
-   }
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_RD",
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_WR",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_ST_SPEC",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LDST_SPEC",
+    },
+    {
+        "ArchStdEvent": "EXC_UNDEF",
+    },
+    {
+        "ArchStdEvent": "EXC_SVC",
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ",
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ",
+    },
+    {
+        "ArchStdEvent": "EXC_SMC",
+    },
+    {
+        "ArchStdEvent": "EXC_HVC",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ",
+    }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index f03e26ecb658..59cd8604b0bd 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -16,3 +16,4 @@
 0x00000000420f5160,v1,cavium/thunderx2,core
 0x00000000430f0af0,v1,cavium/thunderx2,core
 0x00000000480fd010,v1,hisilicon/hip08,core
+0x00000000500f0000,v1,ampere/emag,core
diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json
index 8bf16759ca53..2dd8dafff2ef 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json
@@ -1,71 +1,83 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "0",
 		"EventName": "CPU_CYCLES",
 		"BriefDescription": "CPU Cycles",
 		"PublicDescription": "Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "1",
 		"EventName": "INSTRUCTIONS",
 		"BriefDescription": "Instructions",
 		"PublicDescription": "Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "2",
 		"EventName": "L1I_DIR_WRITES",
 		"BriefDescription": "L1I Directory Writes",
 		"PublicDescription": "Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "3",
 		"EventName": "L1I_PENALTY_CYCLES",
 		"BriefDescription": "L1I Penalty Cycles",
 		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "4",
 		"EventName": "L1D_DIR_WRITES",
 		"BriefDescription": "L1D Directory Writes",
 		"PublicDescription": "Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "5",
 		"EventName": "L1D_PENALTY_CYCLES",
 		"BriefDescription": "L1D Penalty Cycles",
 		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "32",
 		"EventName": "PROBLEM_STATE_CPU_CYCLES",
 		"BriefDescription": "Problem-State CPU Cycles",
 		"PublicDescription": "Problem-State Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "33",
 		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
 		"BriefDescription": "Problem-State Instructions",
 		"PublicDescription": "Problem-State Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "34",
 		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
 		"BriefDescription": "Problem-State L1I Directory Writes",
 		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "35",
 		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1I Penalty Cycles",
 		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "36",
 		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
 		"BriefDescription": "Problem-State L1D Directory Writes",
 		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "37",
 		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1D Penalty Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json
index 7e5b72492141..db286f19e7b6 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json
@@ -1,95 +1,111 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "64",
 		"EventName": "PRNG_FUNCTIONS",
 		"BriefDescription": "PRNG Functions",
 		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "65",
 		"EventName": "PRNG_CYCLES",
 		"BriefDescription": "PRNG Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "66",
 		"EventName": "PRNG_BLOCKED_FUNCTIONS",
 		"BriefDescription": "PRNG Blocked Functions",
 		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "67",
 		"EventName": "PRNG_BLOCKED_CYCLES",
 		"BriefDescription": "PRNG Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "68",
 		"EventName": "SHA_FUNCTIONS",
 		"BriefDescription": "SHA Functions",
 		"PublicDescription": "Total number of SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "69",
 		"EventName": "SHA_CYCLES",
 		"BriefDescription": "SHA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "70",
 		"EventName": "SHA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "SHA Blocked Functions",
 		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "71",
 		"EventName": "SHA_BLOCKED_CYCLES",
 		"BriefDescription": "SHA Bloced Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "72",
 		"EventName": "DEA_FUNCTIONS",
 		"BriefDescription": "DEA Functions",
 		"PublicDescription": "Total number of the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "73",
 		"EventName": "DEA_CYCLES",
 		"BriefDescription": "DEA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "74",
 		"EventName": "DEA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "DEA Blocked Functions",
 		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "75",
 		"EventName": "DEA_BLOCKED_CYCLES",
 		"BriefDescription": "DEA Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "76",
 		"EventName": "AES_FUNCTIONS",
 		"BriefDescription": "AES Functions",
 		"PublicDescription": "Total number of AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "77",
 		"EventName": "AES_CYCLES",
 		"BriefDescription": "AES Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "78",
 		"EventName": "AES_BLOCKED_FUNCTIONS",
 		"BriefDescription": "AES Blocked Functions",
 		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "79",
 		"EventName": "AES_BLOCKED_CYCLES",
 		"BriefDescription": "AES Blocked Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json
index 0feedb40f30f..b6b7f29ca831 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json
@@ -1,107 +1,125 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "128",
 		"EventName": "L1I_L2_SOURCED_WRITES",
 		"BriefDescription": "L1I L2 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 (L1.5) cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "129",
 		"EventName": "L1D_L2_SOURCED_WRITES",
 		"BriefDescription": "L1D L2 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from the Level-2 (L1.5) cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "130",
 		"EventName": "L1I_L3_LOCAL_WRITES",
 		"BriefDescription": "L1I L3 Local Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from the Level-3 cache that is on the same book as the Instruction cache (Local L2 cache)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "131",
 		"EventName": "L1D_L3_LOCAL_WRITES",
 		"BriefDescription": "L1D L3 Local Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installtion cache line was source from the Level-3 cache that is on the same book as the Data cache (Local L2 cache)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "132",
 		"EventName": "L1I_L3_REMOTE_WRITES",
 		"BriefDescription": "L1I L3 Remote Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Instruction cache (Remote L2 cache)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "133",
 		"EventName": "L1D_L3_REMOTE_WRITES",
 		"BriefDescription": "L1D L3 Remote Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Data cache (Remote L2 cache)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "134",
 		"EventName": "L1D_LMEM_SOURCED_WRITES",
 		"BriefDescription": "L1D Local Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "135",
 		"EventName": "L1I_LMEM_SOURCED_WRITES",
 		"BriefDescription": "L1I Local Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the s ame book as the Instruction cache (Local Memory)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "136",
 		"EventName": "L1D_RO_EXCL_WRITES",
 		"BriefDescription": "L1D Read-only Exclusive Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "137",
 		"EventName": "L1I_CACHELINE_INVALIDATES",
 		"BriefDescription": "L1I Cacheline Invalidates",
 		"PublicDescription": "A cache line in the Level-1 I-Cache has been invalidated by a store on the same CPU as the Level-1 I-Cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "138",
 		"EventName": "ITLB1_WRITES",
 		"BriefDescription": "ITLB1 Writes",
 		"PublicDescription": "A translation entry has been written into the Level-1 Instruction Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "139",
 		"EventName": "DTLB1_WRITES",
 		"BriefDescription": "DTLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "140",
 		"EventName": "TLB2_PTE_WRITES",
 		"BriefDescription": "TLB2 PTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "141",
 		"EventName": "TLB2_CRSTE_WRITES",
 		"BriefDescription": "TLB2 CRSTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "142",
 		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
 		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "145",
 		"EventName": "ITLB1_MISSES",
 		"BriefDescription": "ITLB1 Misses",
 		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "146",
 		"EventName": "DTLB1_MISSES",
 		"BriefDescription": "DTLB1 Misses",
 		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle an DTLB1 miss is in progress"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "147",
 		"EventName": "L2C_STORES_SENT",
 		"BriefDescription": "L2C Stores Sent",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json
index 8bf16759ca53..2dd8dafff2ef 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json
@@ -1,71 +1,83 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "0",
 		"EventName": "CPU_CYCLES",
 		"BriefDescription": "CPU Cycles",
 		"PublicDescription": "Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "1",
 		"EventName": "INSTRUCTIONS",
 		"BriefDescription": "Instructions",
 		"PublicDescription": "Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "2",
 		"EventName": "L1I_DIR_WRITES",
 		"BriefDescription": "L1I Directory Writes",
 		"PublicDescription": "Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "3",
 		"EventName": "L1I_PENALTY_CYCLES",
 		"BriefDescription": "L1I Penalty Cycles",
 		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "4",
 		"EventName": "L1D_DIR_WRITES",
 		"BriefDescription": "L1D Directory Writes",
 		"PublicDescription": "Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "5",
 		"EventName": "L1D_PENALTY_CYCLES",
 		"BriefDescription": "L1D Penalty Cycles",
 		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "32",
 		"EventName": "PROBLEM_STATE_CPU_CYCLES",
 		"BriefDescription": "Problem-State CPU Cycles",
 		"PublicDescription": "Problem-State Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "33",
 		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
 		"BriefDescription": "Problem-State Instructions",
 		"PublicDescription": "Problem-State Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "34",
 		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
 		"BriefDescription": "Problem-State L1I Directory Writes",
 		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "35",
 		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1I Penalty Cycles",
 		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "36",
 		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
 		"BriefDescription": "Problem-State L1D Directory Writes",
 		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "37",
 		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1D Penalty Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json
index 7e5b72492141..db286f19e7b6 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json
@@ -1,95 +1,111 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "64",
 		"EventName": "PRNG_FUNCTIONS",
 		"BriefDescription": "PRNG Functions",
 		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "65",
 		"EventName": "PRNG_CYCLES",
 		"BriefDescription": "PRNG Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "66",
 		"EventName": "PRNG_BLOCKED_FUNCTIONS",
 		"BriefDescription": "PRNG Blocked Functions",
 		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "67",
 		"EventName": "PRNG_BLOCKED_CYCLES",
 		"BriefDescription": "PRNG Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "68",
 		"EventName": "SHA_FUNCTIONS",
 		"BriefDescription": "SHA Functions",
 		"PublicDescription": "Total number of SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "69",
 		"EventName": "SHA_CYCLES",
 		"BriefDescription": "SHA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "70",
 		"EventName": "SHA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "SHA Blocked Functions",
 		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "71",
 		"EventName": "SHA_BLOCKED_CYCLES",
 		"BriefDescription": "SHA Bloced Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "72",
 		"EventName": "DEA_FUNCTIONS",
 		"BriefDescription": "DEA Functions",
 		"PublicDescription": "Total number of the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "73",
 		"EventName": "DEA_CYCLES",
 		"BriefDescription": "DEA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "74",
 		"EventName": "DEA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "DEA Blocked Functions",
 		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "75",
 		"EventName": "DEA_BLOCKED_CYCLES",
 		"BriefDescription": "DEA Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "76",
 		"EventName": "AES_FUNCTIONS",
 		"BriefDescription": "AES Functions",
 		"PublicDescription": "Total number of AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "77",
 		"EventName": "AES_CYCLES",
 		"BriefDescription": "AES Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "78",
 		"EventName": "AES_BLOCKED_FUNCTIONS",
 		"BriefDescription": "AES Blocked Functions",
 		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "79",
 		"EventName": "AES_BLOCKED_CYCLES",
 		"BriefDescription": "AES Blocked Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
index 9a002b6967f1..436ce33f1182 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
@@ -1,335 +1,391 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "128",
 		"EventName": "L1D_RO_EXCL_WRITES",
 		"BriefDescription": "L1D Read-only Exclusive Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "129",
 		"EventName": "DTLB1_WRITES",
 		"BriefDescription": "DTLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "130",
 		"EventName": "DTLB1_MISSES",
 		"BriefDescription": "DTLB1 Misses",
 		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "131",
 		"EventName": "DTLB1_HPAGE_WRITES",
 		"BriefDescription": "DTLB1 One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "132",
 		"EventName": "DTLB1_GPAGE_WRITES",
 		"BriefDescription": "DTLB1 Two-Gigabyte Page Writes",
 		"PublicDescription": "Counter:132	Name:DTLB1_GPAGE_WRITES A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a two-gigabyte page."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "133",
 		"EventName": "L1D_L2D_SOURCED_WRITES",
 		"BriefDescription": "L1D L2D Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "134",
 		"EventName": "ITLB1_WRITES",
 		"BriefDescription": "ITLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "135",
 		"EventName": "ITLB1_MISSES",
 		"BriefDescription": "ITLB1 Misses",
 		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "136",
 		"EventName": "L1I_L2I_SOURCED_WRITES",
 		"BriefDescription": "L1I L2I Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "137",
 		"EventName": "TLB2_PTE_WRITES",
 		"BriefDescription": "TLB2 PTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "138",
 		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
 		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays for a one-megabyte large page translation"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "139",
 		"EventName": "TLB2_CRSTE_WRITES",
 		"BriefDescription": "TLB2 CRSTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "140",
 		"EventName": "TX_C_TEND",
 		"BriefDescription": "Completed TEND instructions in constrained TX mode",
 		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "141",
 		"EventName": "TX_NC_TEND",
 		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
 		"PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "143",
 		"EventName": "L1C_TLB1_MISSES",
 		"BriefDescription": "L1C TLB1 Misses",
 		"PublicDescription": "Increments by one for any cycle where a Level-1 cache or Level-1 TLB miss is in progress."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "144",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "145",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "146",
 		"EventName": "L1D_ONNODE_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Node L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "147",
 		"EventName": "L1D_ONNODE_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D On-Node L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "148",
 		"EventName": "L1D_ONNODE_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Node L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "149",
 		"EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Drawer L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "150",
 		"EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D On-Drawer L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "151",
 		"EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Drawer L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "152",
 		"EventName": "L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer Same-Column L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "153",
 		"EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "154",
 		"EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "155",
 		"EventName": "L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "156",
 		"EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "157",
 		"EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "158",
 		"EventName": "L1D_ONNODE_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Node Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Node memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "159",
 		"EventName": "L1D_ONDRAWER_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Drawer Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "160",
 		"EventName": "L1D_OFFDRAWER_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "161",
 		"EventName": "L1D_ONCHIP_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Chip Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "162",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "163",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "164",
 		"EventName": "L1I_ONNODE_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "165",
 		"EventName": "L1I_ONNODE_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I On-Node L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "166",
 		"EventName": "L1I_ONNODE_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Node L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "167",
 		"EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Drawer L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "168",
 		"EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I On-Drawer L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "169",
 		"EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Drawer L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "170",
 		"EventName": "L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer Same-Column L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "171",
 		"EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "172",
 		"EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "173",
 		"EventName": "L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer Far-Column L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "174",
 		"EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "175",
 		"EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "176",
 		"EventName": "L1I_ONNODE_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Node Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Node memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "177",
 		"EventName": "L1I_ONDRAWER_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Drawer Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "178",
 		"EventName": "L1I_OFFDRAWER_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "179",
 		"EventName": "L1I_ONCHIP_MEM_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "218",
 		"EventName": "TX_NC_TABORT",
 		"BriefDescription": "Aborted transactions in non-constrained TX mode",
 		"PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "219",
 		"EventName": "TX_C_TABORT_NO_SPECIAL",
 		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
 		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "220",
 		"EventName": "TX_C_TABORT_SPECIAL",
 		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
 		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "448",
 		"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
 		"BriefDescription": "Cycle count with one thread active",
 		"PublicDescription": "Cycle count with one thread active"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "449",
 		"EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
 		"BriefDescription": "Cycle count with two threads active",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json
new file mode 100644
index 000000000000..1a0034f79f73
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json
@@ -0,0 +1,7 @@
+[
+  {
+    "BriefDescription": "Transaction count",
+    "MetricName": "transaction",
+    "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json
index 8f653c9d899d..17fb5241928b 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json
@@ -1,47 +1,55 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "0",
 		"EventName": "CPU_CYCLES",
 		"BriefDescription": "CPU Cycles",
 		"PublicDescription": "Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "1",
 		"EventName": "INSTRUCTIONS",
 		"BriefDescription": "Instructions",
 		"PublicDescription": "Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "2",
 		"EventName": "L1I_DIR_WRITES",
 		"BriefDescription": "L1I Directory Writes",
 		"PublicDescription": "Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "3",
 		"EventName": "L1I_PENALTY_CYCLES",
 		"BriefDescription": "L1I Penalty Cycles",
 		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "4",
 		"EventName": "L1D_DIR_WRITES",
 		"BriefDescription": "L1D Directory Writes",
 		"PublicDescription": "Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "5",
 		"EventName": "L1D_PENALTY_CYCLES",
 		"BriefDescription": "L1D Penalty Cycles",
 		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "32",
 		"EventName": "PROBLEM_STATE_CPU_CYCLES",
 		"BriefDescription": "Problem-State CPU Cycles",
 		"PublicDescription": "Problem-State Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "33",
 		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
 		"BriefDescription": "Problem-State Instructions",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json
index 7e5b72492141..db286f19e7b6 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json
@@ -1,95 +1,111 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "64",
 		"EventName": "PRNG_FUNCTIONS",
 		"BriefDescription": "PRNG Functions",
 		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "65",
 		"EventName": "PRNG_CYCLES",
 		"BriefDescription": "PRNG Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "66",
 		"EventName": "PRNG_BLOCKED_FUNCTIONS",
 		"BriefDescription": "PRNG Blocked Functions",
 		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "67",
 		"EventName": "PRNG_BLOCKED_CYCLES",
 		"BriefDescription": "PRNG Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "68",
 		"EventName": "SHA_FUNCTIONS",
 		"BriefDescription": "SHA Functions",
 		"PublicDescription": "Total number of SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "69",
 		"EventName": "SHA_CYCLES",
 		"BriefDescription": "SHA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "70",
 		"EventName": "SHA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "SHA Blocked Functions",
 		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "71",
 		"EventName": "SHA_BLOCKED_CYCLES",
 		"BriefDescription": "SHA Bloced Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "72",
 		"EventName": "DEA_FUNCTIONS",
 		"BriefDescription": "DEA Functions",
 		"PublicDescription": "Total number of the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "73",
 		"EventName": "DEA_CYCLES",
 		"BriefDescription": "DEA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "74",
 		"EventName": "DEA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "DEA Blocked Functions",
 		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "75",
 		"EventName": "DEA_BLOCKED_CYCLES",
 		"BriefDescription": "DEA Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "76",
 		"EventName": "AES_FUNCTIONS",
 		"BriefDescription": "AES Functions",
 		"PublicDescription": "Total number of AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "77",
 		"EventName": "AES_CYCLES",
 		"BriefDescription": "AES Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "78",
 		"EventName": "AES_BLOCKED_FUNCTIONS",
 		"BriefDescription": "AES Blocked Functions",
 		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "79",
 		"EventName": "AES_BLOCKED_CYCLES",
 		"BriefDescription": "AES Blocked Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
index aa4dfb46b65b..e7a3524b748f 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
@@ -1,317 +1,370 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "128",
 		"EventName": "L1D_RO_EXCL_WRITES",
 		"BriefDescription": "L1D Read-only Exclusive Writes",
 		"PublicDescription": "Counter:128	Name:L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "129",
 		"EventName": "DTLB2_WRITES",
 		"BriefDescription": "DTLB2 Writes",
 		"PublicDescription": "A translation has been written into The Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "130",
 		"EventName": "DTLB2_MISSES",
 		"BriefDescription": "DTLB2 Misses",
 		"PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "131",
 		"EventName": "DTLB2_HPAGE_WRITES",
 		"BriefDescription": "DTLB2 One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "132",
 		"EventName": "DTLB2_GPAGE_WRITES",
 		"BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
 		"PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "133",
 		"EventName": "L1D_L2D_SOURCED_WRITES",
 		"BriefDescription": "L1D L2D Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "134",
 		"EventName": "ITLB2_WRITES",
 		"BriefDescription": "ITLB2 Writes",
 		"PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "135",
 		"EventName": "ITLB2_MISSES",
 		"BriefDescription": "ITLB2 Misses",
 		"PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "136",
 		"EventName": "L1I_L2I_SOURCED_WRITES",
 		"BriefDescription": "L1I L2I Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "137",
 		"EventName": "TLB2_PTE_WRITES",
 		"BriefDescription": "TLB2 PTE Writes",
 		"PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "138",
 		"EventName": "TLB2_CRSTE_WRITES",
 		"BriefDescription": "TLB2 CRSTE Writes",
 		"PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "139",
 		"EventName": "TLB2_ENGINES_BUSY",
 		"BriefDescription": "TLB2 Engines Busy",
 		"PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "140",
 		"EventName": "TX_C_TEND",
 		"BriefDescription": "Completed TEND instructions in constrained TX mode",
 		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "141",
 		"EventName": "TX_NC_TEND",
 		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
 		"PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "143",
 		"EventName": "L1C_TLB2_MISSES",
 		"BriefDescription": "L1C TLB2 Misses",
 		"PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "144",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "145",
 		"EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Chip Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "146",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "147",
 		"EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Cluster L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache withountervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "148",
 		"EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Cluster Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "149",
 		"EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "150",
 		"EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "151",
 		"EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "152",
 		"EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "153",
 		"EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "154",
 		"EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "155",
 		"EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "156",
 		"EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Drawer L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "157",
 		"EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "158",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "162",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "163",
 		"EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from On-Chip memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "164",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "165",
 		"EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Cluster L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "166",
 		"EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Cluster Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "167",
 		"EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "168",
 		"EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "169",
 		"EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "170",
 		"EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "171",
 		"EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "172",
 		"EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "173",
 		"EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "174",
 		"EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Drawer L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "175",
 		"EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "224",
 		"EventName": "BCD_DFP_EXECUTION_SLOTS",
 		"BriefDescription": "BCD DFP Execution Slots",
 		"PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "225",
 		"EventName": "VX_BCD_EXECUTION_SLOTS",
 		"BriefDescription": "VX BCD Execution Slots",
 		"PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMPVMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOPVCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVDVCVDG"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "226",
 		"EventName": "DECIMAL_INSTRUCTIONS",
 		"BriefDescription": "Decimal Instructions",
 		"PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "232",
 		"EventName": "LAST_HOST_TRANSLATIONS",
 		"BriefDescription": "Last host translation done",
 		"PublicDescription": "Last Host Translation done"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "243",
 		"EventName": "TX_NC_TABORT",
 		"BriefDescription": "Aborted transactions in non-constrained TX mode",
 		"PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "244",
 		"EventName": "TX_C_TABORT_NO_SPECIAL",
 		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
 		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "245",
 		"EventName": "TX_C_TABORT_SPECIAL",
 		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
 		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "448",
 		"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
 		"BriefDescription": "Cycle count with one thread active",
 		"PublicDescription": "Cycle count with one thread active"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "449",
 		"EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
 		"BriefDescription": "Cycle count with two threads active",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json
new file mode 100644
index 000000000000..1a0034f79f73
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json
@@ -0,0 +1,7 @@
+[
+  {
+    "BriefDescription": "Transaction count",
+    "MetricName": "transaction",
+    "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json
index 8bf16759ca53..2dd8dafff2ef 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json
@@ -1,71 +1,83 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "0",
 		"EventName": "CPU_CYCLES",
 		"BriefDescription": "CPU Cycles",
 		"PublicDescription": "Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "1",
 		"EventName": "INSTRUCTIONS",
 		"BriefDescription": "Instructions",
 		"PublicDescription": "Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "2",
 		"EventName": "L1I_DIR_WRITES",
 		"BriefDescription": "L1I Directory Writes",
 		"PublicDescription": "Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "3",
 		"EventName": "L1I_PENALTY_CYCLES",
 		"BriefDescription": "L1I Penalty Cycles",
 		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "4",
 		"EventName": "L1D_DIR_WRITES",
 		"BriefDescription": "L1D Directory Writes",
 		"PublicDescription": "Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "5",
 		"EventName": "L1D_PENALTY_CYCLES",
 		"BriefDescription": "L1D Penalty Cycles",
 		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "32",
 		"EventName": "PROBLEM_STATE_CPU_CYCLES",
 		"BriefDescription": "Problem-State CPU Cycles",
 		"PublicDescription": "Problem-State Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "33",
 		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
 		"BriefDescription": "Problem-State Instructions",
 		"PublicDescription": "Problem-State Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "34",
 		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
 		"BriefDescription": "Problem-State L1I Directory Writes",
 		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "35",
 		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1I Penalty Cycles",
 		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "36",
 		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
 		"BriefDescription": "Problem-State L1D Directory Writes",
 		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "37",
 		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1D Penalty Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json
index 7e5b72492141..db286f19e7b6 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json
@@ -1,95 +1,111 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "64",
 		"EventName": "PRNG_FUNCTIONS",
 		"BriefDescription": "PRNG Functions",
 		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "65",
 		"EventName": "PRNG_CYCLES",
 		"BriefDescription": "PRNG Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "66",
 		"EventName": "PRNG_BLOCKED_FUNCTIONS",
 		"BriefDescription": "PRNG Blocked Functions",
 		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "67",
 		"EventName": "PRNG_BLOCKED_CYCLES",
 		"BriefDescription": "PRNG Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "68",
 		"EventName": "SHA_FUNCTIONS",
 		"BriefDescription": "SHA Functions",
 		"PublicDescription": "Total number of SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "69",
 		"EventName": "SHA_CYCLES",
 		"BriefDescription": "SHA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "70",
 		"EventName": "SHA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "SHA Blocked Functions",
 		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "71",
 		"EventName": "SHA_BLOCKED_CYCLES",
 		"BriefDescription": "SHA Bloced Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "72",
 		"EventName": "DEA_FUNCTIONS",
 		"BriefDescription": "DEA Functions",
 		"PublicDescription": "Total number of the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "73",
 		"EventName": "DEA_CYCLES",
 		"BriefDescription": "DEA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "74",
 		"EventName": "DEA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "DEA Blocked Functions",
 		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "75",
 		"EventName": "DEA_BLOCKED_CYCLES",
 		"BriefDescription": "DEA Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "76",
 		"EventName": "AES_FUNCTIONS",
 		"BriefDescription": "AES Functions",
 		"PublicDescription": "Total number of AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "77",
 		"EventName": "AES_CYCLES",
 		"BriefDescription": "AES Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "78",
 		"EventName": "AES_BLOCKED_FUNCTIONS",
 		"BriefDescription": "AES Blocked Functions",
 		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "79",
 		"EventName": "AES_BLOCKED_CYCLES",
 		"BriefDescription": "AES Blocked Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json
index b6d7fec7c2e7..b7b42a870bb0 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json
@@ -1,143 +1,167 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "128",
 		"EventName": "L1D_L2_SOURCED_WRITES",
 		"BriefDescription": "L1D L2 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from the Level-2 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "129",
 		"EventName": "L1I_L2_SOURCED_WRITES",
 		"BriefDescription": "L1I L2 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "130",
 		"EventName": "DTLB1_MISSES",
 		"BriefDescription": "DTLB1 Misses",
 		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "131",
 		"EventName": "ITLB1_MISSES",
 		"BriefDescription": "ITLB1 Misses",
 		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle a ITLB1 miss is in progress."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "133",
 		"EventName": "L2C_STORES_SENT",
 		"BriefDescription": "L2C Stores Sent",
 		"PublicDescription": "Incremented by one for every store sent to Level-2 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "134",
 		"EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Book L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "135",
 		"EventName": "L1D_ONBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "136",
 		"EventName": "L1I_ONBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "137",
 		"EventName": "L1D_RO_EXCL_WRITES",
 		"BriefDescription": "L1D Read-only Exclusive Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "138",
 		"EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "139",
 		"EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "140",
 		"EventName": "DTLB1_HPAGE_WRITES",
 		"BriefDescription": "DTLB1 One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "141",
 		"EventName": "L1D_LMEM_SOURCED_WRITES",
 		"BriefDescription": "L1D Local Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "142",
 		"EventName": "L1I_LMEM_SOURCED_WRITES",
 		"BriefDescription": "L1I Local Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "143",
 		"EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Book L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "144",
 		"EventName": "DTLB1_WRITES",
 		"BriefDescription": "DTLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "145",
 		"EventName": "ITLB1_WRITES",
 		"BriefDescription": "ITLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "146",
 		"EventName": "TLB2_PTE_WRITES",
 		"BriefDescription": "TLB2 PTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "147",
 		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
 		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "148",
 		"EventName": "TLB2_CRSTE_WRITES",
 		"BriefDescription": "TLB2 CRSTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "150",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "152",
 		"EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "153",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "155",
 		"EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Chip L3 Sourced Writes",
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json
index 8bf16759ca53..2dd8dafff2ef 100644
--- a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json
@@ -1,71 +1,83 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "0",
 		"EventName": "CPU_CYCLES",
 		"BriefDescription": "CPU Cycles",
 		"PublicDescription": "Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "1",
 		"EventName": "INSTRUCTIONS",
 		"BriefDescription": "Instructions",
 		"PublicDescription": "Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "2",
 		"EventName": "L1I_DIR_WRITES",
 		"BriefDescription": "L1I Directory Writes",
 		"PublicDescription": "Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "3",
 		"EventName": "L1I_PENALTY_CYCLES",
 		"BriefDescription": "L1I Penalty Cycles",
 		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "4",
 		"EventName": "L1D_DIR_WRITES",
 		"BriefDescription": "L1D Directory Writes",
 		"PublicDescription": "Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "5",
 		"EventName": "L1D_PENALTY_CYCLES",
 		"BriefDescription": "L1D Penalty Cycles",
 		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "32",
 		"EventName": "PROBLEM_STATE_CPU_CYCLES",
 		"BriefDescription": "Problem-State CPU Cycles",
 		"PublicDescription": "Problem-State Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "33",
 		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
 		"BriefDescription": "Problem-State Instructions",
 		"PublicDescription": "Problem-State Instruction Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "34",
 		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
 		"BriefDescription": "Problem-State L1I Directory Writes",
 		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "35",
 		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1I Penalty Cycles",
 		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "36",
 		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
 		"BriefDescription": "Problem-State L1D Directory Writes",
 		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "37",
 		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
 		"BriefDescription": "Problem-State L1D Penalty Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json
index 7e5b72492141..db286f19e7b6 100644
--- a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json
@@ -1,95 +1,111 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "64",
 		"EventName": "PRNG_FUNCTIONS",
 		"BriefDescription": "PRNG Functions",
 		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "65",
 		"EventName": "PRNG_CYCLES",
 		"BriefDescription": "PRNG Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "66",
 		"EventName": "PRNG_BLOCKED_FUNCTIONS",
 		"BriefDescription": "PRNG Blocked Functions",
 		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "67",
 		"EventName": "PRNG_BLOCKED_CYCLES",
 		"BriefDescription": "PRNG Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "68",
 		"EventName": "SHA_FUNCTIONS",
 		"BriefDescription": "SHA Functions",
 		"PublicDescription": "Total number of SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "69",
 		"EventName": "SHA_CYCLES",
 		"BriefDescription": "SHA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "70",
 		"EventName": "SHA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "SHA Blocked Functions",
 		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "71",
 		"EventName": "SHA_BLOCKED_CYCLES",
 		"BriefDescription": "SHA Bloced Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "72",
 		"EventName": "DEA_FUNCTIONS",
 		"BriefDescription": "DEA Functions",
 		"PublicDescription": "Total number of the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "73",
 		"EventName": "DEA_CYCLES",
 		"BriefDescription": "DEA Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "74",
 		"EventName": "DEA_BLOCKED_FUNCTIONS",
 		"BriefDescription": "DEA Blocked Functions",
 		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "75",
 		"EventName": "DEA_BLOCKED_CYCLES",
 		"BriefDescription": "DEA Blocked Cycles",
 		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "76",
 		"EventName": "AES_FUNCTIONS",
 		"BriefDescription": "AES Functions",
 		"PublicDescription": "Total number of AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "77",
 		"EventName": "AES_CYCLES",
 		"BriefDescription": "AES Cycles",
 		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "78",
 		"EventName": "AES_BLOCKED_FUNCTIONS",
 		"BriefDescription": "AES Blocked Functions",
 		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "79",
 		"EventName": "AES_BLOCKED_CYCLES",
 		"BriefDescription": "AES Blocked Cycles",
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json
index 8682126aabb2..162251037219 100644
--- a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json
@@ -1,209 +1,244 @@
 [
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "128",
 		"EventName": "DTLB1_MISSES",
 		"BriefDescription": "DTLB1 Misses",
 		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "129",
 		"EventName": "ITLB1_MISSES",
 		"BriefDescription": "ITLB1 Misses",
 		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle a ITLB1 miss is in progress."
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "130",
 		"EventName": "L1D_L2I_SOURCED_WRITES",
 		"BriefDescription": "L1D L2I Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "131",
 		"EventName": "L1I_L2I_SOURCED_WRITES",
 		"BriefDescription": "L1I L2I Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "132",
 		"EventName": "L1D_L2D_SOURCED_WRITES",
 		"BriefDescription": "L1D L2D Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "133",
 		"EventName": "DTLB1_WRITES",
 		"BriefDescription": "DTLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "135",
 		"EventName": "L1D_LMEM_SOURCED_WRITES",
 		"BriefDescription": "L1D Local Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "137",
 		"EventName": "L1I_LMEM_SOURCED_WRITES",
 		"BriefDescription": "L1I Local Memory Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "138",
 		"EventName": "L1D_RO_EXCL_WRITES",
 		"BriefDescription": "L1D Read-only Exclusive Writes",
 		"PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "139",
 		"EventName": "DTLB1_HPAGE_WRITES",
 		"BriefDescription": "DTLB1 One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "140",
 		"EventName": "ITLB1_WRITES",
 		"BriefDescription": "ITLB1 Writes",
 		"PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "141",
 		"EventName": "TLB2_PTE_WRITES",
 		"BriefDescription": "TLB2 PTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "142",
 		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
 		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "143",
 		"EventName": "TLB2_CRSTE_WRITES",
 		"BriefDescription": "TLB2 CRSTE Writes",
 		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "144",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "145",
 		"EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "146",
 		"EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Book L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "147",
 		"EventName": "L1D_ONBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D On-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "148",
 		"EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1D Off-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "149",
 		"EventName": "TX_NC_TEND",
 		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
 		"PublicDescription": "A TEND instruction has completed in a nonconstrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "150",
 		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from a On Chip Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "151",
 		"EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D Off-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "152",
 		"EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1D Off-Book L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "153",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "154",
 		"EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Chip L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "155",
 		"EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Book L3 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "156",
 		"EventName": "L1I_ONBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I On-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "157",
 		"EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES",
 		"BriefDescription": "L1I Off-Book L4 Sourced Writes",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "158",
 		"EventName": "TX_C_TEND",
 		"BriefDescription": "Completed TEND instructions in constrained TX mode",
 		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "159",
 		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "160",
 		"EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I Off-Chip L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "161",
 		"EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES_IV",
 		"BriefDescription": "L1I Off-Book L3 Sourced Writes with Intervention",
 		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "177",
 		"EventName": "TX_NC_TABORT",
 		"BriefDescription": "Aborted transactions in non-constrained TX mode",
 		"PublicDescription": "A transaction abort has occurred in a nonconstrained transactional-execution mode"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "178",
 		"EventName": "TX_C_TABORT_NO_SPECIAL",
 		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
 		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
 	},
 	{
+		"Unit": "CPU-M-CF",
 		"EventCode": "179",
 		"EventName": "TX_C_TABORT_SPECIAL",
 		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json b/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json
new file mode 100644
index 000000000000..1a0034f79f73
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json
@@ -0,0 +1,7 @@
+[
+  {
+    "BriefDescription": "Transaction count",
+    "MetricName": "transaction",
+    "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
index d40498f2cb1e..635c09fda1d9 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
@@ -188,7 +188,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
-        "Filter": "filter_band0=1200",
+        "Filter": "filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -199,7 +199,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
-        "Filter": "filter_band1=2000",
+        "Filter": "filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -210,7 +210,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
-        "Filter": "filter_band2=3000",
+        "Filter": "filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -221,7 +221,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
-        "Filter": "filter_band3=4000",
+        "Filter": "filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
@@ -232,7 +232,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band0=1200",
+        "Filter": "edge=1,filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -243,7 +243,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band1=2000",
+        "Filter": "edge=1,filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -254,7 +254,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band2=4000",
+        "Filter": "edge=1,filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -265,7 +265,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band3=4000",
+        "Filter": "edge=1,filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
index 16034bfd06dd..8755693d86c6 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
@@ -187,7 +187,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
-        "Filter": "filter_band0=1200",
+        "Filter": "filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -198,7 +198,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
-        "Filter": "filter_band1=2000",
+        "Filter": "filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -209,7 +209,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
-        "Filter": "filter_band2=3000",
+        "Filter": "filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -220,7 +220,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
-        "Filter": "filter_band3=4000",
+        "Filter": "filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
@@ -231,7 +231,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band0=1200",
+        "Filter": "edge=1,filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -242,7 +242,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band1=2000",
+        "Filter": "edge=1,filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -253,7 +253,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band2=4000",
+        "Filter": "edge=1,filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -264,7 +264,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band3=4000",
+        "Filter": "edge=1,filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index db3a594ee1e4..68c92bb599ee 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -233,6 +233,8 @@ static struct map {
 	{ "QPI LL", "uncore_qpi" },
 	{ "SBO", "uncore_sbox" },
 	{ "iMPH-U", "uncore_arb" },
+	{ "CPU-M-CF", "cpum_cf" },
+	{ "CPU-M-SF", "cpum_sf" },
 	{}
 };
 
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index 38dfb720fb6f..54ace2f6bc36 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -31,10 +31,8 @@ def flag_str(event_name, field_name, value):
     string = ""
 
     if flag_fields[event_name][field_name]:
-	print_delim = 0
-        keys = flag_fields[event_name][field_name]['values'].keys()
-        keys.sort()
-        for idx in keys:
+        print_delim = 0
+        for idx in sorted(flag_fields[event_name][field_name]['values']):
             if not value and not idx:
                 string += flag_fields[event_name][field_name]['values'][idx]
                 break
@@ -51,14 +49,12 @@ def symbol_str(event_name, field_name, value):
     string = ""
 
     if symbolic_fields[event_name][field_name]:
-        keys = symbolic_fields[event_name][field_name]['values'].keys()
-        keys.sort()
-        for idx in keys:
+        for idx in sorted(symbolic_fields[event_name][field_name]['values']):
             if not value and not idx:
-		string = symbolic_fields[event_name][field_name]['values'][idx]
+                string = symbolic_fields[event_name][field_name]['values'][idx]
                 break
-	    if (value == idx):
-		string = symbolic_fields[event_name][field_name]['values'][idx]
+            if (value == idx):
+                string = symbolic_fields[event_name][field_name]['values'][idx]
                 break
 
     return string
@@ -74,19 +70,17 @@ def trace_flag_str(value):
     string = ""
     print_delim = 0
 
-    keys = trace_flags.keys()
-
-    for idx in keys:
-	if not value and not idx:
-	    string += "NONE"
-	    break
-
-	if idx and (value & idx) == idx:
-	    if print_delim:
-		string += " | ";
-	    string += trace_flags[idx]
-	    print_delim = 1
-	    value &= ~idx
+    for idx in trace_flags:
+        if not value and not idx:
+            string += "NONE"
+            break
+
+        if idx and (value & idx) == idx:
+            if print_delim:
+                string += " | ";
+            string += trace_flags[idx]
+            print_delim = 1
+            value &= ~idx
 
     return string
 
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
index 81a56cd2b3c1..21a7a1298094 100755
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
@@ -8,6 +8,7 @@
 # PerfEvent is the base class for all perf event sample, PebsEvent
 # is a HW base Intel x86 PEBS event, and user could add more SW/HW
 # event classes based on requirements.
+from __future__ import print_function
 
 import struct
 
@@ -44,7 +45,8 @@ class PerfEvent(object):
                 PerfEvent.event_num += 1
 
         def show(self):
-                print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso)
+                print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" %
+                      (self.name, self.symbol, self.comm, self.dso))
 
 #
 # Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
index fdd92f699055..cac7b2542ee8 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -11,7 +11,7 @@
 try:
 	import wx
 except ImportError:
-	raise ImportError, "You need to install the wxpython lib for this script"
+	raise ImportError("You need to install the wxpython lib for this script")
 
 
 class RootFrame(wx.Frame):
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index f6c84966e4f8..7384dcb628c4 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -5,6 +5,7 @@
 # This software may be distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
+from __future__ import print_function
 
 import errno, os
 
@@ -33,7 +34,7 @@ def nsecs_str(nsecs):
     return str
 
 def add_stats(dict, key, value):
-	if not dict.has_key(key):
+	if key not in dict:
 		dict[key] = (value, value, value, 1)
 	else:
 		min, max, avg, count = dict[key]
@@ -72,10 +73,10 @@ try:
 except:
 	if not audit_package_warned:
 		audit_package_warned = True
-		print "Install the audit-libs-python package to get syscall names.\n" \
-                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
-                    "\n  # yum install audit-libs-python (Fedora)" \
-                    "\n  etc.\n"
+		print("Install the audit-libs-python package to get syscall names.\n"
+                    "For example:\n  # apt-get install python-audit (Ubuntu)"
+                    "\n  # yum install audit-libs-python (Fedora)"
+                    "\n  etc.\n")
 
 def syscall_name(id):
 	try:
diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-record b/tools/perf/scripts/python/bin/powerpc-hcalls-record
new file mode 100644
index 000000000000..b7402aa9147d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/powerpc-hcalls-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e "{powerpc:hcall_entry,powerpc:hcall_exit}" $@
diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-report b/tools/perf/scripts/python/bin/powerpc-hcalls-report
new file mode 100644
index 000000000000..dd32ad7465f6
--- /dev/null
+++ b/tools/perf/scripts/python/bin/powerpc-hcalls-report
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/powerpc-hcalls.py
diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
deleted file mode 100644
index b494a67a1c67..000000000000
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ /dev/null
@@ -1,339 +0,0 @@
-#!/usr/bin/python2
-# call-graph-from-sql.py: create call-graph from sql database
-# Copyright (c) 2014-2017, Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-
-# To use this script you will need to have exported data using either the
-# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
-# scripts for details.
-#
-# Following on from the example in the export scripts, a
-# call-graph can be displayed for the pt_example database like this:
-#
-#	python tools/perf/scripts/python/call-graph-from-sql.py pt_example
-#
-# Note that for PostgreSQL, this script supports connecting to remote databases
-# by setting hostname, port, username, password, and dbname e.g.
-#
-#	python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
-#
-# The result is a GUI window with a tree representing a context-sensitive
-# call-graph.  Expanding a couple of levels of the tree and adjusting column
-# widths to suit will display something like:
-#
-#                                         Call Graph: pt_example
-# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
-# v- ls
-#     v- 2638:2638
-#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
-#           |- unknown               unknown       1        13198     0.1              1              0.0
-#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
-#           >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
-#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
-#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
-#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
-#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
-#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
-#              v- main               ls            1      8182043    99.6         180254             99.9
-#
-# Points to note:
-#	The top level is a command name (comm)
-#	The next level is a thread (pid:tid)
-#	Subsequent levels are functions
-#	'Count' is the number of calls
-#	'Time' is the elapsed time until the function returns
-#	Percentages are relative to the level above
-#	'Branch Count' is the total number of branches for that function and all
-#       functions that it calls
-
-import sys
-from PySide.QtCore import *
-from PySide.QtGui import *
-from PySide.QtSql import *
-from decimal import *
-
-class TreeItem():
-
-	def __init__(self, db, row, parent_item):
-		self.db = db
-		self.row = row
-		self.parent_item = parent_item
-		self.query_done = False;
-		self.child_count = 0
-		self.child_items = []
-		self.data = ["", "", "", "", "", "", ""]
-		self.comm_id = 0
-		self.thread_id = 0
-		self.call_path_id = 1
-		self.branch_count = 0
-		self.time = 0
-		if not parent_item:
-			self.setUpRoot()
-
-	def setUpRoot(self):
-		self.query_done = True
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT id, comm FROM comms')
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		while query.next():
-			if not query.value(0):
-				continue
-			child_item = TreeItem(self.db, self.child_count, self)
-			self.child_items.append(child_item)
-			self.child_count += 1
-			child_item.setUpLevel1(query.value(0), query.value(1))
-
-	def setUpLevel1(self, comm_id, comm):
-		self.query_done = True;
-		self.comm_id = comm_id
-		self.data[0] = comm
-		self.child_items = []
-		self.child_count = 0
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		while query.next():
-			child_item = TreeItem(self.db, self.child_count, self)
-			self.child_items.append(child_item)
-			self.child_count += 1
-			child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
-
-	def setUpLevel2(self, comm_id, thread_id, pid, tid):
-		self.comm_id = comm_id
-		self.thread_id = thread_id
-		self.data[0] = str(pid) + ":" + str(tid)
-
-	def getChildItem(self, row):
-		return self.child_items[row]
-
-	def getParentItem(self):
-		return self.parent_item
-
-	def getRow(self):
-		return self.row
-
-	def timePercent(self, b):
-		if not self.time:
-			return "0.0"
-		x = (b * Decimal(100)) / self.time
-		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-	def branchPercent(self, b):
-		if not self.branch_count:
-			return "0.0"
-		x = (b * Decimal(100)) / self.branch_count
-		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-	def addChild(self, call_path_id, name, dso, count, time, branch_count):
-		child_item = TreeItem(self.db, self.child_count, self)
-		child_item.comm_id = self.comm_id
-		child_item.thread_id = self.thread_id
-		child_item.call_path_id = call_path_id
-		child_item.branch_count = branch_count
-		child_item.time = time
-		child_item.data[0] = name
-		if dso == "[kernel.kallsyms]":
-			dso = "[kernel]"
-		child_item.data[1] = dso
-		child_item.data[2] = str(count)
-		child_item.data[3] = str(time)
-		child_item.data[4] = self.timePercent(time)
-		child_item.data[5] = str(branch_count)
-		child_item.data[6] = self.branchPercent(branch_count)
-		self.child_items.append(child_item)
-		self.child_count += 1
-
-	def selectCalls(self):
-		self.query_done = True;
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
-				  '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
-				  '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
-				  '( SELECT ip FROM call_paths where id = call_path_id ) '
-				  'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
-				  ' ORDER BY call_path_id')
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		last_call_path_id = 0
-		name = ""
-		dso = ""
-		count = 0
-		branch_count = 0
-		total_branch_count = 0
-		time = 0
-		total_time = 0
-		while query.next():
-			if query.value(1) == last_call_path_id:
-				count += 1
-				branch_count += query.value(2)
-				time += query.value(4) - query.value(3)
-			else:
-				if count:
-					self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-				last_call_path_id = query.value(1)
-				name = query.value(5)
-				dso = query.value(6)
-				count = 1
-				total_branch_count += branch_count
-				total_time += time
-				branch_count = query.value(2)
-				time = query.value(4) - query.value(3)
-		if count:
-			self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-		total_branch_count += branch_count
-		total_time += time
-		# Top level does not have time or branch count, so fix that here
-		if total_branch_count > self.branch_count:
-			self.branch_count = total_branch_count
-			if self.branch_count:
-				for child_item in self.child_items:
-					child_item.data[6] = self.branchPercent(child_item.branch_count)
-		if total_time > self.time:
-			self.time = total_time
-			if self.time:
-				for child_item in self.child_items:
-					child_item.data[4] = self.timePercent(child_item.time)
-
-	def childCount(self):
-		if not self.query_done:
-			self.selectCalls()
-		return self.child_count
-
-	def columnCount(self):
-		return 7
-
-	def columnHeader(self, column):
-		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
-		return headers[column]
-
-	def getData(self, column):
-		return self.data[column]
-
-class TreeModel(QAbstractItemModel):
-
-	def __init__(self, db, parent=None):
-		super(TreeModel, self).__init__(parent)
-		self.db = db
-		self.root = TreeItem(db, 0, None)
-
-	def columnCount(self, parent):
-		return self.root.columnCount()
-
-	def rowCount(self, parent):
-		if parent.isValid():
-			parent_item = parent.internalPointer()
-		else:
-			parent_item = self.root
-		return parent_item.childCount()
-
-	def headerData(self, section, orientation, role):
-		if role == Qt.TextAlignmentRole:
-			if section > 1:
-				return Qt.AlignRight
-		if role != Qt.DisplayRole:
-			return None
-		if orientation != Qt.Horizontal:
-			return None
-		return self.root.columnHeader(section)
-
-	def parent(self, child):
-		child_item = child.internalPointer()
-		if child_item is self.root:
-			return QModelIndex()
-		parent_item = child_item.getParentItem()
-		return self.createIndex(parent_item.getRow(), 0, parent_item)
-
-	def index(self, row, column, parent):
-		if parent.isValid():
-			parent_item = parent.internalPointer()
-		else:
-			parent_item = self.root
-		child_item = parent_item.getChildItem(row)
-		return self.createIndex(row, column, child_item)
-
-	def data(self, index, role):
-		if role == Qt.TextAlignmentRole:
-			if index.column() > 1:
-				return Qt.AlignRight
-		if role != Qt.DisplayRole:
-			return None
-		index_item = index.internalPointer()
-		return index_item.getData(index.column())
-
-class MainWindow(QMainWindow):
-
-	def __init__(self, db, dbname, parent=None):
-		super(MainWindow, self).__init__(parent)
-
-		self.setObjectName("MainWindow")
-		self.setWindowTitle("Call Graph: " + dbname)
-		self.move(100, 100)
-		self.resize(800, 600)
-		style = self.style()
-		icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
-		self.setWindowIcon(icon);
-
-		self.model = TreeModel(db)
-
-		self.view = QTreeView()
-		self.view.setModel(self.model)
-
-		self.setCentralWidget(self.view)
-
-if __name__ == '__main__':
-	if (len(sys.argv) < 2):
-		print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
-		raise Exception("Too few arguments")
-
-	dbname = sys.argv[1]
-
-	is_sqlite3 = False
-	try:
-		f = open(dbname)
-		if f.read(15) == "SQLite format 3":
-			is_sqlite3 = True
-		f.close()
-	except:
-		pass
-
-	if is_sqlite3:
-		db = QSqlDatabase.addDatabase('QSQLITE')
-	else:
-		db = QSqlDatabase.addDatabase('QPSQL')
-		opts = dbname.split()
-		for opt in opts:
-			if '=' in opt:
-				opt = opt.split('=')
-				if opt[0] == 'hostname':
-					db.setHostName(opt[1])
-				elif opt[0] == 'port':
-					db.setPort(int(opt[1]))
-				elif opt[0] == 'username':
-					db.setUserName(opt[1])
-				elif opt[0] == 'password':
-					db.setPassword(opt[1])
-				elif opt[0] == 'dbname':
-					dbname = opt[1]
-			else:
-				dbname = opt
-
-	db.setDatabaseName(dbname)
-	if not db.open():
-		raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
-
-	app = QApplication(sys.argv)
-	window = MainWindow(db, dbname)
-	window.show()
-	err = app.exec_()
-	db.close()
-	sys.exit(err)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index efcaf6cac2eb..0564dd7377f2 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -59,7 +59,7 @@ import datetime
 #	pt_example=# \q
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # Tables:
 #
@@ -204,14 +204,23 @@ from ctypes import *
 libpq = CDLL("libpq.so.5")
 PQconnectdb = libpq.PQconnectdb
 PQconnectdb.restype = c_void_p
+PQconnectdb.argtypes = [ c_char_p ]
 PQfinish = libpq.PQfinish
+PQfinish.argtypes = [ c_void_p ]
 PQstatus = libpq.PQstatus
+PQstatus.restype = c_int
+PQstatus.argtypes = [ c_void_p ]
 PQexec = libpq.PQexec
 PQexec.restype = c_void_p
+PQexec.argtypes = [ c_void_p, c_char_p ]
 PQresultStatus = libpq.PQresultStatus
+PQresultStatus.restype = c_int
+PQresultStatus.argtypes = [ c_void_p ]
 PQputCopyData = libpq.PQputCopyData
+PQputCopyData.restype = c_int
 PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
 PQputCopyEnd = libpq.PQputCopyEnd
+PQputCopyEnd.restype = c_int
 PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index f827bf77e9d2..245caf2643ed 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -40,7 +40,7 @@ import datetime
 #	sqlite> .quit
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # The database structure is practically the same as created by the script
 # export-to-postgresql.py. Refer to that script for details.  A notable
@@ -440,7 +440,11 @@ def branch_type_table(*x):
 
 def sample_table(*x):
 	if branches:
-		bind_exec(sample_query, 18, x)
+		for xx in x[0:15]:
+			sample_query.addBindValue(str(xx))
+		for xx in x[19:22]:
+			sample_query.addBindValue(str(xx))
+		do_query_(sample_query)
 	else:
 		bind_exec(sample_query, 22, x)
 
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
new file mode 100755
index 000000000000..f278ce5ebab7
--- /dev/null
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -0,0 +1,2615 @@
+#!/usr/bin/python2
+# SPDX-License-Identifier: GPL-2.0
+# exported-sql-viewer.py: view data from sql database
+# Copyright (c) 2014-2018, Intel Corporation.
+
+# To use this script you will need to have exported data using either the
+# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
+# scripts for details.
+#
+# Following on from the example in the export scripts, a
+# call-graph can be displayed for the pt_example database like this:
+#
+#	python tools/perf/scripts/python/exported-sql-viewer.py pt_example
+#
+# Note that for PostgreSQL, this script supports connecting to remote databases
+# by setting hostname, port, username, password, and dbname e.g.
+#
+#	python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph.  Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+#                                         Call Graph: pt_example
+# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+# v- ls
+#     v- 2638:2638
+#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+#           |- unknown               unknown       1        13198     0.1              1              0.0
+#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+#           >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+#              v- main               ls            1      8182043    99.6         180254             99.9
+#
+# Points to note:
+#	The top level is a command name (comm)
+#	The next level is a thread (pid:tid)
+#	Subsequent levels are functions
+#	'Count' is the number of calls
+#	'Time' is the elapsed time until the function returns
+#	Percentages are relative to the level above
+#	'Branch Count' is the total number of branches for that function and all
+#       functions that it calls
+
+# There is also a "All branches" report, which displays branches and
+# possibly disassembly.  However, presently, the only supported disassembler is
+# Intel XED, and additionally the object code must be present in perf build ID
+# cache. To use Intel XED, libxed.so must be present. To build and install
+# libxed.so:
+#            git clone https://github.com/intelxed/mbuild.git mbuild
+#            git clone https://github.com/intelxed/xed
+#            cd xed
+#            ./mfile.py --share
+#            sudo ./mfile.py --prefix=/usr/local install
+#            sudo ldconfig
+#
+# Example report:
+#
+# Time           CPU  Command  PID    TID    Branch Type            In Tx  Branch
+# 8107675239590  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+# 8107675239899  2    ls       22011  22011  hardware interrupt     No         7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675241900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+#                                                                              7fab593ea263 e8 c8 06 00 00                                  callq  0x7fab593ea930
+# 8107675241900  2    ls       22011  22011  call                   No         7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so)
+#                                                                              7fab593ea930 55                                              pushq  %rbp
+#                                                                              7fab593ea931 48 89 e5                                        mov %rsp, %rbp
+#                                                                              7fab593ea934 41 57                                           pushq  %r15
+#                                                                              7fab593ea936 41 56                                           pushq  %r14
+#                                                                              7fab593ea938 41 55                                           pushq  %r13
+#                                                                              7fab593ea93a 41 54                                           pushq  %r12
+#                                                                              7fab593ea93c 53                                              pushq  %rbx
+#                                                                              7fab593ea93d 48 89 fb                                        mov %rdi, %rbx
+#                                                                              7fab593ea940 48 83 ec 68                                     sub $0x68, %rsp
+#                                                                              7fab593ea944 0f 31                                           rdtsc
+#                                                                              7fab593ea946 48 c1 e2 20                                     shl $0x20, %rdx
+#                                                                              7fab593ea94a 89 c0                                           mov %eax, %eax
+#                                                                              7fab593ea94c 48 09 c2                                        or %rax, %rdx
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+# 8107675242232  2    ls       22011  22011  hardware interrupt     No         7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675242900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so)
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+#                                                                              7fab593ea956 48 89 15 3b 13 22 00                            movq  %rdx, 0x22133b(%rip)
+# 8107675243232  2    ls       22011  22011  hardware interrupt     No         7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+
+import sys
+import weakref
+import threading
+import string
+import cPickle
+import re
+import os
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+from ctypes import *
+from multiprocessing import Process, Array, Value, Event
+
+# Data formatting helpers
+
+def tohex(ip):
+	if ip < 0:
+		ip += 1 << 64
+	return "%x" % ip
+
+def offstr(offset):
+	if offset:
+		return "+0x%x" % offset
+	return ""
+
+def dsoname(name):
+	if name == "[kernel.kallsyms]":
+		return "[kernel]"
+	return name
+
+def findnth(s, sub, n, offs=0):
+	pos = s.find(sub)
+	if pos < 0:
+		return pos
+	if n <= 1:
+		return offs + pos
+	return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1)
+
+# Percent to one decimal place
+
+def PercentToOneDP(n, d):
+	if not d:
+		return "0.0"
+	x = (n * Decimal(100)) / d
+	return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP))
+
+# Helper for queries that must not fail
+
+def QueryExec(query, stmt):
+	ret = query.exec_(stmt)
+	if not ret:
+		raise Exception("Query failed: " + query.lastError().text())
+
+# Background thread
+
+class Thread(QThread):
+
+	done = Signal(object)
+
+	def __init__(self, task, param=None, parent=None):
+		super(Thread, self).__init__(parent)
+		self.task = task
+		self.param = param
+
+	def run(self):
+		while True:
+			if self.param is None:
+				done, result = self.task()
+			else:
+				done, result = self.task(self.param)
+			self.done.emit(result)
+			if done:
+				break
+
+# Tree data model
+
+class TreeModel(QAbstractItemModel):
+
+	def __init__(self, root, parent=None):
+		super(TreeModel, self).__init__(parent)
+		self.root = root
+		self.last_row_read = 0
+
+	def Item(self, parent):
+		if parent.isValid():
+			return parent.internalPointer()
+		else:
+			return self.root
+
+	def rowCount(self, parent):
+		result = self.Item(parent).childCount()
+		if result < 0:
+			result = 0
+			self.dataChanged.emit(parent, parent)
+		return result
+
+	def hasChildren(self, parent):
+		return self.Item(parent).hasChildren()
+
+	def headerData(self, section, orientation, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(section)
+		if role != Qt.DisplayRole:
+			return None
+		if orientation != Qt.Horizontal:
+			return None
+		return self.columnHeader(section)
+
+	def parent(self, child):
+		child_item = child.internalPointer()
+		if child_item is self.root:
+			return QModelIndex()
+		parent_item = child_item.getParentItem()
+		return self.createIndex(parent_item.getRow(), 0, parent_item)
+
+	def index(self, row, column, parent):
+		child_item = self.Item(parent).getChildItem(row)
+		return self.createIndex(row, column, child_item)
+
+	def DisplayData(self, item, index):
+		return item.getData(index.column())
+
+	def FetchIfNeeded(self, row):
+		if row > self.last_row_read:
+			self.last_row_read = row
+			if row + 10 >= self.root.child_count:
+				self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnAlignment(self, column):
+		return Qt.AlignLeft
+
+	def columnFont(self, column):
+		return None
+
+	def data(self, index, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(index.column())
+		if role == Qt.FontRole:
+			return self.columnFont(index.column())
+		if role != Qt.DisplayRole:
+			return None
+		item = index.internalPointer()
+		return self.DisplayData(item, index)
+
+# Table data model
+
+class TableModel(QAbstractTableModel):
+
+	def __init__(self, parent=None):
+		super(TableModel, self).__init__(parent)
+		self.child_count = 0
+		self.child_items = []
+		self.last_row_read = 0
+
+	def Item(self, parent):
+		if parent.isValid():
+			return parent.internalPointer()
+		else:
+			return self
+
+	def rowCount(self, parent):
+		return self.child_count
+
+	def headerData(self, section, orientation, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(section)
+		if role != Qt.DisplayRole:
+			return None
+		if orientation != Qt.Horizontal:
+			return None
+		return self.columnHeader(section)
+
+	def index(self, row, column, parent):
+		return self.createIndex(row, column, self.child_items[row])
+
+	def DisplayData(self, item, index):
+		return item.getData(index.column())
+
+	def FetchIfNeeded(self, row):
+		if row > self.last_row_read:
+			self.last_row_read = row
+			if row + 10 >= self.child_count:
+				self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnAlignment(self, column):
+		return Qt.AlignLeft
+
+	def columnFont(self, column):
+		return None
+
+	def data(self, index, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(index.column())
+		if role == Qt.FontRole:
+			return self.columnFont(index.column())
+		if role != Qt.DisplayRole:
+			return None
+		item = index.internalPointer()
+		return self.DisplayData(item, index)
+
+# Model cache
+
+model_cache = weakref.WeakValueDictionary()
+model_cache_lock = threading.Lock()
+
+def LookupCreateModel(model_name, create_fn):
+	model_cache_lock.acquire()
+	try:
+		model = model_cache[model_name]
+	except:
+		model = None
+	if model is None:
+		model = create_fn()
+		model_cache[model_name] = model
+	model_cache_lock.release()
+	return model
+
+# Find bar
+
+class FindBar():
+
+	def __init__(self, parent, finder, is_reg_expr=False):
+		self.finder = finder
+		self.context = []
+		self.last_value = None
+		self.last_pattern = None
+
+		label = QLabel("Find:")
+		label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.textbox = QComboBox()
+		self.textbox.setEditable(True)
+		self.textbox.currentIndexChanged.connect(self.ValueChanged)
+
+		self.progress = QProgressBar()
+		self.progress.setRange(0, 0)
+		self.progress.hide()
+
+		if is_reg_expr:
+			self.pattern = QCheckBox("Regular Expression")
+		else:
+			self.pattern = QCheckBox("Pattern")
+		self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.next_button = QToolButton()
+		self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown))
+		self.next_button.released.connect(lambda: self.NextPrev(1))
+
+		self.prev_button = QToolButton()
+		self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp))
+		self.prev_button.released.connect(lambda: self.NextPrev(-1))
+
+		self.close_button = QToolButton()
+		self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+		self.close_button.released.connect(self.Deactivate)
+
+		self.hbox = QHBoxLayout()
+		self.hbox.setContentsMargins(0, 0, 0, 0)
+
+		self.hbox.addWidget(label)
+		self.hbox.addWidget(self.textbox)
+		self.hbox.addWidget(self.progress)
+		self.hbox.addWidget(self.pattern)
+		self.hbox.addWidget(self.next_button)
+		self.hbox.addWidget(self.prev_button)
+		self.hbox.addWidget(self.close_button)
+
+		self.bar = QWidget()
+		self.bar.setLayout(self.hbox);
+		self.bar.hide()
+
+	def Widget(self):
+		return self.bar
+
+	def Activate(self):
+		self.bar.show()
+		self.textbox.setFocus()
+
+	def Deactivate(self):
+		self.bar.hide()
+
+	def Busy(self):
+		self.textbox.setEnabled(False)
+		self.pattern.hide()
+		self.next_button.hide()
+		self.prev_button.hide()
+		self.progress.show()
+
+	def Idle(self):
+		self.textbox.setEnabled(True)
+		self.progress.hide()
+		self.pattern.show()
+		self.next_button.show()
+		self.prev_button.show()
+
+	def Find(self, direction):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		self.last_value = value
+		self.last_pattern = pattern
+		self.finder.Find(value, direction, pattern, self.context)
+
+	def ValueChanged(self):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		index = self.textbox.currentIndex()
+		data = self.textbox.itemData(index)
+		# Store the pattern in the combo box to keep it with the text value
+		if data == None:
+			self.textbox.setItemData(index, pattern)
+		else:
+			self.pattern.setChecked(data)
+		self.Find(0)
+
+	def NextPrev(self, direction):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		if value != self.last_value:
+			index = self.textbox.findText(value)
+			# Allow for a button press before the value has been added to the combo box
+			if index < 0:
+				index = self.textbox.count()
+				self.textbox.addItem(value, pattern)
+				self.textbox.setCurrentIndex(index)
+				return
+			else:
+				self.textbox.setItemData(index, pattern)
+		elif pattern != self.last_pattern:
+			# Keep the pattern recorded in the combo box up to date
+			index = self.textbox.currentIndex()
+			self.textbox.setItemData(index, pattern)
+		self.Find(direction)
+
+	def NotFound(self):
+		QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found")
+
+# Context-sensitive call graph data model item base
+
+class CallGraphLevelItemBase(object):
+
+	def __init__(self, glb, row, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.query_done = False;
+		self.child_count = 0
+		self.child_items = []
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Context-sensitive call graph data model level 2+ item base
+
+class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item):
+		super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+		self.comm_id = comm_id
+		self.thread_id = thread_id
+		self.call_path_id = call_path_id
+		self.branch_count = branch_count
+		self.time = time
+
+	def Select(self):
+		self.query_done = True;
+		query = QSqlQuery(self.glb.db)
+		QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)"
+					" FROM calls"
+					" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+					" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+					" INNER JOIN dsos ON symbols.dso_id = dsos.id"
+					" WHERE parent_call_path_id = " + str(self.call_path_id) +
+					" AND comm_id = " + str(self.comm_id) +
+					" AND thread_id = " + str(self.thread_id) +
+					" GROUP BY call_path_id, name, short_name"
+					" ORDER BY call_path_id")
+		while query.next():
+			child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model level three item
+
+class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item):
+		super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item)
+		dso = dsoname(dso)
+		self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+		self.dbid = call_path_id
+
+# Context-sensitive call graph data model level two item
+
+class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
+		super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item)
+		self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+		self.dbid = thread_id
+
+	def Select(self):
+		super(CallGraphLevelTwoItem, self).Select()
+		for child_item in self.child_items:
+			self.time += child_item.time
+			self.branch_count += child_item.branch_count
+		for child_item in self.child_items:
+			child_item.data[4] = PercentToOneDP(child_item.time, self.time)
+			child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+
+# Context-sensitive call graph data model level one item
+
+class CallGraphLevelOneItem(CallGraphLevelItemBase):
+
+	def __init__(self, glb, row, comm_id, comm, parent_item):
+		super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item)
+		self.data = [comm, "", "", "", "", "", ""]
+		self.dbid = comm_id
+
+	def Select(self):
+		self.query_done = True;
+		query = QSqlQuery(self.glb.db)
+		QueryExec(query, "SELECT thread_id, pid, tid"
+					" FROM comm_threads"
+					" INNER JOIN threads ON thread_id = threads.id"
+					" WHERE comm_id = " + str(self.dbid))
+		while query.next():
+			child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model root item
+
+class CallGraphRootItem(CallGraphLevelItemBase):
+
+	def __init__(self, glb):
+		super(CallGraphRootItem, self).__init__(glb, 0, None)
+		self.dbid = 0
+		self.query_done = True;
+		query = QSqlQuery(glb.db)
+		QueryExec(query, "SELECT id, comm FROM comms")
+		while query.next():
+			if not query.value(0):
+				continue
+			child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model
+
+class CallGraphModel(TreeModel):
+
+	def __init__(self, glb, parent=None):
+		super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent)
+		self.glb = glb
+
+	def columnCount(self, parent=None):
+		return 7
+
+	def columnHeader(self, column):
+		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+		return headers[column]
+
+	def columnAlignment(self, column):
+		alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+		return alignment[column]
+
+	def FindSelect(self, value, pattern, query):
+		if pattern:
+			# postgresql and sqlite pattern patching differences:
+			#   postgresql LIKE is case sensitive but sqlite LIKE is not
+			#   postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not
+			#   postgresql supports ILIKE which is case insensitive
+			#   sqlite supports GLOB (text only) which uses * and ? and is case sensitive
+			if not self.glb.dbref.is_sqlite3:
+				# Escape % and _
+				s = value.replace("%", "\%")
+				s = s.replace("_", "\_")
+				# Translate * and ? into SQL LIKE pattern characters % and _
+				trans = string.maketrans("*?", "%_")
+				match = " LIKE '" + str(s).translate(trans) + "'"
+			else:
+				match = " GLOB '" + str(value) + "'"
+		else:
+			match = " = '" + str(value) + "'"
+		QueryExec(query, "SELECT call_path_id, comm_id, thread_id"
+						" FROM calls"
+						" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+						" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+						" WHERE symbols.name" + match +
+						" GROUP BY comm_id, thread_id, call_path_id"
+						" ORDER BY comm_id, thread_id, call_path_id")
+
+	def FindPath(self, query):
+		# Turn the query result into a list of ids that the tree view can walk
+		# to open the tree at the right place.
+		ids = []
+		parent_id = query.value(0)
+		while parent_id:
+			ids.insert(0, parent_id)
+			q2 = QSqlQuery(self.glb.db)
+			QueryExec(q2, "SELECT parent_id"
+					" FROM call_paths"
+					" WHERE id = " + str(parent_id))
+			if not q2.next():
+				break
+			parent_id = q2.value(0)
+		# The call path root is not used
+		if ids[0] == 1:
+			del ids[0]
+		ids.insert(0, query.value(2))
+		ids.insert(0, query.value(1))
+		return ids
+
+	def Found(self, query, found):
+		if found:
+			return self.FindPath(query)
+		return []
+
+	def FindValue(self, value, pattern, query, last_value, last_pattern):
+		if last_value == value and pattern == last_pattern:
+			found = query.first()
+		else:
+			self.FindSelect(value, pattern, query)
+			found = query.next()
+		return self.Found(query, found)
+
+	def FindNext(self, query):
+		found = query.next()
+		if not found:
+			found = query.first()
+		return self.Found(query, found)
+
+	def FindPrev(self, query):
+		found = query.previous()
+		if not found:
+			found = query.last()
+		return self.Found(query, found)
+
+	def FindThread(self, c):
+		if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern:
+			ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern)
+		elif c.direction > 0:
+			ids = self.FindNext(c.query)
+		else:
+			ids = self.FindPrev(c.query)
+		return (True, ids)
+
+	def Find(self, value, direction, pattern, context, callback):
+		class Context():
+			def __init__(self, *x):
+				self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x
+			def Update(self, *x):
+				self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern)
+		if len(context):
+			context[0].Update(value, direction, pattern)
+		else:
+			context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None))
+		# Use a thread so the UI is not blocked during the SELECT
+		thread = Thread(self.FindThread, context[0])
+		thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection)
+		thread.start()
+
+	def FindDone(self, thread, callback, ids):
+		callback(ids)
+
+# Vertical widget layout
+
+class VBox():
+
+	def __init__(self, w1, w2, w3=None):
+		self.vbox = QWidget()
+		self.vbox.setLayout(QVBoxLayout());
+
+		self.vbox.layout().setContentsMargins(0, 0, 0, 0)
+
+		self.vbox.layout().addWidget(w1)
+		self.vbox.layout().addWidget(w2)
+		if w3:
+			self.vbox.layout().addWidget(w3)
+
+	def Widget(self):
+		return self.vbox
+
+# Context-sensitive call graph window
+
+class CallGraphWindow(QMdiSubWindow):
+
+	def __init__(self, glb, parent=None):
+		super(CallGraphWindow, self).__init__(parent)
+
+		self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x))
+
+		self.view = QTreeView()
+		self.view.setModel(self.model)
+
+		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
+			self.view.setColumnWidth(c, w)
+
+		self.find_bar = FindBar(self, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph")
+
+	def DisplayFound(self, ids):
+		if not len(ids):
+			return False
+		parent = QModelIndex()
+		for dbid in ids:
+			found = False
+			n = self.model.rowCount(parent)
+			for row in xrange(n):
+				child = self.model.index(row, 0, parent)
+				if child.internalPointer().dbid == dbid:
+					found = True
+					self.view.setCurrentIndex(child)
+					parent = child
+					break
+			if not found:
+				break
+		return found
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.model.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, ids):
+		found = True
+		if not self.DisplayFound(ids):
+			found = False
+		self.find_bar.Idle()
+		if not found:
+			self.find_bar.NotFound()
+
+# Child data item  finder
+
+class ChildDataItemFinder():
+
+	def __init__(self, root):
+		self.root = root
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5
+		self.rows = []
+		self.pos = 0
+
+	def FindSelect(self):
+		self.rows = []
+		if self.pattern:
+			pattern = re.compile(self.value)
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if re.search(pattern, str(column_data)) is not None:
+						self.rows.append(child.row)
+						break
+		else:
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if self.value in str(column_data):
+						self.rows.append(child.row)
+						break
+
+	def FindValue(self):
+		self.pos = 0
+		if self.last_value != self.value or self.pattern != self.last_pattern:
+			self.FindSelect()
+		if not len(self.rows):
+			return -1
+		return self.rows[self.pos]
+
+	def FindThread(self):
+		if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern:
+			row = self.FindValue()
+		elif len(self.rows):
+			if self.direction > 0:
+				self.pos += 1
+				if self.pos >= len(self.rows):
+					self.pos = 0
+			else:
+				self.pos -= 1
+				if self.pos < 0:
+					self.pos = len(self.rows) - 1
+			row = self.rows[self.pos]
+		else:
+			row = -1
+		return (True, row)
+
+	def Find(self, value, direction, pattern, context, callback):
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern)
+		# Use a thread so the UI is not blocked
+		thread = Thread(self.FindThread)
+		thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection)
+		thread.start()
+
+	def FindDone(self, thread, callback, row):
+		callback(row)
+
+# Number of database records to fetch in one go
+
+glb_chunk_sz = 10000
+
+# size of pickled integer big enough for record size
+
+glb_nsz = 8
+
+# Background process for SQL data fetcher
+
+class SQLFetcherProcess():
+
+	def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep):
+		# Need a unique connection name
+		conn_name = "SQLFetcher" + str(os.getpid())
+		self.db, dbname = dbref.Open(conn_name)
+		self.sql = sql
+		self.buffer = buffer
+		self.head = head
+		self.tail = tail
+		self.fetch_count = fetch_count
+		self.fetching_done = fetching_done
+		self.process_target = process_target
+		self.wait_event = wait_event
+		self.fetched_event = fetched_event
+		self.prep = prep
+		self.query = QSqlQuery(self.db)
+		self.query_limit = 0 if "$$last_id$$" in sql else 2
+		self.last_id = -1
+		self.fetched = 0
+		self.more = True
+		self.local_head = self.head.value
+		self.local_tail = self.tail.value
+
+	def Select(self):
+		if self.query_limit:
+			if self.query_limit == 1:
+				return
+			self.query_limit -= 1
+		stmt = self.sql.replace("$$last_id$$", str(self.last_id))
+		QueryExec(self.query, stmt)
+
+	def Next(self):
+		if not self.query.next():
+			self.Select()
+			if not self.query.next():
+				return None
+		self.last_id = self.query.value(0)
+		return self.prep(self.query)
+
+	def WaitForTarget(self):
+		while True:
+			self.wait_event.clear()
+			target = self.process_target.value
+			if target > self.fetched or target < 0:
+				break
+			self.wait_event.wait()
+		return target
+
+	def HasSpace(self, sz):
+		if self.local_tail <= self.local_head:
+			space = len(self.buffer) - self.local_head
+			if space > sz:
+				return True
+			if space >= glb_nsz:
+				# Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer
+				nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL)
+				self.buffer[self.local_head : self.local_head + len(nd)] = nd
+			self.local_head = 0
+		if self.local_tail - self.local_head > sz:
+			return True
+		return False
+
+	def WaitForSpace(self, sz):
+		if self.HasSpace(sz):
+			return
+		while True:
+			self.wait_event.clear()
+			self.local_tail = self.tail.value
+			if self.HasSpace(sz):
+				return
+			self.wait_event.wait()
+
+	def AddToBuffer(self, obj):
+		d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
+		n = len(d)
+		nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL)
+		sz = n + glb_nsz
+		self.WaitForSpace(sz)
+		pos = self.local_head
+		self.buffer[pos : pos + len(nd)] = nd
+		self.buffer[pos + glb_nsz : pos + sz] = d
+		self.local_head += sz
+
+	def FetchBatch(self, batch_size):
+		fetched = 0
+		while batch_size > fetched:
+			obj = self.Next()
+			if obj is None:
+				self.more = False
+				break
+			self.AddToBuffer(obj)
+			fetched += 1
+		if fetched:
+			self.fetched += fetched
+			with self.fetch_count.get_lock():
+				self.fetch_count.value += fetched
+			self.head.value = self.local_head
+			self.fetched_event.set()
+
+	def Run(self):
+		while self.more:
+			target = self.WaitForTarget()
+			if target < 0:
+				break
+			batch_size = min(glb_chunk_sz, target - self.fetched)
+			self.FetchBatch(batch_size)
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+def SQLFetcherFn(*x):
+	process = SQLFetcherProcess(*x)
+	process.Run()
+
+# SQL data fetcher
+
+class SQLFetcher(QObject):
+
+	done = Signal(object)
+
+	def __init__(self, glb, sql, prep, process_data, parent=None):
+		super(SQLFetcher, self).__init__(parent)
+		self.process_data = process_data
+		self.more = True
+		self.target = 0
+		self.last_target = 0
+		self.fetched = 0
+		self.buffer_size = 16 * 1024 * 1024
+		self.buffer = Array(c_char, self.buffer_size, lock=False)
+		self.head = Value(c_longlong)
+		self.tail = Value(c_longlong)
+		self.local_tail = 0
+		self.fetch_count = Value(c_longlong)
+		self.fetching_done = Value(c_bool)
+		self.last_count = 0
+		self.process_target = Value(c_longlong)
+		self.wait_event = Event()
+		self.fetched_event = Event()
+		glb.AddInstanceToShutdownOnExit(self)
+		self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep))
+		self.process.start()
+		self.thread = Thread(self.Thread)
+		self.thread.done.connect(self.ProcessData, Qt.QueuedConnection)
+		self.thread.start()
+
+	def Shutdown(self):
+		# Tell the thread and process to exit
+		self.process_target.value = -1
+		self.wait_event.set()
+		self.more = False
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+	def Thread(self):
+		if not self.more:
+			return True, 0
+		while True:
+			self.fetched_event.clear()
+			fetch_count = self.fetch_count.value
+			if fetch_count != self.last_count:
+				break
+			if self.fetching_done.value:
+				self.more = False
+				return True, 0
+			self.fetched_event.wait()
+		count = fetch_count - self.last_count
+		self.last_count = fetch_count
+		self.fetched += count
+		return False, count
+
+	def Fetch(self, nr):
+		if not self.more:
+			# -1 inidcates there are no more
+			return -1
+		result = self.fetched
+		extra = result + nr - self.target
+		if extra > 0:
+			self.target += extra
+			# process_target < 0 indicates shutting down
+			if self.process_target.value >= 0:
+				self.process_target.value = self.target
+			self.wait_event.set()
+		return result
+
+	def RemoveFromBuffer(self):
+		pos = self.local_tail
+		if len(self.buffer) - pos < glb_nsz:
+			pos = 0
+		n = cPickle.loads(self.buffer[pos : pos + glb_nsz])
+		if n == 0:
+			pos = 0
+			n = cPickle.loads(self.buffer[0 : glb_nsz])
+		pos += glb_nsz
+		obj = cPickle.loads(self.buffer[pos : pos + n])
+		self.local_tail = pos + n
+		return obj
+
+	def ProcessData(self, count):
+		for i in xrange(count):
+			obj = self.RemoveFromBuffer()
+			self.process_data(obj)
+		self.tail.value = self.local_tail
+		self.wait_event.set()
+		self.done.emit(count)
+
+# Fetch more records bar
+
+class FetchMoreRecordsBar():
+
+	def __init__(self, model, parent):
+		self.model = model
+
+		self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:")
+		self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.fetch_count = QSpinBox()
+		self.fetch_count.setRange(1, 1000000)
+		self.fetch_count.setValue(10)
+		self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.fetch = QPushButton("Go!")
+		self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+		self.fetch.released.connect(self.FetchMoreRecords)
+
+		self.progress = QProgressBar()
+		self.progress.setRange(0, 100)
+		self.progress.hide()
+
+		self.done_label = QLabel("All records fetched")
+		self.done_label.hide()
+
+		self.spacer = QLabel("")
+
+		self.close_button = QToolButton()
+		self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+		self.close_button.released.connect(self.Deactivate)
+
+		self.hbox = QHBoxLayout()
+		self.hbox.setContentsMargins(0, 0, 0, 0)
+
+		self.hbox.addWidget(self.label)
+		self.hbox.addWidget(self.fetch_count)
+		self.hbox.addWidget(self.fetch)
+		self.hbox.addWidget(self.spacer)
+		self.hbox.addWidget(self.progress)
+		self.hbox.addWidget(self.done_label)
+		self.hbox.addWidget(self.close_button)
+
+		self.bar = QWidget()
+		self.bar.setLayout(self.hbox);
+		self.bar.show()
+
+		self.in_progress = False
+		self.model.progress.connect(self.Progress)
+
+		self.done = False
+
+		if not model.HasMoreRecords():
+			self.Done()
+
+	def Widget(self):
+		return self.bar
+
+	def Activate(self):
+		self.bar.show()
+		self.fetch.setFocus()
+
+	def Deactivate(self):
+		self.bar.hide()
+
+	def Enable(self, enable):
+		self.fetch.setEnabled(enable)
+		self.fetch_count.setEnabled(enable)
+
+	def Busy(self):
+		self.Enable(False)
+		self.fetch.hide()
+		self.spacer.hide()
+		self.progress.show()
+
+	def Idle(self):
+		self.in_progress = False
+		self.Enable(True)
+		self.progress.hide()
+		self.fetch.show()
+		self.spacer.show()
+
+	def Target(self):
+		return self.fetch_count.value() * glb_chunk_sz
+
+	def Done(self):
+		self.done = True
+		self.Idle()
+		self.label.hide()
+		self.fetch_count.hide()
+		self.fetch.hide()
+		self.spacer.hide()
+		self.done_label.show()
+
+	def Progress(self, count):
+		if self.in_progress:
+			if count:
+				percent = ((count - self.start) * 100) / self.Target()
+				if percent >= 100:
+					self.Idle()
+				else:
+					self.progress.setValue(percent)
+		if not count:
+			# Count value of zero means no more records
+			self.Done()
+
+	def FetchMoreRecords(self):
+		if self.done:
+			return
+		self.progress.setValue(0)
+		self.Busy()
+		self.in_progress = True
+		self.start = self.model.FetchMoreRecords(self.Target())
+
+# Brance data model level two item
+
+class BranchLevelTwoItem():
+
+	def __init__(self, row, text, parent_item):
+		self.row = row
+		self.parent_item = parent_item
+		self.data = [""] * 8
+		self.data[7] = text
+		self.level = 2
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def childCount(self):
+		return 0
+
+	def hasChildren(self):
+		return False
+
+	def getData(self, column):
+		return self.data[column]
+
+# Brance data model level one item
+
+class BranchLevelOneItem():
+
+	def __init__(self, glb, row, data, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.child_count = 0
+		self.child_items = []
+		self.data = data[1:]
+		self.dbid = data[0]
+		self.level = 1
+		self.query_done = False
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def Select(self):
+		self.query_done = True
+
+		if not self.glb.have_disassembler:
+			return
+
+		query = QSqlQuery(self.glb.db)
+
+		QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip"
+				  " FROM samples"
+				  " INNER JOIN dsos ON samples.to_dso_id = dsos.id"
+				  " INNER JOIN symbols ON samples.to_symbol_id = symbols.id"
+				  " WHERE samples.id = " + str(self.dbid))
+		if not query.next():
+			return
+		cpu = query.value(0)
+		dso = query.value(1)
+		sym = query.value(2)
+		if dso == 0 or sym == 0:
+			return
+		off = query.value(3)
+		short_name = query.value(4)
+		long_name = query.value(5)
+		build_id = query.value(6)
+		sym_start = query.value(7)
+		ip = query.value(8)
+
+		QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start"
+				  " FROM samples"
+				  " INNER JOIN symbols ON samples.symbol_id = symbols.id"
+				  " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) +
+				  " ORDER BY samples.id"
+				  " LIMIT 1")
+		if not query.next():
+			return
+		if query.value(0) != dso:
+			# Cannot disassemble from one dso to another
+			return
+		bsym = query.value(1)
+		boff = query.value(2)
+		bsym_start = query.value(3)
+		if bsym == 0:
+			return
+		tot = bsym_start + boff + 1 - sym_start - off
+		if tot <= 0 or tot > 16384:
+			return
+
+		inst = self.glb.disassembler.Instruction()
+		f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id)
+		if not f:
+			return
+		mode = 0 if Is64Bit(f) else 1
+		self.glb.disassembler.SetMode(inst, mode)
+
+		buf_sz = tot + 16
+		buf = create_string_buffer(tot + 16)
+		f.seek(sym_start + off)
+		buf.value = f.read(buf_sz)
+		buf_ptr = addressof(buf)
+		i = 0
+		while tot > 0:
+			cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip)
+			if cnt:
+				byte_str = tohex(ip).rjust(16)
+				for k in xrange(cnt):
+					byte_str += " %02x" % ord(buf[i])
+					i += 1
+				while k < 15:
+					byte_str += "   "
+					k += 1
+				self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self))
+				self.child_count += 1
+			else:
+				return
+			buf_ptr += cnt
+			tot -= cnt
+			buf_sz -= cnt
+			ip += cnt
+
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Brance data model root item
+
+class BranchRootItem():
+
+	def __init__(self):
+		self.child_count = 0
+		self.child_items = []
+		self.level = 0
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return None
+
+	def getRow(self):
+		return 0
+
+	def childCount(self):
+		return self.child_count
+
+	def hasChildren(self):
+		return self.child_count > 0
+
+	def getData(self, column):
+		return ""
+
+# Branch data preparation
+
+def BranchDataPrep(query):
+	data = []
+	for i in xrange(0, 8):
+		data.append(query.value(i))
+	data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+			" (" + dsoname(query.value(11)) + ")" + " -> " +
+			tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+			" (" + dsoname(query.value(15)) + ")")
+	return data
+
+# Branch data model
+
+class BranchModel(TreeModel):
+
+	progress = Signal(object)
+
+	def __init__(self, glb, event_id, where_clause, parent=None):
+		super(BranchModel, self).__init__(BranchRootItem(), parent)
+		self.glb = glb
+		self.event_id = event_id
+		self.more = True
+		self.populated = 0
+		sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
+			" CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
+			" ip, symbols.name, sym_offset, dsos.short_name,"
+			" to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
+			" FROM samples"
+			" INNER JOIN comms ON comm_id = comms.id"
+			" INNER JOIN threads ON thread_id = threads.id"
+			" INNER JOIN branch_types ON branch_type = branch_types.id"
+			" INNER JOIN symbols ON symbol_id = symbols.id"
+			" INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id"
+			" INNER JOIN dsos ON samples.dso_id = dsos.id"
+			" INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id"
+			" WHERE samples.id > $$last_id$$" + where_clause +
+			" AND evsel_id = " + str(self.event_id) +
+			" ORDER BY samples.id"
+			" LIMIT " + str(glb_chunk_sz))
+		self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnCount(self, parent=None):
+		return 8
+
+	def columnHeader(self, column):
+		return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
+
+	def columnFont(self, column):
+		if column != 7:
+			return None
+		return QFont("Monospace")
+
+	def DisplayData(self, item, index):
+		if item.level == 1:
+			self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		child = BranchLevelOneItem(self.glb, self.populated, data, self.root)
+		self.root.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.root.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.root.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.root.child_count)
+
+	def FetchMoreRecords(self, count):
+		current = self.root.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# Branch window
+
+class BranchWindow(QMdiSubWindow):
+
+	def __init__(self, glb, event_id, name, where_clause, parent=None):
+		super(BranchWindow, self).__init__(parent)
+
+		model_name = "Branch Events " + str(event_id)
+		if len(where_clause):
+			model_name = where_clause + " " + model_name
+
+		self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause))
+
+		self.view = QTreeView()
+		self.view.setUniformRowHeights(True)
+		self.view.setModel(self.model)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.model.root)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events")
+
+	def ResizeColumnToContents(self, column, n):
+		# Using the view's resizeColumnToContents() here is extrememly slow
+		# so implement a crude alternative
+		mm = "MM" if column else "MMMM"
+		font = self.view.font()
+		metrics = QFontMetrics(font)
+		max = 0
+		for row in xrange(n):
+			val = self.model.root.child_items[row].data[column]
+			len = metrics.width(str(val) + mm)
+			max = len if len > max else max
+		val = self.model.columnHeader(column)
+		len = metrics.width(str(val) + mm)
+		max = len if len > max else max
+		self.view.setColumnWidth(column, max)
+
+	def ResizeColumnsToContents(self):
+		n = min(self.model.root.child_count, 100)
+		if n < 1:
+			# No data yet, so connect a signal to notify when there is
+			self.model.rowsInserted.connect(self.UpdateColumnWidths)
+			return
+		columns = self.model.columnCount()
+		for i in xrange(columns):
+			self.ResizeColumnToContents(i, n)
+
+	def UpdateColumnWidths(self, *x):
+		# This only needs to be done once, so disconnect the signal now
+		self.model.rowsInserted.disconnect(self.UpdateColumnWidths)
+		self.ResizeColumnsToContents()
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+		else:
+			self.find_bar.NotFound()
+
+# Dialog data item converted and validated using a SQL table
+
+class SQLTableDialogDataItem():
+
+	def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent):
+		self.glb = glb
+		self.label = label
+		self.placeholder_text = placeholder_text
+		self.table_name = table_name
+		self.match_column = match_column
+		self.column_name1 = column_name1
+		self.column_name2 = column_name2
+		self.parent = parent
+
+		self.value = ""
+
+		self.widget = QLineEdit()
+		self.widget.editingFinished.connect(self.Validate)
+		self.widget.textChanged.connect(self.Invalidate)
+		self.red = False
+		self.error = ""
+		self.validated = True
+
+		self.last_id = 0
+		self.first_time = 0
+		self.last_time = 2 ** 64
+		if self.table_name == "<timeranges>":
+			query = QSqlQuery(self.glb.db)
+			QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1")
+			if query.next():
+				self.last_id = int(query.value(0))
+				self.last_time = int(query.value(1))
+			QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1")
+			if query.next():
+				self.first_time = int(query.value(0))
+			if placeholder_text:
+				placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time)
+
+		if placeholder_text:
+			self.widget.setPlaceholderText(placeholder_text)
+
+	def ValueToIds(self, value):
+		ids = []
+		query = QSqlQuery(self.glb.db)
+		stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'"
+		ret = query.exec_(stmt)
+		if ret:
+			while query.next():
+				ids.append(str(query.value(0)))
+		return ids
+
+	def IdBetween(self, query, lower_id, higher_id, order):
+		QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1")
+		if query.next():
+			return True, int(query.value(0))
+		else:
+			return False, 0
+
+	def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor):
+		query = QSqlQuery(self.glb.db)
+		while True:
+			next_id = int((lower_id + higher_id) / 2)
+			QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id))
+			if not query.next():
+				ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC")
+				if not ok:
+					ok, dbid = self.IdBetween(query, next_id, higher_id, "")
+					if not ok:
+						return str(higher_id)
+				next_id = dbid
+				QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id))
+			next_time = int(query.value(0))
+			if get_floor:
+				if target_time > next_time:
+					lower_id = next_id
+				else:
+					higher_id = next_id
+				if higher_id <= lower_id + 1:
+					return str(higher_id)
+			else:
+				if target_time >= next_time:
+					lower_id = next_id
+				else:
+					higher_id = next_id
+				if higher_id <= lower_id + 1:
+					return str(lower_id)
+
+	def ConvertRelativeTime(self, val):
+		print "val ", val
+		mult = 1
+		suffix = val[-2:]
+		if suffix == "ms":
+			mult = 1000000
+		elif suffix == "us":
+			mult = 1000
+		elif suffix == "ns":
+			mult = 1
+		else:
+			return val
+		val = val[:-2].strip()
+		if not self.IsNumber(val):
+			return val
+		val = int(val) * mult
+		if val >= 0:
+			val += self.first_time
+		else:
+			val += self.last_time
+		return str(val)
+
+	def ConvertTimeRange(self, vrange):
+		print "vrange ", vrange
+		if vrange[0] == "":
+			vrange[0] = str(self.first_time)
+		if vrange[1] == "":
+			vrange[1] = str(self.last_time)
+		vrange[0] = self.ConvertRelativeTime(vrange[0])
+		vrange[1] = self.ConvertRelativeTime(vrange[1])
+		print "vrange2 ", vrange
+		if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
+			return False
+		print "ok1"
+		beg_range = max(int(vrange[0]), self.first_time)
+		end_range = min(int(vrange[1]), self.last_time)
+		if beg_range > self.last_time or end_range < self.first_time:
+			return False
+		print "ok2"
+		vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True)
+		vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False)
+		print "vrange3 ", vrange
+		return True
+
+	def AddTimeRange(self, value, ranges):
+		print "value ", value
+		n = value.count("-")
+		if n == 1:
+			pass
+		elif n == 2:
+			if value.split("-")[1].strip() == "":
+				n = 1
+		elif n == 3:
+			n = 2
+		else:
+			return False
+		pos = findnth(value, "-", n)
+		vrange = [value[:pos].strip() ,value[pos+1:].strip()]
+		if self.ConvertTimeRange(vrange):
+			ranges.append(vrange)
+			return True
+		return False
+
+	def InvalidValue(self, value):
+		self.value = ""
+		palette = QPalette()
+		palette.setColor(QPalette.Text,Qt.red)
+		self.widget.setPalette(palette)
+		self.red = True
+		self.error = self.label + " invalid value '" + value + "'"
+		self.parent.ShowMessage(self.error)
+
+	def IsNumber(self, value):
+		try:
+			x = int(value)
+		except:
+			x = 0
+		return str(x) == value
+
+	def Invalidate(self):
+		self.validated = False
+
+	def Validate(self):
+		input_string = self.widget.text()
+		self.validated = True
+		if self.red:
+			palette = QPalette()
+			self.widget.setPalette(palette)
+			self.red = False
+		if not len(input_string.strip()):
+			self.error = ""
+			self.value = ""
+			return
+		if self.table_name == "<timeranges>":
+			ranges = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				if not self.AddTimeRange(value, ranges):
+					return self.InvalidValue(value)
+			ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
+			self.value = " OR ".join(ranges)
+		elif self.table_name == "<ranges>":
+			singles = []
+			ranges = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				if "-" in value:
+					vrange = value.split("-")
+					if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
+						return self.InvalidValue(value)
+					ranges.append(vrange)
+				else:
+					if not self.IsNumber(value):
+						return self.InvalidValue(value)
+					singles.append(value)
+			ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
+			if len(singles):
+				ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")")
+			self.value = " OR ".join(ranges)
+		elif self.table_name:
+			all_ids = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				ids = self.ValueToIds(value)
+				if len(ids):
+					all_ids.extend(ids)
+				else:
+					return self.InvalidValue(value)
+			self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")"
+			if self.column_name2:
+				self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )"
+		else:
+			self.value = input_string.strip()
+		self.error = ""
+		self.parent.ClearMessage()
+
+	def IsValid(self):
+		if not self.validated:
+			self.Validate()
+		if len(self.error):
+			self.parent.ShowMessage(self.error)
+			return False
+		return True
+
+# Selected branch report creation dialog
+
+class SelectedBranchDialog(QDialog):
+
+	def __init__(self, glb, parent=None):
+		super(SelectedBranchDialog, self).__init__(parent)
+
+		self.glb = glb
+
+		self.name = ""
+		self.where_clause = ""
+
+		self.setWindowTitle("Selected Branches")
+		self.setMinimumWidth(600)
+
+		items = (
+			("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""),
+			("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""),
+			("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""),
+			("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""),
+			("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""),
+			("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""),
+			("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"),
+			("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"),
+			("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""),
+			)
+		self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items]
+
+		self.grid = QGridLayout()
+
+		for row in xrange(len(self.data_items)):
+			self.grid.addWidget(QLabel(self.data_items[row].label), row, 0)
+			self.grid.addWidget(self.data_items[row].widget, row, 1)
+
+		self.status = QLabel()
+
+		self.ok_button = QPushButton("Ok", self)
+		self.ok_button.setDefault(True)
+		self.ok_button.released.connect(self.Ok)
+		self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.cancel_button = QPushButton("Cancel", self)
+		self.cancel_button.released.connect(self.reject)
+		self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.hbox = QHBoxLayout()
+		#self.hbox.addStretch()
+		self.hbox.addWidget(self.status)
+		self.hbox.addWidget(self.ok_button)
+		self.hbox.addWidget(self.cancel_button)
+
+		self.vbox = QVBoxLayout()
+		self.vbox.addLayout(self.grid)
+		self.vbox.addLayout(self.hbox)
+
+		self.setLayout(self.vbox);
+
+	def Ok(self):
+		self.name = self.data_items[0].value
+		if not self.name:
+			self.ShowMessage("Report name is required")
+			return
+		for d in self.data_items:
+			if not d.IsValid():
+				return
+		for d in self.data_items[1:]:
+			if len(d.value):
+				if len(self.where_clause):
+					self.where_clause += " AND "
+				self.where_clause += d.value
+		if len(self.where_clause):
+			self.where_clause = " AND ( " + self.where_clause + " ) "
+		else:
+			self.ShowMessage("No selection")
+			return
+		self.accept()
+
+	def ShowMessage(self, msg):
+		self.status.setText("<font color=#FF0000>" + msg)
+
+	def ClearMessage(self):
+		self.status.setText("")
+
+# Event list
+
+def GetEventList(db):
+	events = []
+	query = QSqlQuery(db)
+	QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id")
+	while query.next():
+		events.append(query.value(0))
+	return events
+
+# SQL data preparation
+
+def SQLTableDataPrep(query, count):
+	data = []
+	for i in xrange(count):
+		data.append(query.value(i))
+	return data
+
+# SQL table data model item
+
+class SQLTableItem():
+
+	def __init__(self, row, data):
+		self.row = row
+		self.data = data
+
+	def getData(self, column):
+		return self.data[column]
+
+# SQL table data model
+
+class SQLTableModel(TableModel):
+
+	progress = Signal(object)
+
+	def __init__(self, glb, sql, column_count, parent=None):
+		super(SQLTableModel, self).__init__(parent)
+		self.glb = glb
+		self.more = True
+		self.populated = 0
+		self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def DisplayData(self, item, index):
+		self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		child = SQLTableItem(self.populated, data)
+		self.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.child_count)
+
+	def FetchMoreRecords(self, count):
+		current = self.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# SQL automatic table data model
+
+class SQLAutoTableModel(SQLTableModel):
+
+	def __init__(self, glb, table_name, parent=None):
+		sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz)
+		if table_name == "comm_threads_view":
+			# For now, comm_threads_view has no id column
+			sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz)
+		self.column_headers = []
+		query = QSqlQuery(glb.db)
+		if glb.dbref.is_sqlite3:
+			QueryExec(query, "PRAGMA table_info(" + table_name + ")")
+			while query.next():
+				self.column_headers.append(query.value(1))
+			if table_name == "sqlite_master":
+				sql = "SELECT * FROM " + table_name
+		else:
+			if table_name[:19] == "information_schema.":
+				sql = "SELECT * FROM " + table_name
+				select_table_name = table_name[19:]
+				schema = "information_schema"
+			else:
+				select_table_name = table_name
+				schema = "public"
+			QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
+			while query.next():
+				self.column_headers.append(query.value(0))
+		super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent)
+
+	def columnCount(self, parent=None):
+		return len(self.column_headers)
+
+	def columnHeader(self, column):
+		return self.column_headers[column]
+
+# Base class for custom ResizeColumnsToContents
+
+class ResizeColumnsToContentsBase(QObject):
+
+	def __init__(self, parent=None):
+		super(ResizeColumnsToContentsBase, self).__init__(parent)
+
+	def ResizeColumnToContents(self, column, n):
+		# Using the view's resizeColumnToContents() here is extrememly slow
+		# so implement a crude alternative
+		font = self.view.font()
+		metrics = QFontMetrics(font)
+		max = 0
+		for row in xrange(n):
+			val = self.data_model.child_items[row].data[column]
+			len = metrics.width(str(val) + "MM")
+			max = len if len > max else max
+		val = self.data_model.columnHeader(column)
+		len = metrics.width(str(val) + "MM")
+		max = len if len > max else max
+		self.view.setColumnWidth(column, max)
+
+	def ResizeColumnsToContents(self):
+		n = min(self.data_model.child_count, 100)
+		if n < 1:
+			# No data yet, so connect a signal to notify when there is
+			self.data_model.rowsInserted.connect(self.UpdateColumnWidths)
+			return
+		columns = self.data_model.columnCount()
+		for i in xrange(columns):
+			self.ResizeColumnToContents(i, n)
+
+	def UpdateColumnWidths(self, *x):
+		# This only needs to be done once, so disconnect the signal now
+		self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths)
+		self.ResizeColumnsToContents()
+
+# Table window
+
+class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
+
+	def __init__(self, glb, table_name, parent=None):
+		super(TableWindow, self).__init__(parent)
+
+		self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name))
+
+		self.model = QSortFilterProxyModel()
+		self.model.setSourceModel(self.data_model)
+
+		self.view = QTableView()
+		self.view.setModel(self.model)
+		self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
+		self.view.verticalHeader().setVisible(False)
+		self.view.sortByColumn(-1, Qt.AscendingOrder)
+		self.view.setSortingEnabled(True)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.data_model)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.data_model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table")
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex())))
+		else:
+			self.find_bar.NotFound()
+
+# Table list
+
+def GetTableList(glb):
+	tables = []
+	query = QSqlQuery(glb.db)
+	if glb.dbref.is_sqlite3:
+		QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name")
+	else:
+		QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name")
+	while query.next():
+		tables.append(query.value(0))
+	if glb.dbref.is_sqlite3:
+		tables.append("sqlite_master")
+	else:
+		tables.append("information_schema.tables")
+		tables.append("information_schema.views")
+		tables.append("information_schema.columns")
+	return tables
+
+# Action Definition
+
+def CreateAction(label, tip, callback, parent=None, shortcut=None):
+	action = QAction(label, parent)
+	if shortcut != None:
+		action.setShortcuts(shortcut)
+	action.setStatusTip(tip)
+	action.triggered.connect(callback)
+	return action
+
+# Typical application actions
+
+def CreateExitAction(app, parent=None):
+	return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit)
+
+# Typical MDI actions
+
+def CreateCloseActiveWindowAction(mdi_area):
+	return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area)
+
+def CreateCloseAllWindowsAction(mdi_area):
+	return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area)
+
+def CreateTileWindowsAction(mdi_area):
+	return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area)
+
+def CreateCascadeWindowsAction(mdi_area):
+	return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area)
+
+def CreateNextWindowAction(mdi_area):
+	return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild)
+
+def CreatePreviousWindowAction(mdi_area):
+	return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild)
+
+# Typical MDI window menu
+
+class WindowMenu():
+
+	def __init__(self, mdi_area, menu):
+		self.mdi_area = mdi_area
+		self.window_menu = menu.addMenu("&Windows")
+		self.close_active_window = CreateCloseActiveWindowAction(mdi_area)
+		self.close_all_windows = CreateCloseAllWindowsAction(mdi_area)
+		self.tile_windows = CreateTileWindowsAction(mdi_area)
+		self.cascade_windows = CreateCascadeWindowsAction(mdi_area)
+		self.next_window = CreateNextWindowAction(mdi_area)
+		self.previous_window = CreatePreviousWindowAction(mdi_area)
+		self.window_menu.aboutToShow.connect(self.Update)
+
+	def Update(self):
+		self.window_menu.clear()
+		sub_window_count = len(self.mdi_area.subWindowList())
+		have_sub_windows = sub_window_count != 0
+		self.close_active_window.setEnabled(have_sub_windows)
+		self.close_all_windows.setEnabled(have_sub_windows)
+		self.tile_windows.setEnabled(have_sub_windows)
+		self.cascade_windows.setEnabled(have_sub_windows)
+		self.next_window.setEnabled(have_sub_windows)
+		self.previous_window.setEnabled(have_sub_windows)
+		self.window_menu.addAction(self.close_active_window)
+		self.window_menu.addAction(self.close_all_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.tile_windows)
+		self.window_menu.addAction(self.cascade_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.next_window)
+		self.window_menu.addAction(self.previous_window)
+		if sub_window_count == 0:
+			return
+		self.window_menu.addSeparator()
+		nr = 1
+		for sub_window in self.mdi_area.subWindowList():
+			label = str(nr) + " " + sub_window.name
+			if nr < 10:
+				label = "&" + label
+			action = self.window_menu.addAction(label)
+			action.setCheckable(True)
+			action.setChecked(sub_window == self.mdi_area.activeSubWindow())
+			action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x))
+			self.window_menu.addAction(action)
+			nr += 1
+
+	def setActiveSubWindow(self, nr):
+		self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
+
+# Help text
+
+glb_help_text = """
+<h1>Contents</h1>
+<style>
+p.c1 {
+    text-indent: 40px;
+}
+p.c2 {
+    text-indent: 80px;
+}
+}
+</style>
+<p class=c1><a href=#reports>1. Reports</a></p>
+<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p>
+<p class=c2><a href=#allbranches>1.2 All branches</a></p>
+<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p>
+<p class=c1><a href=#tables>2. Tables</a></p>
+<h1 id=reports>1. Reports</h1>
+<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2>
+The result is a GUI window with a tree representing a context-sensitive
+call-graph. Expanding a couple of levels of the tree and adjusting column
+widths to suit will display something like:
+<pre>
+                                         Call Graph: pt_example
+Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+v- ls
+    v- 2638:2638
+        v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+          |- unknown               unknown       1        13198     0.1              1              0.0
+          >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+          >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+          v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+             >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+             >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+             >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+             |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+             v- main               ls            1      8182043    99.6         180254             99.9
+</pre>
+<h3>Points to note:</h3>
+<ul>
+<li>The top level is a command name (comm)</li>
+<li>The next level is a thread (pid:tid)</li>
+<li>Subsequent levels are functions</li>
+<li>'Count' is the number of calls</li>
+<li>'Time' is the elapsed time until the function returns</li>
+<li>Percentages are relative to the level above</li>
+<li>'Branch Count' is the total number of branches for that function and all functions that it calls
+</ul>
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match.
+The pattern matching symbols are ? for any character and * for zero or more characters.
+<h2 id=allbranches>1.2 All branches</h2>
+The All branches report displays all branches in chronological order.
+Not all data is fetched immediately. More records can be fetched using the Fetch bar provided.
+<h3>Disassembly</h3>
+Open a branch to display disassembly. This only works if:
+<ol>
+<li>The disassembler is available. Currently, only Intel XED is supported - see <a href=#xed>Intel XED Setup</a></li>
+<li>The object code is available. Currently, only the perf build ID cache is searched for object code.
+The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR.
+One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu),
+or alternatively, set environment variable PERF_KCORE to the kcore file name.</li>
+</ol>
+<h4 id=xed>Intel XED Setup</h4>
+To use Intel XED, libxed.so must be present.  To build and install libxed.so:
+<pre>
+git clone https://github.com/intelxed/mbuild.git mbuild
+git clone https://github.com/intelxed/xed
+cd xed
+./mfile.py --share
+sudo ./mfile.py --prefix=/usr/local install
+sudo ldconfig
+</pre>
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
+Refer to Python documentation for the regular expression syntax.
+All columns are searched, but only currently fetched rows are searched.
+<h2 id=selectedbranches>1.3 Selected branches</h2>
+This is the same as the <a href=#allbranches>All branches</a> report but with the data reduced
+by various selection criteria. A dialog box displays available criteria which are AND'ed together.
+<h3>1.3.1 Time ranges</h3>
+The time ranges hint text shows the total time range. Relative time ranges can also be entered in
+ms, us or ns. Also, negative values are relative to the end of trace.  Examples:
+<pre>
+	81073085947329-81073085958238	From 81073085947329 to 81073085958238
+	100us-200us		From 100us to 200us
+	10ms-			From 10ms to the end
+	-100ns			The first 100ns
+	-10ms-			The last 10ms
+</pre>
+N.B. Due to the granularity of timestamps, there could be no branches in any given time range.
+<h1 id=tables>2. Tables</h1>
+The Tables menu shows all tables and views in the database. Most tables have an associated view
+which displays the information in a more friendly way. Not all data for large tables is fetched
+immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted,
+but that can be slow for large tables.
+<p>There are also tables of database meta-information.
+For SQLite3 databases, the sqlite_master table is included.
+For PostgreSQL databases, information_schema.tables/views/columns are included.
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
+Refer to Python documentation for the regular expression syntax.
+All columns are searched, but only currently fetched rows are searched.
+<p>N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous
+will go to the next/previous result in id order, instead of display order.
+"""
+
+# Help window
+
+class HelpWindow(QMdiSubWindow):
+
+	def __init__(self, glb, parent=None):
+		super(HelpWindow, self).__init__(parent)
+
+		self.text = QTextBrowser()
+		self.text.setHtml(glb_help_text)
+		self.text.setReadOnly(True)
+		self.text.setOpenExternalLinks(True)
+
+		self.setWidget(self.text)
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help")
+
+# Main window that only displays the help text
+
+class HelpOnlyWindow(QMainWindow):
+
+	def __init__(self, parent=None):
+		super(HelpOnlyWindow, self).__init__(parent)
+
+		self.setMinimumSize(200, 100)
+		self.resize(800, 600)
+		self.setWindowTitle("Exported SQL Viewer Help")
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation))
+
+		self.text = QTextBrowser()
+		self.text.setHtml(glb_help_text)
+		self.text.setReadOnly(True)
+		self.text.setOpenExternalLinks(True)
+
+		self.setCentralWidget(self.text)
+
+# Font resize
+
+def ResizeFont(widget, diff):
+	font = widget.font()
+	sz = font.pointSize()
+	font.setPointSize(sz + diff)
+	widget.setFont(font)
+
+def ShrinkFont(widget):
+	ResizeFont(widget, -1)
+
+def EnlargeFont(widget):
+	ResizeFont(widget, 1)
+
+# Unique name for sub-windows
+
+def NumberedWindowName(name, nr):
+	if nr > 1:
+		name += " <" + str(nr) + ">"
+	return name
+
+def UniqueSubWindowName(mdi_area, name):
+	nr = 1
+	while True:
+		unique_name = NumberedWindowName(name, nr)
+		ok = True
+		for sub_window in mdi_area.subWindowList():
+			if sub_window.name == unique_name:
+				ok = False
+				break
+		if ok:
+			return unique_name
+		nr += 1
+
+# Add a sub-window
+
+def AddSubWindow(mdi_area, sub_window, name):
+	unique_name = UniqueSubWindowName(mdi_area, name)
+	sub_window.setMinimumSize(200, 100)
+	sub_window.resize(800, 600)
+	sub_window.setWindowTitle(unique_name)
+	sub_window.setAttribute(Qt.WA_DeleteOnClose)
+	sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon))
+	sub_window.name = unique_name
+	mdi_area.addSubWindow(sub_window)
+	sub_window.show()
+
+# Main window
+
+class MainWindow(QMainWindow):
+
+	def __init__(self, glb, parent=None):
+		super(MainWindow, self).__init__(parent)
+
+		self.glb = glb
+
+		self.setWindowTitle("Exported SQL Viewer: " + glb.dbname)
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
+		self.setMinimumSize(200, 100)
+
+		self.mdi_area = QMdiArea()
+		self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+		self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+
+		self.setCentralWidget(self.mdi_area)
+
+		menu = self.menuBar()
+
+		file_menu = menu.addMenu("&File")
+		file_menu.addAction(CreateExitAction(glb.app, self))
+
+		edit_menu = menu.addMenu("&Edit")
+		edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+		edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)]))
+		edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
+		edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
+
+		reports_menu = menu.addMenu("&Reports")
+		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
+
+		self.EventMenu(GetEventList(glb.db), reports_menu)
+
+		self.TableMenu(GetTableList(glb), menu)
+
+		self.window_menu = WindowMenu(self.mdi_area, menu)
+
+		help_menu = menu.addMenu("&Help")
+		help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents))
+
+	def Find(self):
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			try:
+				win.find_bar.Activate()
+			except:
+				pass
+
+	def FetchMoreRecords(self):
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			try:
+				win.fetch_bar.Activate()
+			except:
+				pass
+
+	def ShrinkFont(self):
+		win = self.mdi_area.activeSubWindow()
+		ShrinkFont(win.view)
+
+	def EnlargeFont(self):
+		win = self.mdi_area.activeSubWindow()
+		EnlargeFont(win.view)
+
+	def EventMenu(self, events, reports_menu):
+		branches_events = 0
+		for event in events:
+			event = event.split(":")[0]
+			if event == "branches":
+				branches_events += 1
+		dbid = 0
+		for event in events:
+			dbid += 1
+			event = event.split(":")[0]
+			if event == "branches":
+				label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
+				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self))
+				label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")"
+				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self))
+
+	def TableMenu(self, tables, menu):
+		table_menu = menu.addMenu("&Tables")
+		for table in tables:
+			table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self))
+
+	def NewCallGraph(self):
+		CallGraphWindow(self.glb, self)
+
+	def NewBranchView(self, event_id):
+		BranchWindow(self.glb, event_id, "", "", self)
+
+	def NewSelectedBranchView(self, event_id):
+		dialog = SelectedBranchDialog(self.glb, self)
+		ret = dialog.exec_()
+		if ret:
+			BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self)
+
+	def NewTableView(self, table_name):
+		TableWindow(self.glb, table_name, self)
+
+	def Help(self):
+		HelpWindow(self.glb, self)
+
+# XED Disassembler
+
+class xed_state_t(Structure):
+
+	_fields_ = [
+		("mode", c_int),
+		("width", c_int)
+	]
+
+class XEDInstruction():
+
+	def __init__(self, libxed):
+		# Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
+		xedd_t = c_byte * 512
+		self.xedd = xedd_t()
+		self.xedp = addressof(self.xedd)
+		libxed.xed_decoded_inst_zero(self.xedp)
+		self.state = xed_state_t()
+		self.statep = addressof(self.state)
+		# Buffer for disassembled instruction text
+		self.buffer = create_string_buffer(256)
+		self.bufferp = addressof(self.buffer)
+
+class LibXED():
+
+	def __init__(self):
+		try:
+			self.libxed = CDLL("libxed.so")
+		except:
+			self.libxed = None
+		if not self.libxed:
+			self.libxed = CDLL("/usr/local/lib/libxed.so")
+
+		self.xed_tables_init = self.libxed.xed_tables_init
+		self.xed_tables_init.restype = None
+		self.xed_tables_init.argtypes = []
+
+		self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero
+		self.xed_decoded_inst_zero.restype = None
+		self.xed_decoded_inst_zero.argtypes = [ c_void_p ]
+
+		self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode
+		self.xed_operand_values_set_mode.restype = None
+		self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ]
+
+		self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode
+		self.xed_decoded_inst_zero_keep_mode.restype = None
+		self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ]
+
+		self.xed_decode = self.libxed.xed_decode
+		self.xed_decode.restype = c_int
+		self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ]
+
+		self.xed_format_context = self.libxed.xed_format_context
+		self.xed_format_context.restype = c_uint
+		self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]
+
+		self.xed_tables_init()
+
+	def Instruction(self):
+		return XEDInstruction(self)
+
+	def SetMode(self, inst, mode):
+		if mode:
+			inst.state.mode = 4 # 32-bit
+			inst.state.width = 4 # 4 bytes
+		else:
+			inst.state.mode = 1 # 64-bit
+			inst.state.width = 8 # 8 bytes
+		self.xed_operand_values_set_mode(inst.xedp, inst.statep)
+
+	def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
+		self.xed_decoded_inst_zero_keep_mode(inst.xedp)
+		err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt)
+		if err:
+			return 0, ""
+		# Use AT&T mode (2), alternative is Intel (3)
+		ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
+		if not ok:
+			return 0, ""
+		# Return instruction length and the disassembled instruction text
+		# For now, assume the length is in byte 166
+		return inst.xedd[166], inst.buffer.value
+
+def TryOpen(file_name):
+	try:
+		return open(file_name, "rb")
+	except:
+		return None
+
+def Is64Bit(f):
+	result = sizeof(c_void_p)
+	# ELF support only
+	pos = f.tell()
+	f.seek(0)
+	header = f.read(7)
+	f.seek(pos)
+	magic = header[0:4]
+	eclass = ord(header[4])
+	encoding = ord(header[5])
+	version = ord(header[6])
+	if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
+		result = True if eclass == 2 else False
+	return result
+
+# Global data
+
+class Glb():
+
+	def __init__(self, dbref, db, dbname):
+		self.dbref = dbref
+		self.db = db
+		self.dbname = dbname
+		self.home_dir = os.path.expanduser("~")
+		self.buildid_dir = os.getenv("PERF_BUILDID_DIR")
+		if self.buildid_dir:
+			self.buildid_dir += "/.build-id/"
+		else:
+			self.buildid_dir = self.home_dir + "/.debug/.build-id/"
+		self.app = None
+		self.mainwindow = None
+		self.instances_to_shutdown_on_exit = weakref.WeakSet()
+		try:
+			self.disassembler = LibXED()
+			self.have_disassembler = True
+		except:
+			self.have_disassembler = False
+
+	def FileFromBuildId(self, build_id):
+		file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf"
+		return TryOpen(file_name)
+
+	def FileFromNamesAndBuildId(self, short_name, long_name, build_id):
+		# Assume current machine i.e. no support for virtualization
+		if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore":
+			file_name = os.getenv("PERF_KCORE")
+			f = TryOpen(file_name) if file_name else None
+			if f:
+				return f
+			# For now, no special handling if long_name is /proc/kcore
+			f = TryOpen(long_name)
+			if f:
+				return f
+		f = self.FileFromBuildId(build_id)
+		if f:
+			return f
+		return None
+
+	def AddInstanceToShutdownOnExit(self, instance):
+		self.instances_to_shutdown_on_exit.add(instance)
+
+	# Shutdown any background processes or threads
+	def ShutdownInstances(self):
+		for x in self.instances_to_shutdown_on_exit:
+			try:
+				x.Shutdown()
+			except:
+				pass
+
+# Database reference
+
+class DBRef():
+
+	def __init__(self, is_sqlite3, dbname):
+		self.is_sqlite3 = is_sqlite3
+		self.dbname = dbname
+
+	def Open(self, connection_name):
+		dbname = self.dbname
+		if self.is_sqlite3:
+			db = QSqlDatabase.addDatabase("QSQLITE", connection_name)
+		else:
+			db = QSqlDatabase.addDatabase("QPSQL", connection_name)
+			opts = dbname.split()
+			for opt in opts:
+				if "=" in opt:
+					opt = opt.split("=")
+					if opt[0] == "hostname":
+						db.setHostName(opt[1])
+					elif opt[0] == "port":
+						db.setPort(int(opt[1]))
+					elif opt[0] == "username":
+						db.setUserName(opt[1])
+					elif opt[0] == "password":
+						db.setPassword(opt[1])
+					elif opt[0] == "dbname":
+						dbname = opt[1]
+				else:
+					dbname = opt
+
+		db.setDatabaseName(dbname)
+		if not db.open():
+			raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+		return db, dbname
+
+# Main
+
+def Main():
+	if (len(sys.argv) < 2):
+		print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}"
+		raise Exception("Too few arguments")
+
+	dbname = sys.argv[1]
+	if dbname == "--help-only":
+		app = QApplication(sys.argv)
+		mainwindow = HelpOnlyWindow()
+		mainwindow.show()
+		err = app.exec_()
+		sys.exit(err)
+
+	is_sqlite3 = False
+	try:
+		f = open(dbname)
+		if f.read(15) == "SQLite format 3":
+			is_sqlite3 = True
+		f.close()
+	except:
+		pass
+
+	dbref = DBRef(is_sqlite3, dbname)
+	db, dbname = dbref.Open("main")
+	glb = Glb(dbref, db, dbname)
+	app = QApplication(sys.argv)
+	glb.app = app
+	mainwindow = MainWindow(glb)
+	glb.mainwindow = mainwindow
+	mainwindow.show()
+	err = app.exec_()
+	glb.ShutdownInstances()
+	db.close()
+	sys.exit(err)
+
+if __name__ == "__main__":
+	Main()
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py
new file mode 100644
index 000000000000..00e0e7476e55
--- /dev/null
+++ b/tools/perf/scripts/python/powerpc-hcalls.py
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2018 Ravi Bangoria, IBM Corporation
+#
+# Hypervisor call statisics
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+# output: {
+#	opcode: {
+#		'min': minimum time nsec
+#		'max': maximum time nsec
+#		'time': average time nsec
+#		'cnt': counter
+#	} ...
+# }
+output = {}
+
+# d_enter: {
+#	cpu: {
+#		opcode: nsec
+#	} ...
+# }
+d_enter = {}
+
+hcall_table = {
+	4: 'H_REMOVE',
+	8: 'H_ENTER',
+	12: 'H_READ',
+	16: 'H_CLEAR_MOD',
+	20: 'H_CLEAR_REF',
+	24: 'H_PROTECT',
+	28: 'H_GET_TCE',
+	32: 'H_PUT_TCE',
+	36: 'H_SET_SPRG0',
+	40: 'H_SET_DABR',
+	44: 'H_PAGE_INIT',
+	48: 'H_SET_ASR',
+	52: 'H_ASR_ON',
+	56: 'H_ASR_OFF',
+	60: 'H_LOGICAL_CI_LOAD',
+	64: 'H_LOGICAL_CI_STORE',
+	68: 'H_LOGICAL_CACHE_LOAD',
+	72: 'H_LOGICAL_CACHE_STORE',
+	76: 'H_LOGICAL_ICBI',
+	80: 'H_LOGICAL_DCBF',
+	84: 'H_GET_TERM_CHAR',
+	88: 'H_PUT_TERM_CHAR',
+	92: 'H_REAL_TO_LOGICAL',
+	96: 'H_HYPERVISOR_DATA',
+	100: 'H_EOI',
+	104: 'H_CPPR',
+	108: 'H_IPI',
+	112: 'H_IPOLL',
+	116: 'H_XIRR',
+	120: 'H_MIGRATE_DMA',
+	124: 'H_PERFMON',
+	220: 'H_REGISTER_VPA',
+	224: 'H_CEDE',
+	228: 'H_CONFER',
+	232: 'H_PROD',
+	236: 'H_GET_PPP',
+	240: 'H_SET_PPP',
+	244: 'H_PURR',
+	248: 'H_PIC',
+	252: 'H_REG_CRQ',
+	256: 'H_FREE_CRQ',
+	260: 'H_VIO_SIGNAL',
+	264: 'H_SEND_CRQ',
+	272: 'H_COPY_RDMA',
+	276: 'H_REGISTER_LOGICAL_LAN',
+	280: 'H_FREE_LOGICAL_LAN',
+	284: 'H_ADD_LOGICAL_LAN_BUFFER',
+	288: 'H_SEND_LOGICAL_LAN',
+	292: 'H_BULK_REMOVE',
+	304: 'H_MULTICAST_CTRL',
+	308: 'H_SET_XDABR',
+	312: 'H_STUFF_TCE',
+	316: 'H_PUT_TCE_INDIRECT',
+	332: 'H_CHANGE_LOGICAL_LAN_MAC',
+	336: 'H_VTERM_PARTNER_INFO',
+	340: 'H_REGISTER_VTERM',
+	344: 'H_FREE_VTERM',
+	348: 'H_RESET_EVENTS',
+	352: 'H_ALLOC_RESOURCE',
+	356: 'H_FREE_RESOURCE',
+	360: 'H_MODIFY_QP',
+	364: 'H_QUERY_QP',
+	368: 'H_REREGISTER_PMR',
+	372: 'H_REGISTER_SMR',
+	376: 'H_QUERY_MR',
+	380: 'H_QUERY_MW',
+	384: 'H_QUERY_HCA',
+	388: 'H_QUERY_PORT',
+	392: 'H_MODIFY_PORT',
+	396: 'H_DEFINE_AQP1',
+	400: 'H_GET_TRACE_BUFFER',
+	404: 'H_DEFINE_AQP0',
+	408: 'H_RESIZE_MR',
+	412: 'H_ATTACH_MCQP',
+	416: 'H_DETACH_MCQP',
+	420: 'H_CREATE_RPT',
+	424: 'H_REMOVE_RPT',
+	428: 'H_REGISTER_RPAGES',
+	432: 'H_DISABLE_AND_GETC',
+	436: 'H_ERROR_DATA',
+	440: 'H_GET_HCA_INFO',
+	444: 'H_GET_PERF_COUNT',
+	448: 'H_MANAGE_TRACE',
+	468: 'H_FREE_LOGICAL_LAN_BUFFER',
+	472: 'H_POLL_PENDING',
+	484: 'H_QUERY_INT_STATE',
+	580: 'H_ILLAN_ATTRIBUTES',
+	592: 'H_MODIFY_HEA_QP',
+	596: 'H_QUERY_HEA_QP',
+	600: 'H_QUERY_HEA',
+	604: 'H_QUERY_HEA_PORT',
+	608: 'H_MODIFY_HEA_PORT',
+	612: 'H_REG_BCMC',
+	616: 'H_DEREG_BCMC',
+	620: 'H_REGISTER_HEA_RPAGES',
+	624: 'H_DISABLE_AND_GET_HEA',
+	628: 'H_GET_HEA_INFO',
+	632: 'H_ALLOC_HEA_RESOURCE',
+	644: 'H_ADD_CONN',
+	648: 'H_DEL_CONN',
+	664: 'H_JOIN',
+	676: 'H_VASI_STATE',
+	688: 'H_ENABLE_CRQ',
+	696: 'H_GET_EM_PARMS',
+	720: 'H_SET_MPP',
+	724: 'H_GET_MPP',
+	748: 'H_HOME_NODE_ASSOCIATIVITY',
+	756: 'H_BEST_ENERGY',
+	764: 'H_XIRR_X',
+	768: 'H_RANDOM',
+	772: 'H_COP',
+	788: 'H_GET_MPP_X',
+	796: 'H_SET_MODE',
+	61440: 'H_RTAS',
+}
+
+def hcall_table_lookup(opcode):
+	if (hcall_table.has_key(opcode)):
+		return hcall_table[opcode]
+	else:
+		return opcode
+
+print_ptrn = '%-28s%10s%10s%10s%10s'
+
+def trace_end():
+	print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)')
+	print '-' * 68
+	for opcode in output:
+		h_name = hcall_table_lookup(opcode)
+		time = output[opcode]['time']
+		cnt = output[opcode]['cnt']
+		min_t = output[opcode]['min']
+		max_t = output[opcode]['max']
+
+		print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt)
+
+def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain,
+			opcode, retval):
+	if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)):
+		diff = nsecs(sec, nsec) - d_enter[cpu][opcode]
+
+		if (output.has_key(opcode)):
+			output[opcode]['time'] += diff
+			output[opcode]['cnt'] += 1
+			if (output[opcode]['min'] > diff):
+				output[opcode]['min'] = diff
+			if (output[opcode]['max'] < diff):
+				output[opcode]['max'] = diff
+		else:
+			output[opcode] = {
+				'time': diff,
+				'cnt': 1,
+				'min': diff,
+				'max': diff,
+			}
+
+		del d_enter[cpu][opcode]
+#	else:
+#		print "Can't find matching hcall_enter event. Ignoring sample"
+
+def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm,
+			 callchain, opcode):
+		if (d_enter.has_key(cpu)):
+			d_enter[cpu][opcode] = nsecs(sec, nsec)
+		else:
+			d_enter[cpu] = {opcode: nsecs(sec, nsec)}
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index de66cb3b72c9..3473e7f66081 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -9,13 +9,17 @@
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
-
+from __future__ import print_function
 
 import os
 import sys
 
 from collections import defaultdict
-from UserList import UserList
+try:
+    from UserList import UserList
+except ImportError:
+    # Python 3: UserList moved to the collections package
+    from collections import UserList
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -300,7 +304,7 @@ class TimeSliceList(UserList):
 		if i == -1:
 			return
 
-		for i in xrange(i, len(self.data)):
+		for i in range(i, len(self.data)):
 			timeslice = self.data[i]
 			if timeslice.start > end:
 				return
@@ -336,8 +340,8 @@ class SchedEventProxy:
 		on_cpu_task = self.current_tsk[headers.cpu]
 
 		if on_cpu_task != -1 and on_cpu_task != prev_pid:
-			print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
-				(headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
+			print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
+				headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
 
 		threads[prev_pid] = prev_comm
 		threads[next_pid] = next_comm
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 6c108fa79ae3..0b2b8305c965 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -21,6 +21,7 @@ perf-y += python-use.o
 perf-y += bp_signal.o
 perf-y += bp_signal_overflow.o
 perf-y += bp_account.o
+perf-y += wp.o
 perf-y += task-exit.o
 perf-y += sw-clock.o
 perf-y += mmap-thread-lookup.o
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 37940665f736..efd0157b9d22 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -9,7 +9,7 @@ size=112
 config=0
 sample_period=*
 sample_type=263
-read_format=0
+read_format=0|4
 disabled=1
 inherit=1
 pinned=0
diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
index 8a33ca4f9e1f..f0729c454f16 100644
--- a/tools/perf/tests/attr/test-record-group-sampling
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -37,4 +37,3 @@ sample_freq=0
 sample_period=0
 freq=0
 write_backward=0
-sample_id_all=0
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 47bedf25ba69..96e7fc1ad3f9 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -16,8 +16,6 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 	bm = bitmap_alloc(nbits);
 
 	if (map && bm) {
-		bitmap_zero(bm, nbits);
-
 		for (i = 0; i < map->nr; i++)
 			set_bit(map->map[i], bm);
 	}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index cac8f8889bc3..12c09e0ece71 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -121,6 +121,16 @@ static struct test generic_tests[] = {
 		.is_supported = test__bp_signal_is_supported,
 	},
 	{
+		.desc = "Watchpoint",
+		.func = test__wp,
+		.is_supported = test__wp_is_supported,
+		.subtest = {
+			.skip_if_fail	= false,
+			.get_nr		= test__wp_subtest_get_nr,
+			.get_desc	= test__wp_subtest_get_desc,
+		},
+	},
+	{
 		.desc = "Number of exit events of a simple workload",
 		.func = test__task_exit,
 	},
@@ -385,7 +395,7 @@ static int test_and_print(struct test *t, bool force_skip, int subtest)
 	if (!t->subtest.get_nr)
 		pr_debug("%s:", t->desc);
 	else
-		pr_debug("%s subtest %d:", t->desc, subtest);
+		pr_debug("%s subtest %d:", t->desc, subtest + 1);
 
 	switch (err) {
 	case TEST_OK:
@@ -422,7 +432,7 @@ static const char *shell_test__description(char *description, size_t size,
 
 #define for_each_shell_test(dir, base, ent)	\
 	while ((ent = readdir(dir)) != NULL)	\
-		if (!is_directory(base, ent))
+		if (!is_directory(base, ent) && ent->d_name[0] != '.')
 
 static const char *shell_tests__dir(char *path, size_t size)
 {
@@ -654,6 +664,15 @@ static int perf_test__list(int argc, const char **argv)
 			continue;
 
 		pr_info("%2d: %s\n", i, t->desc);
+
+		if (t->subtest.get_nr) {
+			int subn = t->subtest.get_nr();
+			int subi;
+
+			for (subi = 0; subi < subn; subi++)
+				pr_info("%2d:%1d: %s\n", i, subi + 1,
+					t->subtest.get_desc(subi));
+		}
 	}
 
 	perf_test__list_shell(argc, argv, i);
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 99936352df4f..6b049f3f5cf4 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -232,18 +232,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	u64 objdump_addr;
 	const char *objdump_name;
 	char decomp_name[KMOD_DECOMP_LEN];
+	bool decomp = false;
 	int ret;
 
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
-	if (!al.map || !al.map->dso) {
+	if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) {
 		if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
 			pr_debug("Hypervisor address can not be resolved - skipping\n");
 			return 0;
 		}
 
-		pr_debug("thread__find_addr_map failed\n");
+		pr_debug("thread__find_map failed\n");
 		return -1;
 	}
 
@@ -306,6 +306,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 			return -1;
 		}
 
+		decomp = true;
 		objdump_name = decomp_name;
 	}
 
@@ -313,7 +314,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
 	ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
 
-	if (dso__needs_decompress(al.map->dso))
+	if (decomp)
 		unlink(objdump_name);
 
 	if (ret > 0) {
@@ -561,6 +562,7 @@ static int do_test_code_reading(bool try_kcore)
 	pid = getpid();
 
 	machine = machine__new_host();
+	machine->env = &perf_env;
 
 	ret = machine__create_kernel_maps(machine);
 	if (ret < 0) {
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 699561fa512c..5f8501c68da4 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -8,7 +8,7 @@
 static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 				  int size, bool should_be_signed)
 {
-	struct format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = perf_evsel__field(evsel, name);
 	int is_signed;
 	int ret = 0;
 
@@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 		return -1;
 	}
 
-	is_signed = !!(field->flags | FIELD_IS_SIGNED);
+	is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED);
 	if (should_be_signed && !is_signed) {
 		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
 			 evsel->name, name, is_signed, should_be_signed);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f7c5b613d667..b889a28fd80b 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -131,20 +131,20 @@ struct machine *setup_fake_machine(struct machines *machines)
 			goto out;
 
 		/* emulate dso__load() */
-		dso__set_loaded(dso, MAP__FUNCTION);
+		dso__set_loaded(dso);
 
 		for (k = 0; k < fake_symbols[i].nr_syms; k++) {
 			struct symbol *sym;
 			struct fake_sym *fsym = &fake_symbols[i].syms[k];
 
 			sym = symbol__new(fsym->start, fsym->length,
-					  STB_GLOBAL, fsym->name);
+					  STB_GLOBAL, STT_FUNC, fsym->name);
 			if (sym == NULL) {
 				dso__put(dso);
 				goto out;
 			}
 
-			symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
+			symbols__insert(&dso->symbols, sym);
 		}
 
 		dso__put(dso);
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 8e57d46109de..0579a70bbbff 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -5,34 +5,28 @@
 #include "dso.h"
 #include "debug.h"
 
-static int test(const char *path, bool alloc_name, bool alloc_ext,
-		bool kmod, bool comp, const char *name, const char *ext)
+static int test(const char *path, bool alloc_name, bool kmod,
+		int comp, const char *name)
 {
 	struct kmod_path m;
 
 	memset(&m, 0x0, sizeof(m));
 
 	TEST_ASSERT_VAL("kmod_path__parse",
-			!__kmod_path__parse(&m, path, alloc_name, alloc_ext));
+			!__kmod_path__parse(&m, path, alloc_name));
 
-	pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n",
-		 path, alloc_name, alloc_ext, m.kmod, m.comp, m.name, m.ext);
+	pr_debug("%s - alloc name %d, kmod %d, comp %d, name '%s'\n",
+		 path, alloc_name, m.kmod, m.comp, m.name);
 
 	TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod);
 	TEST_ASSERT_VAL("wrong comp", m.comp == comp);
 
-	if (ext)
-		TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext));
-	else
-		TEST_ASSERT_VAL("wrong ext", !m.ext);
-
 	if (name)
 		TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name));
 	else
 		TEST_ASSERT_VAL("wrong name", !m.name);
 
 	free(m.name);
-	free(m.ext);
 	return 0;
 }
 
@@ -45,102 +39,118 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
 	return 0;
 }
 
-#define T(path, an, ae, k, c, n, e) \
-	TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
+#define T(path, an, k, c, n) \
+	TEST_ASSERT_VAL("failed", !test(path, an, k, c, n))
 
 #define M(path, c, e) \
 	TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
 
 int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused)
 {
-	/* path                alloc_name  alloc_ext   kmod  comp   name     ext */
-	T("/xxxx/xxxx/x-x.ko", true      , true      , true, false, "[x_x]", NULL);
-	T("/xxxx/xxxx/x-x.ko", false     , true      , true, false, NULL   , NULL);
-	T("/xxxx/xxxx/x-x.ko", true      , false     , true, false, "[x_x]", NULL);
-	T("/xxxx/xxxx/x-x.ko", false     , false     , true, false, NULL   , NULL);
+	/* path                alloc_name  kmod  comp   name   */
+	T("/xxxx/xxxx/x-x.ko", true      , true, 0    , "[x_x]");
+	T("/xxxx/xxxx/x-x.ko", false     , true, 0    , NULL   );
+	T("/xxxx/xxxx/x-x.ko", true      , true, 0    , "[x_x]");
+	T("/xxxx/xxxx/x-x.ko", false     , true, 0    , NULL   );
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
 	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
 
 #ifdef HAVE_ZLIB_SUPPORT
-	/* path                alloc_name  alloc_ext   kmod  comp  name   ext */
-	T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
-	T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
-	T("/xxxx/xxxx/x.ko.gz", true     , false     , true, true, "[x]", NULL);
-	T("/xxxx/xxxx/x.ko.gz", false    , false     , true, true, NULL , NULL);
+	/* path                alloc_name   kmod  comp  name  */
+	T("/xxxx/xxxx/x.ko.gz", true     , true, 1   , "[x]");
+	T("/xxxx/xxxx/x.ko.gz", false    , true, 1   , NULL );
+	T("/xxxx/xxxx/x.ko.gz", true     , true, 1   , "[x]");
+	T("/xxxx/xxxx/x.ko.gz", false    , true, 1   , NULL );
 	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
 	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
 
-	/* path              alloc_name  alloc_ext  kmod   comp  name    ext */
-	T("/xxxx/xxxx/x.gz", true      , true     , false, true, "x.gz" ,"gz");
-	T("/xxxx/xxxx/x.gz", false     , true     , false, true, NULL   ,"gz");
-	T("/xxxx/xxxx/x.gz", true      , false    , false, true, "x.gz" , NULL);
-	T("/xxxx/xxxx/x.gz", false     , false    , false, true, NULL   , NULL);
+	/* path              alloc_name  kmod   comp  name  */
+	T("/xxxx/xxxx/x.gz", true      , false, 1   , "x.gz");
+	T("/xxxx/xxxx/x.gz", false     , false, 1   , NULL  );
+	T("/xxxx/xxxx/x.gz", true      , false, 1   , "x.gz");
+	T("/xxxx/xxxx/x.gz", false     , false, 1   , NULL  );
 	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
 	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
 	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
 
-	/* path   alloc_name  alloc_ext  kmod   comp  name     ext */
-	T("x.gz", true      , true     , false, true, "x.gz", "gz");
-	T("x.gz", false     , true     , false, true, NULL  , "gz");
-	T("x.gz", true      , false    , false, true, "x.gz", NULL);
-	T("x.gz", false     , false    , false, true, NULL  , NULL);
+	/* path   alloc_name  kmod   comp  name   */
+	T("x.gz", true      , false, 1   , "x.gz");
+	T("x.gz", false     , false, 1   , NULL  );
+	T("x.gz", true      , false, 1   , "x.gz");
+	T("x.gz", false     , false, 1   , NULL  );
 	M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
 	M("x.gz", PERF_RECORD_MISC_KERNEL, false);
 	M("x.gz", PERF_RECORD_MISC_USER, false);
 
-	/* path      alloc_name  alloc_ext  kmod  comp  name  ext */
-	T("x.ko.gz", true      , true     , true, true, "[x]", "gz");
-	T("x.ko.gz", false     , true     , true, true, NULL , "gz");
-	T("x.ko.gz", true      , false    , true, true, "[x]", NULL);
-	T("x.ko.gz", false     , false    , true, true, NULL , NULL);
+	/* path      alloc_name  kmod  comp  name  */
+	T("x.ko.gz", true      , true, 1   , "[x]");
+	T("x.ko.gz", false     , true, 1   , NULL );
+	T("x.ko.gz", true      , true, 1   , "[x]");
+	T("x.ko.gz", false     , true, 1   , NULL );
 	M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
 	M("x.ko.gz", PERF_RECORD_MISC_USER, false);
 #endif
 
-	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
-	T("[test_module]", true      , true     , true, false, "[test_module]", NULL);
-	T("[test_module]", false     , true     , true, false, NULL           , NULL);
-	T("[test_module]", true      , false    , true, false, "[test_module]", NULL);
-	T("[test_module]", false     , false    , true, false, NULL           , NULL);
+	/* path            alloc_name  kmod  comp   name           */
+	T("[test_module]", true      , true, false, "[test_module]");
+	T("[test_module]", false     , true, false, NULL           );
+	T("[test_module]", true      , true, false, "[test_module]");
+	T("[test_module]", false     , true, false, NULL           );
 	M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
 	M("[test_module]", PERF_RECORD_MISC_USER, false);
 
-	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
-	T("[test.module]", true      , true     , true, false, "[test.module]", NULL);
-	T("[test.module]", false     , true     , true, false, NULL           , NULL);
-	T("[test.module]", true      , false    , true, false, "[test.module]", NULL);
-	T("[test.module]", false     , false    , true, false, NULL           , NULL);
+	/* path            alloc_name  kmod  comp   name           */
+	T("[test.module]", true      , true, false, "[test.module]");
+	T("[test.module]", false     , true, false, NULL           );
+	T("[test.module]", true      , true, false, "[test.module]");
+	T("[test.module]", false     , true, false, NULL           );
 	M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
 	M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
 	M("[test.module]", PERF_RECORD_MISC_USER, false);
 
-	/* path     alloc_name  alloc_ext  kmod   comp   name      ext */
-	T("[vdso]", true      , true     , false, false, "[vdso]", NULL);
-	T("[vdso]", false     , true     , false, false, NULL    , NULL);
-	T("[vdso]", true      , false    , false, false, "[vdso]", NULL);
-	T("[vdso]", false     , false    , false, false, NULL    , NULL);
+	/* path     alloc_name  kmod   comp   name    */
+	T("[vdso]", true      , false, false, "[vdso]");
+	T("[vdso]", false     , false, false, NULL    );
+	T("[vdso]", true      , false, false, "[vdso]");
+	T("[vdso]", false     , false, false, NULL    );
 	M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
 	M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
 	M("[vdso]", PERF_RECORD_MISC_USER, false);
 
-	/* path         alloc_name  alloc_ext  kmod   comp   name          ext */
-	T("[vsyscall]", true      , true     , false, false, "[vsyscall]", NULL);
-	T("[vsyscall]", false     , true     , false, false, NULL        , NULL);
-	T("[vsyscall]", true      , false    , false, false, "[vsyscall]", NULL);
-	T("[vsyscall]", false     , false    , false, false, NULL        , NULL);
+	T("[vdso32]", true      , false, false, "[vdso32]");
+	T("[vdso32]", false     , false, false, NULL      );
+	T("[vdso32]", true      , false, false, "[vdso32]");
+	T("[vdso32]", false     , false, false, NULL      );
+	M("[vdso32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[vdso32]", PERF_RECORD_MISC_KERNEL, false);
+	M("[vdso32]", PERF_RECORD_MISC_USER, false);
+
+	T("[vdsox32]", true      , false, false, "[vdsox32]");
+	T("[vdsox32]", false     , false, false, NULL       );
+	T("[vdsox32]", true      , false, false, "[vdsox32]");
+	T("[vdsox32]", false     , false, false, NULL       );
+	M("[vdsox32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[vdsox32]", PERF_RECORD_MISC_KERNEL, false);
+	M("[vdsox32]", PERF_RECORD_MISC_USER, false);
+
+	/* path         alloc_name  kmod   comp   name        */
+	T("[vsyscall]", true      , false, false, "[vsyscall]");
+	T("[vsyscall]", false     , false, false, NULL        );
+	T("[vsyscall]", true      , false, false, "[vsyscall]");
+	T("[vsyscall]", false     , false, false, NULL        );
 	M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
 	M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
 	M("[vsyscall]", PERF_RECORD_MISC_USER, false);
 
-	/* path                alloc_name  alloc_ext  kmod   comp   name      ext */
-	T("[kernel.kallsyms]", true      , true     , false, false, "[kernel.kallsyms]", NULL);
-	T("[kernel.kallsyms]", false     , true     , false, false, NULL               , NULL);
-	T("[kernel.kallsyms]", true      , false    , false, false, "[kernel.kallsyms]", NULL);
-	T("[kernel.kallsyms]", false     , false    , false, false, NULL               , NULL);
+	/* path                alloc_name  kmod   comp   name      */
+	T("[kernel.kallsyms]", true      , false, false, "[kernel.kallsyms]");
+	T("[kernel.kallsyms]", false     , false, false, NULL               );
+	T("[kernel.kallsyms]", true      , false, false, "[kernel.kallsyms]");
+	T("[kernel.kallsyms]", false     , false, false, NULL               );
 	M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
 	M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
 	M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index 0c3c87f86e03..9e9e4d37cc77 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -24,8 +24,6 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 	bm = bitmap_alloc(nbits);
 
 	if (map && bm) {
-		bitmap_zero(bm, nbits);
-
 		for (i = 0; i < map->nr; i++) {
 			set_bit(map->map[i], bm);
 		}
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 868d82b501f4..b1af2499a3c9 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -188,9 +188,8 @@ static int mmap_events(synth_cb synth)
 
 		pr_debug("looking for map %p\n", td->map);
 
-		thread__find_addr_map(thread,
-				      PERF_RECORD_MISC_USER, MAP__FUNCTION,
-				      (unsigned long) (td->map + 1), &al);
+		thread__find_map(thread, PERF_RECORD_MISC_USER,
+				 (unsigned long) (td->map + 1), &al);
 
 		thread__put(thread);
 
@@ -218,7 +217,7 @@ static int mmap_events(synth_cb synth)
  *   perf_event__synthesize_threads    (global)
  *
  * We test we can find all memory maps via:
- *   thread__find_addr_map
+ *   thread__find_map
  *
  * by using all thread objects.
  */
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 18b06444f230..3b97ac018d5a 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -499,7 +499,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis
 	 * while this test executes only parse events method.
 	 */
 	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
-	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong callgraph",  !evsel__has_callchain(evsel));
 	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
 
 	/* cpu/config=2,call-graph=no,time=0,period=2000/ */
@@ -512,7 +512,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis
 	 * while this test executes only parse events method.
 	 */
 	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
-	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong callgraph",  !evsel__has_callchain(evsel));
 	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
 
 	return 0;
@@ -1309,18 +1309,39 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist)
 	return 0;
 }
 
+static bool test__intel_pt_valid(void)
+{
+	return !!perf_pmu__find("intel_pt");
+}
+
+static int test__intel_pt(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
+	return 0;
+}
+
+static int test__checkevent_complex_name(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0);
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	struct dirent *events_ent;
 	DIR *events_dir;
 	int cnt = 0;
 
-	events_dir = opendir(tracing_events_path);
+	events_dir = tracing_events__opendir();
 
 	TEST_ASSERT_VAL("Can't open events dir", events_dir);
 
 	while ((events_ent = readdir(events_dir))) {
-		char sys_path[PATH_MAX];
+		char *sys_path;
 		struct dirent *sys_ent;
 		DIR *sys_dir;
 
@@ -1331,8 +1352,8 @@ static int count_tracepoints(void)
 		    || !strcmp(events_ent->d_name, "header_page"))
 			continue;
 
-		scnprintf(sys_path, PATH_MAX, "%s/%s",
-			  tracing_events_path, events_ent->d_name);
+		sys_path = get_events_file(events_ent->d_name);
+		TEST_ASSERT_VAL("Can't get sys path", sys_path);
 
 		sys_dir = opendir(sys_path);
 		TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
@@ -1348,6 +1369,7 @@ static int count_tracepoints(void)
 		}
 
 		closedir(sys_dir);
+		put_events_file(sys_path);
 	}
 
 	closedir(events_dir);
@@ -1366,6 +1388,7 @@ struct evlist_test {
 	const char *name;
 	__u32 type;
 	const int id;
+	bool (*valid)(void);
 	int (*check)(struct perf_evlist *evlist);
 };
 
@@ -1637,6 +1660,17 @@ static struct evlist_test test__events[] = {
 		.check = test__checkevent_config_cache,
 		.id    = 51,
 	},
+	{
+		.name  = "intel_pt//u",
+		.valid = test__intel_pt_valid,
+		.check = test__intel_pt,
+		.id    = 52,
+	},
+	{
+		.name  = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk",
+		.check = test__checkevent_complex_name,
+		.id    = 53
+	}
 };
 
 static struct evlist_test test__events_pmu[] = {
@@ -1655,6 +1689,11 @@ static struct evlist_test test__events_pmu[] = {
 		.check = test__checkevent_pmu_partial_time_callgraph,
 		.id    = 2,
 	},
+	{
+		.name  = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
+		.check = test__checkevent_complex_name,
+		.id    = 3,
+	}
 };
 
 struct terms_test {
@@ -1672,17 +1711,24 @@ static struct terms_test test__terms[] = {
 
 static int test_event(struct evlist_test *e)
 {
+	struct parse_events_error err = { .idx = 0, };
 	struct perf_evlist *evlist;
 	int ret;
 
+	if (e->valid && !e->valid()) {
+		pr_debug("... SKIP");
+		return 0;
+	}
+
 	evlist = perf_evlist__new();
 	if (evlist == NULL)
 		return -ENOMEM;
 
-	ret = parse_events(evlist, e->name, NULL);
+	ret = parse_events(evlist, e->name, &err);
 	if (ret) {
-		pr_debug("failed to parse event '%s', err %d\n",
-			 e->name, ret);
+		pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+			 e->name, ret, err.str);
+		parse_events_print_error(&err, e->name);
 	} else {
 		ret = e->check(evlist);
 	}
@@ -1700,10 +1746,11 @@ static int test_events(struct evlist_test *events, unsigned cnt)
 	for (i = 0; i < cnt; i++) {
 		struct evlist_test *e = &events[i];
 
-		pr_debug("running test %d '%s'\n", e->id, e->name);
+		pr_debug("running test %d '%s'", e->id, e->name);
 		ret1 = test_event(e);
 		if (ret1)
 			ret2 = ret1;
+		pr_debug("\n");
 	}
 
 	return ret2;
@@ -1785,7 +1832,7 @@ static int test_pmu_events(void)
 	}
 
 	while (!ret && (ent = readdir(dir))) {
-		struct evlist_test e;
+		struct evlist_test e = { .id = 0, };
 		char name[2 * NAME_MAX + 1 + 12 + 3];
 
 		/* Names containing . are special and cannot be used directly */
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 5d2df65ada6a..40ab72149ce1 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -7,8 +7,7 @@
 #include <stdlib.h>
 #include <linux/compiler.h>
 #include "tests.h"
-
-extern int verbose;
+#include "util/debug.h"
 
 int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index ee86473643be..cab7b0aea6ea 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -11,38 +11,62 @@
 . $(dirname $0)/lib/probe.sh
 
 libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
-nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
+nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
+
+event_pattern='probe_libc:inet_pton(\_[[:digit:]]+)?'
+
+add_libc_inet_pton_event() {
+
+	event_name=$(perf probe -f -x $libc -a inet_pton 2>&1 | tail -n +2 | head -n -5 | \
+			grep -P -o "$event_pattern(?=[[:space:]]\(on inet_pton in $libc\))")
+
+	if [ $? -ne 0 -o -z "$event_name" ] ; then
+		printf "FAIL: could not add event\n"
+		return 1
+	fi
+}
 
 trace_libc_inet_pton_backtrace() {
-	idx=0
-	expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
-	expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$"
+
+	expected=`mktemp -u /tmp/expected.XXX`
+
+	echo "ping[][0-9 \.:]+$event_name: \([[:xdigit:]]+\)" > $expected
+	echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
 	case "$(uname -m)" in
 	s390x)
 		eventattr='call-graph=dwarf,max-stack=4'
-		expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
-		expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$"
-		expected[4]="main[[:space:]]\(.*/bin/ping.*\)$"
+		echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+		echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+		echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
+		;;
+	ppc64|ppc64le)
+		eventattr='max-stack=4'
+		echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	*)
 		eventattr='max-stack=3'
-		expected[2]="getaddrinfo[[:space:]]\($libc\)$"
-		expected[3]=".*\(.*/bin/ping.*\)$"
+		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	esac
 
-	file=`mktemp -u /tmp/perf.data.XXX`
+	perf_data=`mktemp -u /tmp/perf.data.XXX`
+	perf_script=`mktemp -u /tmp/perf.script.XXX`
+	perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
+	perf script -i $perf_data > $perf_script
 
-	perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1
-	perf script -i $file | while read line ; do
+	exec 3<$perf_script
+	exec 4<$expected
+	while read line <&3 && read -r pattern <&4; do
+		[ -z "$pattern" ] && break
 		echo $line
-		echo "$line" | egrep -q "${expected[$idx]}"
+		echo "$line" | egrep -q "$pattern"
 		if [ $? -ne 0 ] ; then
-			printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line"
-			exit 1
+			printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line"
+			return 1
 		fi
-		let idx+=1
-		[ -z "${expected[$idx]}" ] && break
 	done
 
 	# If any statements are executed from this point onwards,
@@ -51,13 +75,20 @@ trace_libc_inet_pton_backtrace() {
 	# even if the perf script output does not match.
 }
 
+delete_libc_inet_pton_event() {
+
+	if [ -n "$event_name" ] ; then
+		perf probe -q -d $event_name
+	fi
+}
+
 # Check for IPv6 interface existence
 ip a sh lo | fgrep -q inet6 || exit 2
 
 skip_if_no_perf_probe && \
-perf probe -q $libc inet_pton && \
+add_libc_inet_pton_event && \
 trace_libc_inet_pton_backtrace
 err=$?
-rm -f ${file}
-perf probe -q -d probe_libc:inet_pton
+rm -f ${perf_data} ${perf_script} ${expected}
+delete_libc_inet_pton_event
 exit $err
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 55ad9793d544..4ce276efe6b4 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -17,7 +17,7 @@ skip_if_no_perf_probe || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-	evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+	evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
 	perf trace -e $evts touch $file 2>&1 | \
 	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index a9760e790563..b82f55fcc294 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -59,6 +59,9 @@ int test__python_use(struct test *test, int subtest);
 int test__bp_signal(struct test *test, int subtest);
 int test__bp_signal_overflow(struct test *test, int subtest);
 int test__bp_accounting(struct test *test, int subtest);
+int test__wp(struct test *test, int subtest);
+const char *test__wp_subtest_get_desc(int subtest);
+int test__wp_subtest_get_nr(void);
 int test__task_exit(struct test *test, int subtest);
 int test__mem(struct test *test, int subtest);
 int test__sw_clock_freq(struct test *test, int subtest);
@@ -106,6 +109,7 @@ int test__unit_number__scnprint(struct test *test, int subtest);
 int test__mem2node(struct test *t, int subtest);
 
 bool test__bp_signal_is_supported(void);
+bool test__wp_is_supported(void);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 40e30a26b23c..9497d02f69e6 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -45,6 +45,7 @@ static int session_write_header(char *path)
 
 	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
 	perf_header__set_feat(&session->header, HEADER_NRCPUS);
+	perf_header__set_feat(&session->header, HEADER_ARCH);
 
 	session->header.data_size += DATA_SIZE;
 
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 1e5adb65632a..7691980b7df1 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -19,8 +19,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
 	struct symbol *sym;
 	struct map *kallsyms_map, *vmlinux_map, *map;
 	struct machine kallsyms, vmlinux;
-	enum map_type type = MAP__FUNCTION;
-	struct maps *maps = &vmlinux.kmaps.maps[type];
+	struct maps *maps = machine__kernel_maps(&vmlinux);
 	u64 mem_start, mem_end;
 	bool header_printed;
 
@@ -56,7 +55,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
 	 * be compacted against the list of modules found in the "vmlinux"
 	 * code and with the one got from /proc/modules from the "kallsyms" code.
 	 */
-	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
+	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) {
 		pr_debug("dso__load_kallsyms ");
 		goto out;
 	}
@@ -94,7 +93,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
 	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
 	 * to fixup the symbols.
 	 */
-	if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
+	if (machine__load_vmlinux_path(&vmlinux) <= 0) {
 		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
 		err = TEST_SKIP;
 		goto out;
@@ -108,7 +107,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
 	 * in the kallsyms dso. For the ones that are in both, check its names and
 	 * end addresses too.
 	 */
-	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
+	map__for_each_symbol(vmlinux_map, sym, nd) {
 		struct symbol *pair, *first_pair;
 
 		sym  = rb_entry(nd, struct symbol, rb_node);
@@ -119,8 +118,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
 		mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
 		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
 
-		first_pair = machine__find_kernel_symbol(&kallsyms, type,
-							 mem_start, NULL);
+		first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL);
 		pair = first_pair;
 
 		if (pair && UM(pair->start) == mem_start) {
@@ -149,7 +147,7 @@ next_pair:
 				 */
 				continue;
 			} else {
-				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL);
 				if (pair) {
 					if (UM(pair->start) == mem_start)
 						goto next_pair;
@@ -183,7 +181,7 @@ next_pair:
 		 * so use the short name, less descriptive but the same ("[kernel]" in
 		 * both cases.
 		 */
-		pair = map_groups__find_by_name(&kallsyms.kmaps, type,
+		pair = map_groups__find_by_name(&kallsyms.kmaps,
 						(map->dso->kernel ?
 							map->dso->short_name :
 							map->dso->name));
@@ -206,7 +204,7 @@ next_pair:
 		mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
 		mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
 
-		pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
+		pair = map_groups__find(&kallsyms.kmaps, mem_start);
 		if (pair == NULL || pair->priv)
 			continue;
 
@@ -228,7 +226,7 @@ next_pair:
 
 	header_printed = false;
 
-	maps = &kallsyms.kmaps.maps[type];
+	maps = machine__kernel_maps(&kallsyms);
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		if (!map->priv) {
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
new file mode 100644
index 000000000000..f89e6806557b
--- /dev/null
+++ b/tools/perf/tests/wp.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/hw_breakpoint.h>
+#include "tests.h"
+#include "debug.h"
+#include "cloexec.h"
+
+#define WP_TEST_ASSERT_VAL(fd, text, val)       \
+do {                                            \
+	long long count;                        \
+	wp_read(fd, &count, sizeof(long long)); \
+	TEST_ASSERT_VAL(text, count == val);    \
+} while (0)
+
+volatile u64 data1;
+volatile u8 data2[3];
+
+static int wp_read(int fd, long long *count, int size)
+{
+	int ret = read(fd, count, size);
+
+	if (ret != size) {
+		pr_debug("failed to read: %d\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type,
+				 void *wp_addr, unsigned long wp_len)
+{
+	memset(attr, 0, sizeof(struct perf_event_attr));
+	attr->type           = PERF_TYPE_BREAKPOINT;
+	attr->size           = sizeof(struct perf_event_attr);
+	attr->config         = 0;
+	attr->bp_type        = wp_type;
+	attr->bp_addr        = (unsigned long)wp_addr;
+	attr->bp_len         = wp_len;
+	attr->sample_period  = 1;
+	attr->sample_type    = PERF_SAMPLE_IP;
+	attr->exclude_kernel = 1;
+	attr->exclude_hv     = 1;
+}
+
+static int __event(int wp_type, void *wp_addr, unsigned long wp_len)
+{
+	int fd;
+	struct perf_event_attr attr;
+
+	get__perf_event_attr(&attr, wp_type, wp_addr, wp_len);
+	fd = sys_perf_event_open(&attr, 0, -1, -1,
+				 perf_event_open_cloexec_flag());
+	if (fd < 0)
+		pr_debug("failed opening event %x\n", attr.bp_type);
+
+	return fd;
+}
+
+static int wp_ro_test(void)
+{
+	int fd;
+	unsigned long tmp, tmp1 = rand();
+
+	fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	tmp = data1;
+	WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1);
+
+	data1 = tmp1 + tmp;
+	WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1);
+
+	close(fd);
+	return 0;
+}
+
+static int wp_wo_test(void)
+{
+	int fd;
+	unsigned long tmp, tmp1 = rand();
+
+	fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	tmp = data1;
+	WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0);
+
+	data1 = tmp1 + tmp;
+	WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 1);
+
+	close(fd);
+	return 0;
+}
+
+static int wp_rw_test(void)
+{
+	int fd;
+	unsigned long tmp, tmp1 = rand();
+
+	fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1,
+		     sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	tmp = data1;
+	WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1);
+
+	data1 = tmp1 + tmp;
+	WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 2);
+
+	close(fd);
+	return 0;
+}
+
+static int wp_modify_test(void)
+{
+	int fd, ret;
+	unsigned long tmp = rand();
+	struct perf_event_attr new_attr;
+
+	fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	data1 = tmp;
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1);
+
+	/* Modify watchpoint with disabled = 1 */
+	get__perf_event_attr(&new_attr, HW_BREAKPOINT_W, (void *)&data2[0],
+			     sizeof(u8) * 2);
+	new_attr.disabled = 1;
+	ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr);
+	if (ret < 0) {
+		pr_debug("ioctl(PERF_EVENT_IOC_MODIFY_ATTRIBUTES) failed\n");
+		close(fd);
+		return ret;
+	}
+
+	data2[1] = tmp; /* Not Counted */
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1);
+
+	/* Enable the event */
+	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (ret < 0) {
+		pr_debug("Failed to enable event\n");
+		close(fd);
+		return ret;
+	}
+
+	data2[1] = tmp; /* Counted */
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 2);
+
+	data2[2] = tmp; /* Not Counted */
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 2);
+
+	close(fd);
+	return 0;
+}
+
+static bool wp_ro_supported(void)
+{
+#if defined (__x86_64__) || defined (__i386__)
+	return false;
+#else
+	return true;
+#endif
+}
+
+static void wp_ro_skip_msg(void)
+{
+#if defined (__x86_64__) || defined (__i386__)
+	pr_debug("Hardware does not support read only watchpoints.\n");
+#endif
+}
+
+static struct {
+	const char *desc;
+	int (*target_func)(void);
+	bool (*is_supported)(void);
+	void (*skip_msg)(void);
+} wp_testcase_table[] = {
+	{
+		.desc = "Read Only Watchpoint",
+		.target_func = &wp_ro_test,
+		.is_supported = &wp_ro_supported,
+		.skip_msg = &wp_ro_skip_msg,
+	},
+	{
+		.desc = "Write Only Watchpoint",
+		.target_func = &wp_wo_test,
+	},
+	{
+		.desc = "Read / Write Watchpoint",
+		.target_func = &wp_rw_test,
+	},
+	{
+		.desc = "Modify Watchpoint",
+		.target_func = &wp_modify_test,
+	},
+};
+
+int test__wp_subtest_get_nr(void)
+{
+	return (int)ARRAY_SIZE(wp_testcase_table);
+}
+
+const char *test__wp_subtest_get_desc(int i)
+{
+	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+		return NULL;
+	return wp_testcase_table[i].desc;
+}
+
+int test__wp(struct test *test __maybe_unused, int i)
+{
+	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+		return TEST_FAIL;
+
+	if (wp_testcase_table[i].is_supported &&
+	    !wp_testcase_table[i].is_supported()) {
+		wp_testcase_table[i].skip_msg();
+		return TEST_SKIP;
+	}
+
+	return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
+}
+
+/* The s390 so far does not have support for
+ * instruction breakpoint using the perf_event_open() system call.
+ */
+bool test__wp_is_supported(void)
+{
+#if defined(__s390x__)
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 66330d4b739b..304313073242 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -5,6 +5,9 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
 libperf-y += ioctl.o
 endif
 libperf-y += kcmp.o
+libperf-y += mount_flags.o
 libperf-y += pkey_alloc.o
 libperf-y += prctl.o
+libperf-y += sockaddr.o
+libperf-y += socket.o
 libperf-y += statx.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 984a504d335c..039c29039b2c 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -24,15 +24,43 @@ struct strarray {
 }
 
 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags);
 
 struct trace;
 struct thread;
 
 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size);
 
+extern struct strarray strarray__socket_families;
+
+/**
+ * augmented_arg: extra payload for syscall pointer arguments
+ 
+ * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts,
+ * then its the arguments contents, so that we can show more than just a
+ * pointer. This will be done initially with eBPF, the start of that is at the
+ * tools/perf/examples/bpf/augmented_syscalls.c example for the openat, but
+ * will eventually be done automagically caching the running kernel tracefs
+ * events data into an eBPF C script, that then gets compiled and its .o file
+ * cached for subsequent use. For char pointers like the ones for 'open' like
+ * syscalls its easy, for the rest we should use DWARF or better, BTF, much
+ * more compact.
+ *
+ * @size: 8 if all we need is an integer, otherwise all of the augmented arg.
+ * @int_arg: will be used for integer like pointer contents, like 'accept's 'upeer_addrlen'
+ * @value: u64 aligned, for structs, pathnames
+ */
+struct augmented_arg {
+	int  size;
+	int  int_arg;
+	u64  value[];
+};
+
 /**
  * @val: value of syscall argument being formatted
  * @args: All the args, use syscall_args__val(arg, nth) to access one
+ * @augmented_args: Extra data that can be collected, for instance, with eBPF for expanding the pathname for open, etc
+ * @augmented_args_size: augmented_args total payload size
  * @thread: tid state (maps, pid, tid, etc)
  * @trace: 'perf trace' internals: all threads, etc
  * @parm: private area, may be an strarray, for instance
@@ -43,6 +71,10 @@ size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_
 struct syscall_arg {
 	unsigned long val;
 	unsigned char *args;
+	struct {
+		struct augmented_arg *args;
+		int		     size;
+	} augmented;
 	struct thread *thread;
 	struct trace  *trace;
 	void	      *parm;
@@ -91,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar
 size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
 
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags);
+#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
 
@@ -106,6 +144,12 @@ size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_a
 size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3
 
+size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SOCKADDR syscall_arg__scnprintf_sockaddr
+
+size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol
+
 size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
 
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index d64d049ab991..010406500c30 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/cone.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh
index 2149d3a98e42..9aa94fd523a9 100755
--- a/tools/perf/trace/beauty/drm_ioctl.sh
+++ b/tools/perf/trace/beauty/drm_ioctl.sh
@@ -1,13 +1,15 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/
 
-drm_header_dir=$1
 printf "#ifndef DRM_COMMAND_BASE\n"
-grep "#define DRM_COMMAND_BASE" $drm_header_dir/drm.h
+grep "#define DRM_COMMAND_BASE" $header_dir/drm.h
 printf "#endif\n"
 
 printf "static const char *drm_ioctl_cmds[] = {\n"
-grep "^#define DRM_IOCTL.*DRM_IO" $drm_header_dir/drm.h | \
+grep "^#define DRM_IOCTL.*DRM_IO" $header_dir/drm.h | \
 	sed -r 's/^#define +DRM_IOCTL_([A-Z0-9_]+)[	 ]+DRM_IO[A-Z]* *\( *(0x[[:xdigit:]]+),*.*/	[\2] = "\1",/g'
-grep "^#define DRM_I915_[A-Z_0-9]\+[	 ]\+0x" $drm_header_dir/i915_drm.h | \
+grep "^#define DRM_I915_[A-Z_0-9]\+[	 ]\+0x" $header_dir/i915_drm.h | \
 	sed -r 's/^#define +DRM_I915_([A-Z0-9_]+)[	 ]+(0x[[:xdigit:]]+)/\t[DRM_COMMAND_BASE + \2] = "I915_\1",/g'
 printf "};\n"
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
index 5d6a477a6400..db5b9b492113 100644
--- a/tools/perf/trace/beauty/eventfd.c
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef EFD_SEMAPHORE
 #define EFD_SEMAPHORE		1
 #endif
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
index 9e8900c13cb1..e6de31674e24 100644
--- a/tools/perf/trace/beauty/fcntl.c
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/fcntl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
index c4ff6ad30b06..cf02ae5f0ba6 100644
--- a/tools/perf/trace/beauty/flock.c
+++ b/tools/perf/trace/beauty/flock.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
index 61850fbc85ff..1136bde56406 100644
--- a/tools/perf/trace/beauty/futex_op.c
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_WAIT_BITSET
diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c
index 26f6b3253511..138b7d588a70 100644
--- a/tools/perf/trace/beauty/futex_val3.c
+++ b/tools/perf/trace/beauty/futex_val3.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_BITSET_MATCH_ANY
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 1be3b4cf0827..eae59ad15ce3 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/ioctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
@@ -32,6 +31,7 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size)
 	"TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 	"TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT",
 	"TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER",
+	"TIOCGISO7816", "TIOCSISO7816",
 	[_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c
index f62040eb9d5c..b276a274f203 100644
--- a/tools/perf/trace/beauty/kcmp.c
+++ b/tools/perf/trace/beauty/kcmp.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/kcmp.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
index 40d063b8c082..df8b17486d57 100755
--- a/tools/perf/trace/beauty/kcmp_type.sh
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *kcmp_types[] = {\n"
 regex='^[[:space:]]+(KCMP_(\w+)),'
diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh
index bd28817afced..4ce54f5bf756 100755
--- a/tools/perf/trace/beauty/kvm_ioctl.sh
+++ b/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -1,10 +1,11 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-kvm_header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *kvm_ioctl_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+KVM_(\w+)[[:space:]]+_IO[RW]*\([[:space:]]*KVMIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-egrep $regex ${kvm_header_dir}/kvm.h	| \
+egrep $regex ${header_dir}/kvm.h	| \
 	sed -r "s/$regex/\2 \1/g"	| \
 	egrep -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
index 60ef8640ee70..4527d290cdfc 100755
--- a/tools/perf/trace/beauty/madvise_behavior.sh
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
 printf "static const char *madvise_advices[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9f68077b241b..c534bd96ef5c 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,5 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <uapi/linux/mman.h>
+#include <linux/log2.h>
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 					       struct syscall_arg *arg)
@@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
+static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mmap_flags_array.c"
+       static DEFINE_STRARRAY(mmap_flags);
+
+       return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags);
+}
+
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 						struct syscall_arg *arg)
 {
-	int printed = 0, flags = arg->val;
+	unsigned long flags = arg->val;
 
 	if (flags & MAP_ANONYMOUS)
 		arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */
 
-#define	P_MMAP_FLAG(n) \
-	if (flags & MAP_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~MAP_##n; \
-	}
-
-	P_MMAP_FLAG(SHARED);
-	P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-	P_MMAP_FLAG(32BIT);
-#endif
-	P_MMAP_FLAG(ANONYMOUS);
-	P_MMAP_FLAG(DENYWRITE);
-	P_MMAP_FLAG(EXECUTABLE);
-	P_MMAP_FLAG(FILE);
-	P_MMAP_FLAG(FIXED);
-#ifdef MAP_FIXED_NOREPLACE
-	P_MMAP_FLAG(FIXED_NOREPLACE);
-#endif
-	P_MMAP_FLAG(GROWSDOWN);
-	P_MMAP_FLAG(HUGETLB);
-	P_MMAP_FLAG(LOCKED);
-	P_MMAP_FLAG(NONBLOCK);
-	P_MMAP_FLAG(NORESERVE);
-	P_MMAP_FLAG(POPULATE);
-	P_MMAP_FLAG(STACK);
-	P_MMAP_FLAG(UNINITIALIZED);
-#ifdef MAP_SYNC
-	P_MMAP_FLAG(SYNC);
-#endif
-#undef P_MMAP_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
+	return mmap__scnprintf_flags(flags, bf, size);
 }
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
new file mode 100755
index 000000000000..22c3fdca8975
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 2 ] ; then
+	[ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+	header_dir=tools/include/uapi/asm-generic
+	arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+	header_dir=$1
+	arch_header_dir=$2
+fi
+
+arch_mman=${arch_header_dir}/mman.h
+
+# those in egrep -vw are flags, we want just the bits
+
+printf "static const char *mmap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+egrep -q $regex ${arch_mman} && \
+(egrep $regex ${arch_mman} | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman-common.h | \
+	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
index d929ad7dd97b..6879d36d3004 100644
--- a/tools/perf/trace/beauty/mode_t.c
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c
new file mode 100644
index 000000000000..712935c6620a
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/mount_flags.c
+ *
+ *  Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/mount.h>
+
+static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mount_flags_array.c"
+	static DEFINE_STRARRAY(mount_flags);
+
+	return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags);
+}
+
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags)
+{
+	// do_mount in fs/namespace.c:
+	/*
+	 * Pre-0.97 versions of mount() didn't have a flags word.  When the
+	 * flags word was introduced its top half was required to have the
+	 * magic value 0xC0ED, and this remained so until 2.4.0-test9.
+	 * Therefore, if this magic number is present, it carries no
+	 * information and must be discarded.
+	 */
+	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+		flags &= ~MS_MGC_MSK;
+
+	return flags;
+}
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long flags = arg->val;
+
+	return mount__scnprintf_flags(flags, bf, size);
+}
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
new file mode 100755
index 000000000000..45547573a1db
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *mount_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+	sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
+	xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[%s + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index c064d6aae659..1b9d6306d274 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
index 6aec6178a99d..cc673fec9184 100644
--- a/tools/perf/trace/beauty/open_flags.c
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 2bafd7c995ff..981185c1974b 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef PERF_FLAG_FD_NO_GROUP
 # define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #endif
diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh
index faea4237c793..9aabd9743ef6 100755
--- a/tools/perf/trace/beauty/perf_ioctl.sh
+++ b/tools/perf/trace/beauty/perf_ioctl.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *perf_ioctl_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+PERF_EVENT_IOC_(\w+)[[:space:]]+_IO[RW]*[[:space:]]*\([[:space:]]*.\$.[[:space:]]*,[[:space:]]*([[:digit:]]+).*'
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
index 0313df342830..1a6acc46807b 100644
--- a/tools/perf/trace/beauty/pid.c
+++ b/tools/perf/trace/beauty/pid.c
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
+
 size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
 {
 	int pid = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c
index 2ba784a3734a..1b8ed4cac815 100644
--- a/tools/perf/trace/beauty/pkey_alloc.c
+++ b/tools/perf/trace/beauty/pkey_alloc.c
@@ -1,40 +1,36 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/pkey_alloc.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
 #include <linux/log2.h>
 
-static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags)
 {
 	int i, printed = 0;
 
-#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
-	static DEFINE_STRARRAY(pkey_alloc_access_rights);
-
-	if (access_rights == 0) {
-		const char *s = strarray__pkey_alloc_access_rights.entries[0];
+	if (flags == 0) {
+		const char *s = sa->entries[0];
 		if (s)
 			return scnprintf(bf, size, "%s", s);
 		return scnprintf(bf, size, "%d", 0);
 	}
 
-	for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
-		int bit = 1 << (i - 1);
+	for (i = 1; i < sa->nr_entries; ++i) {
+		unsigned long bit = 1UL << (i - 1);
 
-		if (!(access_rights & bit))
+		if (!(flags & bit))
 			continue;
 
 		if (printed != 0)
 			printed += scnprintf(bf + printed, size - printed, "|");
 
-		if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
-			printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+		if (sa->entries[i] != NULL)
+			printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]);
 		else
 			printed += scnprintf(bf + printed, size - printed, "0x%#", bit);
 	}
@@ -42,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s
 	return printed;
 }
 
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+	static DEFINE_STRARRAY(pkey_alloc_access_rights);
+
+	return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights);
+}
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
 {
 	unsigned long cmd = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
index 62e51a02b839..f8f1b560cf8a 100755
--- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
 printf "static const char *pkey_alloc_access_rights[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*'
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index 246130dad6c4..be7a5d395975 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/prctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 0be4138fbe71..d32f8f1124af 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *prctl_options[] = {\n"
 regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
index ba5096ae76b6..48f2b5c9aa3e 100644
--- a/tools/perf/trace/beauty/sched_policy.c
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sched.h>
 
 /*
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
index b7097fd5fed9..e36156b19c70 100644
--- a/tools/perf/trace/beauty/seccomp.c
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef SECCOMP_SET_MODE_STRICT
 #define SECCOMP_SET_MODE_STRICT 0
 #endif
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
index bde18a53f090..587fec545b8a 100644
--- a/tools/perf/trace/beauty/signum.c
+++ b/tools/perf/trace/beauty/signum.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <signal.h>
 
 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
index aad5ab130539..e0803b957593 100755
--- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
@@ -1,8 +1,9 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-sound_header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
 printf "static const char *sndrv_ctl_ioctl_cmds[] = {\n"
-grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $sound_header_dir/asound.h | \
+grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $header_dir/asound.h | \
 	sed -r 's/^#define +SNDRV_CTL_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.U., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
 printf "};\n"
diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
index b7e9ef6b2f55..7a464a7bf913 100755
--- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
@@ -1,8 +1,9 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-sound_header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
 printf "static const char *sndrv_pcm_ioctl_cmds[] = {\n"
-grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $sound_header_dir/asound.h | \
+grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $header_dir/asound.h | \
 	sed -r 's/^#define +SNDRV_PCM_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.A., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
 printf "};\n"
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
new file mode 100644
index 000000000000..9410ad230f10
--- /dev/null
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include "trace/beauty/beauty.h"
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <arpa/inet.h>
+
+static const char *socket_families[] = {
+	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
+	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
+	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
+	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
+	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
+	"ALG", "NFC", "VSOCK",
+};
+DEFINE_STRARRAY(socket_families);
+
+static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+	char tmp[16];
+	return scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin->sin_port),
+			 inet_ntop(sin->sin_family, &sin->sin_addr, tmp, sizeof(tmp)));
+}
+
+static size_t af_inet6__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+{
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+	u32 flowinfo = ntohl(sin6->sin6_flowinfo);
+	char tmp[512];
+	size_t printed = scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin6->sin6_port),
+				   inet_ntop(sin6->sin6_family, &sin6->sin6_addr, tmp, sizeof(tmp)));
+	if (flowinfo != 0)
+		printed += scnprintf(bf + printed, size - printed, ", flowinfo: %lu", flowinfo);
+	if (sin6->sin6_scope_id != 0)
+		printed += scnprintf(bf + printed, size - printed, ", scope_id: %lu", sin6->sin6_scope_id);
+
+	return printed;
+}
+
+static size_t af_local__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+{
+	struct sockaddr_un *sun = (struct sockaddr_un *)sa;
+	return scnprintf(bf, size, ", path: %s", sun->sun_path);
+}
+
+static size_t (*af_scnprintfs[])(struct sockaddr *sa, char *bf, size_t size) = {
+	[AF_LOCAL] = af_local__scnprintf,
+	[AF_INET]  = af_inet__scnprintf,
+	[AF_INET6] = af_inet6__scnprintf,
+};
+
+static size_t syscall_arg__scnprintf_augmented_sockaddr(struct syscall_arg *arg, char *bf, size_t size)
+{
+	struct sockaddr *sa = (struct sockaddr *)arg->augmented.args;
+	char family[32];
+	size_t printed;
+
+	strarray__scnprintf(&strarray__socket_families, family, sizeof(family), "%d", sa->sa_family);
+	printed = scnprintf(bf, size, "{ .family: %s", family);
+
+	if (sa->sa_family < ARRAY_SIZE(af_scnprintfs) && af_scnprintfs[sa->sa_family])
+		printed += af_scnprintfs[sa->sa_family](sa, bf + printed, size - printed);
+
+	return printed + scnprintf(bf + printed, size - printed, " }");
+}
+
+size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg)
+{
+	if (arg->augmented.args)
+		return syscall_arg__scnprintf_augmented_sockaddr(arg, bf, size);
+
+	return scnprintf(bf, size, "%#x", arg->val);
+}
diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c
new file mode 100644
index 000000000000..d971a2596417
--- /dev/null
+++ b/tools/perf/trace/beauty/socket.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/socket.c
+ *
+ *  Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <sys/types.h>
+#include <sys/socket.h>
+
+static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size)
+{
+#include "trace/beauty/generated/socket_ipproto_array.c"
+	static DEFINE_STRARRAY(socket_ipproto);
+
+	return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", protocol);
+}
+
+size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int domain = syscall_arg__val(arg, 0);
+
+	if (domain == AF_INET || domain == AF_INET6)
+		return socket__scnprintf_ipproto(arg->val, bf, size);
+
+	return syscall_arg__scnprintf_int(bf, size, arg);
+}
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
new file mode 100755
index 000000000000..de0f2f29017f
--- /dev/null
+++ b/tools/perf/trace/beauty/socket_ipproto.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *socket_ipproto[] = {\n"
+regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
+
+egrep $regex ${header_dir}/in.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
index bca26aef4a77..a63a9a332aa0 100644
--- a/tools/perf/trace/beauty/socket_type.c
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index 5643b692af4c..630f2760dd66 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/statx.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
index 76f1de697787..439773daaf77 100755
--- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -1,17 +1,18 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
-vhost_virtio_header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *vhost_virtio_ioctl_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-egrep $regex ${vhost_virtio_header_dir}/vhost.h | \
+egrep $regex ${header_dir}/vhost.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
 
 printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-egrep $regex ${vhost_virtio_header_dir}/vhost.h | \
+egrep $regex ${header_dir}/vhost.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
index 8465281a093d..42ff58ad613b 100644
--- a/tools/perf/trace/beauty/waitid_options.c
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/wait.h>
 
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 3781d74088a7..1d00e5ec7906 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <sys/ttydefaults.h>
+#include <asm/bug.h>
 
 struct disasm_line_samples {
 	double		      percent;
@@ -29,6 +30,7 @@ struct annotate_browser {
 	struct rb_node		   *curr_hot;
 	struct annotation_line	   *selection;
 	struct arch		   *arch;
+	struct annotation_options  *opts;
 	bool			    searching_backwards;
 	char			    search_bf[128];
 };
@@ -114,7 +116,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	if (!browser->navkeypressed)
 		ops.width += 1;
 
-	annotation_line__write(al, notes, &ops);
+	annotation_line__write(al, notes, &ops, ab->opts);
 
 	if (ops.current_entry)
 		ab->selection = al;
@@ -226,10 +228,10 @@ static int disasm__cmp(struct annotation_line *a, struct annotation_line *b)
 {
 	int i;
 
-	for (i = 0; i < a->samples_nr; i++) {
-		if (a->samples[i].percent == b->samples[i].percent)
+	for (i = 0; i < a->data_nr; i++) {
+		if (a->data[i].percent == b->data[i].percent)
 			continue;
-		return a->samples[i].percent < b->samples[i].percent;
+		return a->data[i].percent < b->data[i].percent;
 	}
 	return 0;
 }
@@ -313,11 +315,14 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 			continue;
 		}
 
-		for (i = 0; i < pos->al.samples_nr; i++) {
-			struct annotation_data *sample = &pos->al.samples[i];
+		for (i = 0; i < pos->al.data_nr; i++) {
+			double percent;
 
-			if (max_percent < sample->percent)
-				max_percent = sample->percent;
+			percent = annotation_data__percent(&pos->al.data[i],
+							   browser->opts->percent_type);
+
+			if (max_percent < percent)
+				max_percent = percent;
 		}
 
 		if (max_percent < 0.01 && pos->al.ipc == 0) {
@@ -379,9 +384,10 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser)
 #define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
 
 static int sym_title(struct symbol *sym, struct map *map, char *title,
-		     size_t sz)
+		     size_t sz, int percent_type)
 {
-	return snprintf(title, sz, "%s  %s", sym->name, map->dso->long_name);
+	return snprintf(title, sz, "%s  %s [Percent: %s]", sym->name, map->dso->long_name,
+			percent_type_str(percent_type));
 }
 
 /*
@@ -410,7 +416,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 	notes = symbol__annotation(dl->ops.target.sym);
 	pthread_mutex_lock(&notes->lock);
 
-	if (notes->src == NULL && symbol__alloc_hist(dl->ops.target.sym) < 0) {
+	if (!symbol__hists(dl->ops.target.sym, evsel->evlist->nr_entries)) {
 		pthread_mutex_unlock(&notes->lock);
 		ui__warning("Not enough memory for annotating '%s' symbol!\n",
 			    dl->ops.target.sym->name);
@@ -418,8 +424,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 	}
 
 	pthread_mutex_unlock(&notes->lock);
-	symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt);
-	sym_title(ms->sym, ms->map, title, sizeof(title));
+	symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt, browser->opts);
+	sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type);
 	ui_browser__show_title(&browser->b, title);
 	return true;
 }
@@ -594,6 +600,7 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
 
 static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
 {
+	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
 	struct map_symbol *ms = browser->priv;
 	struct symbol *sym = ms->sym;
 	char symbol_dso[SYM_TITLE_MAX_SIZE];
@@ -601,7 +608,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const
 	if (ui_browser__show(browser, title, help) < 0)
 		return -1;
 
-	sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso));
+	sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type);
 
 	ui_browser__gotorc_title(browser, 0, 0);
 	ui_browser__set_color(browser, HE_COLORSET_ROOT);
@@ -609,6 +616,39 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const
 	return 0;
 }
 
+static void
+switch_percent_type(struct annotation_options *opts, bool base)
+{
+	switch (opts->percent_type) {
+	case PERCENT_HITS_LOCAL:
+		if (base)
+			opts->percent_type = PERCENT_PERIOD_LOCAL;
+		else
+			opts->percent_type = PERCENT_HITS_GLOBAL;
+		break;
+	case PERCENT_HITS_GLOBAL:
+		if (base)
+			opts->percent_type = PERCENT_PERIOD_GLOBAL;
+		else
+			opts->percent_type = PERCENT_HITS_LOCAL;
+		break;
+	case PERCENT_PERIOD_LOCAL:
+		if (base)
+			opts->percent_type = PERCENT_HITS_LOCAL;
+		else
+			opts->percent_type = PERCENT_PERIOD_GLOBAL;
+		break;
+	case PERCENT_PERIOD_GLOBAL:
+		if (base)
+			opts->percent_type = PERCENT_HITS_GLOBAL;
+		else
+			opts->percent_type = PERCENT_PERIOD_LOCAL;
+		break;
+	default:
+		WARN_ON(1);
+	}
+}
+
 static int annotate_browser__run(struct annotate_browser *browser,
 				 struct perf_evsel *evsel,
 				 struct hist_browser_timer *hbt)
@@ -623,8 +663,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 	char title[256];
 	int key;
 
-	annotation__scnprintf_samples_period(notes, title, sizeof(title), evsel);
-
+	hists__scnprintf_title(hists, title, sizeof(title));
 	if (annotate_browser__show(&browser->b, title, help) < 0)
 		return -1;
 
@@ -695,10 +734,13 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"O             Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
 		"s             Toggle source code view\n"
 		"t             Circulate percent, total period, samples view\n"
+		"c             Show min/max cycle\n"
 		"/             Search string\n"
 		"k             Toggle line numbers\n"
 		"P             Print to [symbol_name].annotation file.\n"
 		"r             Run available scripts\n"
+		"p             Toggle percent type [local/global]\n"
+		"b             Toggle percent base [period/hits]\n"
 		"?             Search string backwards\n");
 			continue;
 		case 'r':
@@ -779,7 +821,7 @@ show_sup_ins:
 			continue;
 		}
 		case 'P':
-			map_symbol__annotation_dump(ms, evsel);
+			map_symbol__annotation_dump(ms, evsel, browser->opts);
 			continue;
 		case 't':
 			if (notes->options->show_total_period) {
@@ -791,6 +833,19 @@ show_sup_ins:
 				notes->options->show_total_period = true;
 			annotation__update_column_widths(notes);
 			continue;
+		case 'c':
+			if (notes->options->show_minmax_cycle)
+				notes->options->show_minmax_cycle = false;
+			else
+				notes->options->show_minmax_cycle = true;
+			annotation__update_column_widths(notes);
+			continue;
+		case 'p':
+		case 'b':
+			switch_percent_type(browser->opts, key == 'b');
+			hists__scnprintf_title(hists, title, sizeof(title));
+			annotate_browser__show(&browser->b, title, help);
+			continue;
 		case K_LEFT:
 		case K_ESC:
 		case 'q':
@@ -809,24 +864,27 @@ out:
 }
 
 int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
-			     struct hist_browser_timer *hbt)
+			     struct hist_browser_timer *hbt,
+			     struct annotation_options *opts)
 {
-	return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
+	return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt, opts);
 }
 
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
-			     struct hist_browser_timer *hbt)
+			     struct hist_browser_timer *hbt,
+			     struct annotation_options *opts)
 {
 	/* reset abort key so that it can get Ctrl-C as a key */
 	SLang_reset_tty();
 	SLang_init_tty(0, 0, 0);
 
-	return map_symbol__tui_annotate(&he->ms, evsel, hbt);
+	return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts);
 }
 
 int symbol__tui_annotate(struct symbol *sym, struct map *map,
 			 struct perf_evsel *evsel,
-			 struct hist_browser_timer *hbt)
+			 struct hist_browser_timer *hbt,
+			 struct annotation_options *opts)
 {
 	struct annotation *notes = symbol__annotation(sym);
 	struct map_symbol ms = {
@@ -843,6 +901,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 			.priv	 = &ms,
 			.use_navkeypressed = true,
 		},
+		.opts = opts,
 	};
 	int ret = -1, err;
 
@@ -852,7 +911,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 	if (map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__annotate2(sym, map, evsel, &annotation__default_options, &browser.arch);
+	err = symbol__annotate2(sym, map, evsel, opts, &browser.arch);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e5f247247daa..a96f62ca984a 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1231,6 +1231,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 	int width = browser->b.width;
 	char folded_sign = ' ';
 	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	bool use_callchain = hist_entry__has_callchains(entry) && symbol_conf.use_callchain;
 	off_t row_offset = entry->row_offset;
 	bool first = true;
 	struct perf_hpp_fmt *fmt;
@@ -1240,7 +1241,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 		browser->selection = &entry->ms;
 	}
 
-	if (symbol_conf.use_callchain) {
+	if (use_callchain) {
 		hist_entry__init_have_children(entry);
 		folded_sign = hist_entry__folded(entry);
 	}
@@ -1276,7 +1277,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 			}
 
 			if (first) {
-				if (symbol_conf.use_callchain) {
+				if (use_callchain) {
 					ui_browser__printf(&browser->b, "%c ", folded_sign);
 					width -= 2;
 				}
@@ -1583,7 +1584,7 @@ hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf,
 	int column = 0;
 	int span = 0;
 
-	if (symbol_conf.use_callchain) {
+	if (hists__has_callchains(hists) && symbol_conf.use_callchain) {
 		ret = scnprintf(buf, size, "  ");
 		if (advance_hpp_check(&dummy_hpp, ret))
 			return ret;
@@ -1987,7 +1988,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
 	bool first = true;
 	int ret;
 
-	if (symbol_conf.use_callchain) {
+	if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
 		folded_sign = hist_entry__folded(he);
 		printed += fprintf(fp, "%c ", folded_sign);
 	}
@@ -2175,7 +2176,8 @@ struct hist_browser *hist_browser__new(struct hists *hists)
 static struct hist_browser *
 perf_evsel_browser__new(struct perf_evsel *evsel,
 			struct hist_browser_timer *hbt,
-			struct perf_env *env)
+			struct perf_env *env,
+			struct annotation_options *annotation_opts)
 {
 	struct hist_browser *browser = hist_browser__new(evsel__hists(evsel));
 
@@ -2183,6 +2185,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel,
 		browser->hbt   = hbt;
 		browser->env   = env;
 		browser->title = hists_browser__scnprintf_title;
+		browser->annotation_opts = annotation_opts;
 	}
 	return browser;
 }
@@ -2336,7 +2339,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
 	struct hist_entry *he;
 	int err;
 
-	if (!objdump_path && perf_env__lookup_objdump(browser->env))
+	if (!browser->annotation_opts->objdump_path &&
+	    perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path))
 		return 0;
 
 	notes = symbol__annotation(act->ms.sym);
@@ -2344,7 +2348,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
 		return 0;
 
 	evsel = hists_to_evsel(browser->hists);
-	err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
+	err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt,
+				       browser->annotation_opts);
 	he = hist_browser__selected_entry(browser);
 	/*
 	 * offer option to annotate the other branch source or target
@@ -2667,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb,
 			he->nr_rows = 0;
 		}
 
-		if (!he->leaf || !symbol_conf.use_callchain)
+		if (!he->leaf || !hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
 			goto next;
 
 		if (callchain_param.mode == CHAIN_GRAPH_REL) {
@@ -2697,10 +2702,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    struct hist_browser_timer *hbt,
 				    float min_pcnt,
 				    struct perf_env *env,
-				    bool warn_lost_event)
+				    bool warn_lost_event,
+				    struct annotation_options *annotation_opts)
 {
 	struct hists *hists = evsel__hists(evsel);
-	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
+	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts);
 	struct branch_info *bi;
 #define MAX_OPTIONS  16
 	char *options[MAX_OPTIONS];
@@ -3062,6 +3068,7 @@ out:
 struct perf_evsel_menu {
 	struct ui_browser b;
 	struct perf_evsel *selection;
+	struct annotation_options *annotation_opts;
 	bool lost_events, lost_events_warned;
 	float min_pcnt;
 	struct perf_env *env;
@@ -3163,7 +3170,8 @@ browse_hists:
 						       true, hbt,
 						       menu->min_pcnt,
 						       menu->env,
-						       warn_lost_event);
+						       warn_lost_event,
+						       menu->annotation_opts);
 			ui_browser__show_title(&menu->b, title);
 			switch (key) {
 			case K_TAB:
@@ -3222,7 +3230,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 					   struct hist_browser_timer *hbt,
 					   float min_pcnt,
 					   struct perf_env *env,
-					   bool warn_lost_event)
+					   bool warn_lost_event,
+					   struct annotation_options *annotation_opts)
 {
 	struct perf_evsel *pos;
 	struct perf_evsel_menu menu = {
@@ -3237,6 +3246,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 		},
 		.min_pcnt = min_pcnt,
 		.env = env,
+		.annotation_opts = annotation_opts,
 	};
 
 	ui_helpline__push("Press ESC to exit");
@@ -3257,7 +3267,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
 				  struct perf_env *env,
-				  bool warn_lost_event)
+				  bool warn_lost_event,
+				  struct annotation_options *annotation_opts)
 {
 	int nr_entries = evlist->nr_entries;
 
@@ -3267,7 +3278,8 @@ single_entry:
 
 		return perf_evsel__hists_browse(first, nr_entries, help,
 						false, hbt, min_pcnt,
-						env, warn_lost_event);
+						env, warn_lost_event,
+						annotation_opts);
 	}
 
 	if (symbol_conf.event_group) {
@@ -3285,5 +3297,6 @@ single_entry:
 
 	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
 					       hbt, min_pcnt, env,
-					       warn_lost_event);
+					       warn_lost_event,
+					       annotation_opts);
 }
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 9428bee076f2..91d3e18b50aa 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -4,6 +4,8 @@
 
 #include "ui/browser.h"
 
+struct annotation_options;
+
 struct hist_browser {
 	struct ui_browser   b;
 	struct hists	    *hists;
@@ -12,6 +14,7 @@ struct hist_browser {
 	struct hist_browser_timer *hbt;
 	struct pstack	    *pstack;
 	struct perf_env	    *env;
+	struct annotation_options *annotation_opts;
 	int		     print_seq;
 	bool		     show_dso;
 	bool		     show_headers;
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index e03fa75f108a..5b8b8c637686 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -104,7 +104,7 @@ int map__browse(struct map *map)
 {
 	struct map_browser mb = {
 		.b = {
-			.entries = &map->dso->symbols[map->type],
+			.entries = &map->dso->symbols,
 			.refresh = ui_browser__rb_tree_refresh,
 			.seek	 = ui_browser__rb_tree_seek,
 			.write	 = map_browser__write,
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index aeeaf15029f0..48428c9acd89 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 	if (map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__annotate(sym, map, evsel, 0, NULL);
+	err = symbol__annotate(sym, map, evsel, 0, &annotation__default_options, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 24e1ec201ffd..4ab663ec3e5e 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -382,7 +382,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
 
-		if (symbol_conf.use_callchain && hists__has(hists, sym)) {
+		if (hist_entry__has_callchains(h) &&
+		    symbol_conf.use_callchain && hists__has(hists, sym)) {
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
 				total = symbol_conf.cumulate_callchain ?
 					h->stat_acc->period : h->stat.period;
@@ -479,7 +480,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists,
 			}
 		}
 
-		if (symbol_conf.use_callchain && he->leaf) {
+		if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
 				total = symbol_conf.cumulate_callchain ?
 					he->stat_acc->period : he->stat.period;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 706f6f1e9c7d..fe3dfaa64a91 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -207,7 +207,7 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
 		if (ret)
 			return ret;
 
-		if (a->thread != b->thread || !symbol_conf.use_callchain)
+		if (a->thread != b->thread || !hist_entry__has_callchains(a) || !symbol_conf.use_callchain)
 			return 0;
 
 		ret = b->callchain->max_depth - a->callchain->max_depth;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 6832fcb2e6ff..74c4ae1f0a05 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -516,7 +516,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
 	}
 	printed += putc('\n', fp);
 
-	if (symbol_conf.use_callchain && he->leaf) {
+	if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
 		u64 total = hists__total_period(hists);
 
 		printed += hist_entry_callchain__fprintf(he, total, 0, fp);
@@ -529,7 +529,7 @@ out:
 
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 			       char *bf, size_t bfsz, FILE *fp,
-			       bool use_callchain)
+			       bool ignore_callchains)
 {
 	int ret;
 	int callchain_ret = 0;
@@ -550,7 +550,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 
 	ret = fprintf(fp, "%s\n", bf);
 
-	if (use_callchain)
+	if (hist_entry__has_callchains(he) && !ignore_callchains)
 		callchain_ret = hist_entry_callchain__fprintf(he, total_period,
 							      0, fp);
 
@@ -755,7 +755,7 @@ int hists__fprintf_headers(struct hists *hists, FILE *fp)
 
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp,
-		      bool use_callchain)
+		      bool ignore_callchains)
 {
 	struct rb_node *nd;
 	size_t ret = 0;
@@ -799,7 +799,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		if (percent < min_pcnt)
 			continue;
 
-		ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, use_callchain);
+		ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, ignore_callchains);
 
 		if (max_rows && ++nr_rows >= max_rows)
 			break;
@@ -819,8 +819,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		}
 
 		if (h->ms.map == NULL && verbose > 1) {
-			__map_groups__fprintf_maps(h->thread->mg,
-						   MAP__FUNCTION, fp);
+			map_groups__fprintf(h->thread->mg, fp);
 			fprintf(fp, "%.10s end\n", graph_dotted_line);
 		}
 	}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8052373bcd6a..b7bf201fe8a8 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ libperf-y += evlist.o
 libperf-y += evsel.o
 libperf-y += evsel_fprintf.o
 libperf-y += find_bit.o
+libperf-y += get_current_dir_name.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
 libperf-y += llvm-utils.o
@@ -24,7 +25,6 @@ libperf-y += libstring.o
 libperf-y += bitmap.o
 libperf-y += hweight.o
 libperf-y += smt.o
-libperf-y += quote.o
 libperf-y += strbuf.o
 libperf-y += string.o
 libperf-y += strlist.o
@@ -74,6 +74,7 @@ libperf-y += vdso.o
 libperf-y += counts.o
 libperf-y += stat.o
 libperf-y += stat-shadow.o
+libperf-y += stat-display.o
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o
@@ -88,6 +89,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o
 libperf-$(CONFIG_AUXTRACE) += intel-bts.o
 libperf-$(CONFIG_AUXTRACE) += arm-spe.o
 libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
 
 ifdef CONFIG_LIBOPENCSD
 libperf-$(CONFIG_AUXTRACE) += cs-etm.o
@@ -152,6 +154,8 @@ libperf-y += perf-hooks.o
 libperf-$(CONFIG_CXX) += c++/
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+
 # avoid compiler warnings in 32-bit mode
 CFLAGS_genelf_debug.o  += -Wno-packed
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 5d74a30fe00f..6936daf89ddd 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -21,6 +21,7 @@
 #include "debug.h"
 #include "annotate.h"
 #include "evsel.h"
+#include "evlist.h"
 #include "block-range.h"
 #include "string2.h"
 #include "arch/common.h"
@@ -46,11 +47,11 @@
 struct annotation_options annotation__default_options = {
 	.use_offset     = true,
 	.jump_arrows    = true,
+	.annotate_src	= true,
 	.offset_level	= ANNOTATION__OFFSET_JUMP_TARGETS,
+	.percent_type	= PERCENT_PERIOD_LOCAL,
 };
 
-const char 	*disassembler_style;
-const char	*objdump_path;
 static regex_t	 file_lineno;
 
 static struct ins_ops *ins__find(struct arch *arch, const char *name);
@@ -138,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
 
 static struct arch architectures[] = {
 	{
@@ -169,6 +171,13 @@ static struct arch architectures[] = {
 			.comment_char = '#',
 		},
 	},
+	{
+		.name = "sparc",
+		.init = sparc__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
 };
 
 static void ins__delete(struct ins_operands *ops)
@@ -245,8 +254,14 @@ find_target:
 
 indirect_call:
 	tok = strchr(endptr, '*');
-	if (tok != NULL)
-		ops->target.addr = strtoull(tok + 1, NULL, 16);
+	if (tok != NULL) {
+		endptr++;
+
+		/* Indirect call can use a non-rip register and offset: callq  *0x8(%rbx).
+		 * Do not parse such instruction.  */
+		if (strstr(endptr, "(%r") == NULL)
+			ops->target.addr = strtoull(endptr, NULL, 16);
+	}
 	goto find_target;
 }
 
@@ -275,7 +290,19 @@ bool ins__is_call(const struct ins *ins)
 	return ins->ops == &call_ops || ins->ops == &s390_call_ops;
 }
 
-static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms)
+/*
+ * Prevents from matching commas in the comment section, e.g.:
+ * ffff200008446e70:       b.cs    ffff2000084470f4 <generic_exec_single+0x314>  // b.hs, b.nlast
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+	if (ops->raw_comment && c > ops->raw_comment)
+		return NULL;
+
+	return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
 {
 	struct map *map = ms->map;
 	struct symbol *sym = ms->sym;
@@ -284,6 +311,10 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 	};
 	const char *c = strchr(ops->raw, ',');
 	u64 start, end;
+
+	ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+	c = validate_comma(c, ops);
+
 	/*
 	 * Examples of lines to parse for the _cpp_lex_token@@Base
 	 * function:
@@ -303,6 +334,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 		ops->target.addr = strtoull(c, NULL, 16);
 		if (!ops->target.addr) {
 			c = strchr(c, ',');
+			c = validate_comma(c, ops);
 			if (c++ != NULL)
 				ops->target.addr = strtoull(c, NULL, 16);
 		}
@@ -360,9 +392,12 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
 
 	c = strchr(ops->raw, ',');
+	c = validate_comma(c, ops);
+
 	if (c != NULL) {
 		const char *c2 = strchr(c + 1, ',');
 
+		c2 = validate_comma(c2, ops);
 		/* check for 3-op insn */
 		if (c2 != NULL)
 			c = c2;
@@ -678,10 +713,28 @@ static struct arch *arch__find(const char *name)
 	return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
 }
 
-int symbol__alloc_hist(struct symbol *sym)
+static struct annotated_source *annotated_source__new(void)
+{
+	struct annotated_source *src = zalloc(sizeof(*src));
+
+	if (src != NULL)
+		INIT_LIST_HEAD(&src->source);
+
+	return src;
+}
+
+static __maybe_unused void annotated_source__delete(struct annotated_source *src)
+{
+	if (src == NULL)
+		return;
+	zfree(&src->histograms);
+	zfree(&src->cycles_hist);
+	free(src);
+}
+
+static int annotated_source__alloc_histograms(struct annotated_source *src,
+					      size_t size, int nr_hists)
 {
-	struct annotation *notes = symbol__annotation(sym);
-	size_t size = symbol__size(sym);
 	size_t sizeof_sym_hist;
 
 	/*
@@ -701,17 +754,13 @@ int symbol__alloc_hist(struct symbol *sym)
 	sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
 
 	/* Check for overflow in zalloc argument */
-	if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src))
-				/ symbol_conf.nr_events)
+	if (sizeof_sym_hist > SIZE_MAX / nr_hists)
 		return -1;
 
-	notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist);
-	if (notes->src == NULL)
-		return -1;
-	notes->src->sizeof_sym_hist = sizeof_sym_hist;
-	notes->src->nr_histograms   = symbol_conf.nr_events;
-	INIT_LIST_HEAD(&notes->src->source);
-	return 0;
+	src->sizeof_sym_hist = sizeof_sym_hist;
+	src->nr_histograms   = nr_hists;
+	src->histograms	     = calloc(nr_hists, sizeof_sym_hist) ;
+	return src->histograms ? 0 : -1;
 }
 
 /* The cycles histogram is lazily allocated. */
@@ -741,14 +790,11 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
 	pthread_mutex_unlock(&notes->lock);
 }
 
-static int __symbol__account_cycles(struct annotation *notes,
+static int __symbol__account_cycles(struct cyc_hist *ch,
 				    u64 start,
 				    unsigned offset, unsigned cycles,
 				    unsigned have_start)
 {
-	struct cyc_hist *ch;
-
-	ch = notes->src->cycles_hist;
 	/*
 	 * For now we can only account one basic block per
 	 * final jump. But multiple could be overlapping.
@@ -760,6 +806,15 @@ static int __symbol__account_cycles(struct annotation *notes,
 	ch[offset].num_aggr++;
 	ch[offset].cycles_aggr += cycles;
 
+	if (cycles > ch[offset].cycles_max)
+		ch[offset].cycles_max = cycles;
+
+	if (ch[offset].cycles_min) {
+		if (cycles && cycles < ch[offset].cycles_min)
+			ch[offset].cycles_min = cycles;
+	} else
+		ch[offset].cycles_min = cycles;
+
 	if (!have_start && ch[offset].have_start)
 		return 0;
 	if (ch[offset].num) {
@@ -782,7 +837,7 @@ static int __symbol__account_cycles(struct annotation *notes,
 }
 
 static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
-				      struct annotation *notes, int evidx, u64 addr,
+				      struct annotated_source *src, int evidx, u64 addr,
 				      struct perf_sample *sample)
 {
 	unsigned offset;
@@ -798,7 +853,12 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 	}
 
 	offset = addr - sym->start;
-	h = annotation__histogram(notes, evidx);
+	h = annotated_source__histogram(src, evidx);
+	if (h == NULL) {
+		pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
+			 __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
+		return -ENOMEM;
+	}
 	h->nr_samples++;
 	h->addr[offset].nr_samples++;
 	h->period += sample->period;
@@ -811,45 +871,69 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 	return 0;
 }
 
-static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
+static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
 
 	if (notes->src == NULL) {
-		if (symbol__alloc_hist(sym) < 0)
+		notes->src = annotated_source__new();
+		if (notes->src == NULL)
 			return NULL;
+		goto alloc_cycles_hist;
+	}
+
+	if (!notes->src->cycles_hist) {
+alloc_cycles_hist:
+		symbol__alloc_hist_cycles(sym);
 	}
-	if (!notes->src->cycles_hist && cycles) {
-		if (symbol__alloc_hist_cycles(sym) < 0)
+
+	return notes->src->cycles_hist;
+}
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (notes->src == NULL) {
+		notes->src = annotated_source__new();
+		if (notes->src == NULL)
 			return NULL;
+		goto alloc_histograms;
 	}
-	return notes;
+
+	if (notes->src->histograms == NULL) {
+alloc_histograms:
+		annotated_source__alloc_histograms(notes->src, symbol__size(sym),
+						   nr_hists);
+	}
+
+	return notes->src;
 }
 
 static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
-				    int evidx, u64 addr,
+				    struct perf_evsel *evsel, u64 addr,
 				    struct perf_sample *sample)
 {
-	struct annotation *notes;
+	struct annotated_source *src;
 
 	if (sym == NULL)
 		return 0;
-	notes = symbol__get_annotation(sym, false);
-	if (notes == NULL)
+	src = symbol__hists(sym, evsel->evlist->nr_entries);
+	if (src == NULL)
 		return -ENOMEM;
-	return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample);
+	return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample);
 }
 
 static int symbol__account_cycles(u64 addr, u64 start,
 				  struct symbol *sym, unsigned cycles)
 {
-	struct annotation *notes;
+	struct cyc_hist *cycles_hist;
 	unsigned offset;
 
 	if (sym == NULL)
 		return 0;
-	notes = symbol__get_annotation(sym, true);
-	if (notes == NULL)
+	cycles_hist = symbol__cycles_hist(sym);
+	if (cycles_hist == NULL)
 		return -ENOMEM;
 	if (addr < sym->start || addr >= sym->end)
 		return -ERANGE;
@@ -861,7 +945,7 @@ static int symbol__account_cycles(u64 addr, u64 start,
 			start = 0;
 	}
 	offset = addr - sym->start;
-	return __symbol__account_cycles(notes,
+	return __symbol__account_cycles(cycles_hist,
 					start ? start - sym->start : 0,
 					offset, cycles,
 					!!start);
@@ -953,8 +1037,11 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
 			if (ch->have_start)
 				annotation__count_and_fill(notes, ch->start, offset, ch);
 			al = notes->offsets[offset];
-			if (al && ch->num_aggr)
+			if (al && ch->num_aggr) {
 				al->cycles = ch->cycles_aggr / ch->num_aggr;
+				al->cycles_max = ch->cycles_max;
+				al->cycles_min = ch->cycles_min;
+			}
 			notes->have_cycles = true;
 		}
 	}
@@ -962,15 +1049,15 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
 }
 
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
-				 int evidx)
+				 struct perf_evsel *evsel)
 {
-	return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr, sample);
+	return symbol__inc_addr_samples(ams->sym, ams->map, evsel, ams->al_addr, sample);
 }
 
 int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
-				 int evidx, u64 ip)
+				 struct perf_evsel *evsel, u64 ip)
 {
-	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample);
+	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evsel, ip, sample);
 }
 
 static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
@@ -1019,6 +1106,7 @@ struct annotate_args {
 	struct arch		*arch;
 	struct map_symbol	 ms;
 	struct perf_evsel	*evsel;
+	struct annotation_options *options;
 	s64			 offset;
 	char			*line;
 	int			 line_nr;
@@ -1055,7 +1143,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize)
 	if (perf_evsel__is_group_event(evsel))
 		nr = evsel->nr_members;
 
-	size += sizeof(al->samples[0]) * nr;
+	size += sizeof(al->data[0]) * nr;
 
 	al = zalloc(size);
 	if (al) {
@@ -1064,7 +1152,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize)
 		al->offset     = args->offset;
 		al->line       = strdup(args->line);
 		al->line_nr    = args->line_nr;
-		al->samples_nr = nr;
+		al->data_nr    = nr;
 	}
 
 	return al;
@@ -1244,7 +1332,8 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi
 static int
 annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
 		       struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
-		       int max_lines, struct annotation_line *queue, int addr_fmt_width)
+		       int max_lines, struct annotation_line *queue, int addr_fmt_width,
+		       int percent_type)
 {
 	struct disasm_line *dl = container_of(al, struct disasm_line, al);
 	static const char *prev_line;
@@ -1256,15 +1345,18 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
 		const char *color;
 		struct annotation *notes = symbol__annotation(sym);
 
-		for (i = 0; i < al->samples_nr; i++) {
-			struct annotation_data *sample = &al->samples[i];
+		for (i = 0; i < al->data_nr; i++) {
+			double percent;
 
-			if (sample->percent > max_percent)
-				max_percent = sample->percent;
+			percent = annotation_data__percent(&al->data[i],
+							   percent_type);
+
+			if (percent > max_percent)
+				max_percent = percent;
 		}
 
-		if (al->samples_nr > nr_percent)
-			nr_percent = al->samples_nr;
+		if (al->data_nr > nr_percent)
+			nr_percent = al->data_nr;
 
 		if (max_percent < min_pcnt)
 			return -1;
@@ -1277,7 +1369,8 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
 				if (queue == al)
 					break;
 				annotation_line__print(queue, sym, start, evsel, len,
-						       0, 0, 1, NULL, addr_fmt_width);
+						       0, 0, 1, NULL, addr_fmt_width,
+						       percent_type);
 			}
 		}
 
@@ -1298,18 +1391,20 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
 		}
 
 		for (i = 0; i < nr_percent; i++) {
-			struct annotation_data *sample = &al->samples[i];
+			struct annotation_data *data = &al->data[i];
+			double percent;
 
-			color = get_percent_color(sample->percent);
+			percent = annotation_data__percent(data, percent_type);
+			color = get_percent_color(percent);
 
 			if (symbol_conf.show_total_period)
 				color_fprintf(stdout, color, " %11" PRIu64,
-					      sample->he.period);
+					      data->he.period);
 			else if (symbol_conf.show_nr_samples)
 				color_fprintf(stdout, color, " %7" PRIu64,
-					      sample->he.nr_samples);
+					      data->he.nr_samples);
 			else
-				color_fprintf(stdout, color, " %7.2f", sample->percent);
+				color_fprintf(stdout, color, " %7.2f", percent);
 		}
 
 		printf(" : ");
@@ -1560,6 +1655,7 @@ fallback:
 
 static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 {
+	struct annotation_options *opts = args->options;
 	struct map *map = args->ms.map;
 	struct dso *dso = map->dso;
 	char *command;
@@ -1567,6 +1663,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	char symfs_filename[PATH_MAX];
 	struct kcore_extract kce;
 	bool delete_extract = false;
+	bool decomp = false;
 	int stdout_fd[2];
 	int lineno = 0;
 	int nline;
@@ -1600,6 +1697,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 						 tmp, sizeof(tmp)) < 0)
 			goto out;
 
+		decomp = true;
 		strcpy(symfs_filename, tmp);
 	}
 
@@ -1607,13 +1705,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
 		 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
-		 objdump_path ? objdump_path : "objdump",
-		 disassembler_style ? "-M " : "",
-		 disassembler_style ? disassembler_style : "",
+		 opts->objdump_path ?: "objdump",
+		 opts->disassembler_style ? "-M " : "",
+		 opts->disassembler_style ?: "",
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
-		 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
-		 symbol_conf.annotate_src ? "-S" : "",
+		 opts->show_asm_raw ? "" : "--no-show-raw",
+		 opts->annotate_src ? "-S" : "",
 		 symfs_filename, symfs_filename);
 
 	if (err < 0) {
@@ -1686,7 +1784,7 @@ out_free_command:
 out_remove_tmp:
 	close(stdout_fd[0]);
 
-	if (dso__needs_decompress(dso))
+	if (decomp)
 		unlink(symfs_filename);
 
 	if (delete_extract)
@@ -1699,34 +1797,45 @@ out_close_stdout:
 	goto out_free_command;
 }
 
-static void calc_percent(struct sym_hist *hist,
-			 struct annotation_data *sample,
+static void calc_percent(struct sym_hist *sym_hist,
+			 struct hists *hists,
+			 struct annotation_data *data,
 			 s64 offset, s64 end)
 {
 	unsigned int hits = 0;
 	u64 period = 0;
 
 	while (offset < end) {
-		hits   += hist->addr[offset].nr_samples;
-		period += hist->addr[offset].period;
+		hits   += sym_hist->addr[offset].nr_samples;
+		period += sym_hist->addr[offset].period;
 		++offset;
 	}
 
-	if (hist->nr_samples) {
-		sample->he.period     = period;
-		sample->he.nr_samples = hits;
-		sample->percent = 100.0 * hits / hist->nr_samples;
+	if (sym_hist->nr_samples) {
+		data->he.period     = period;
+		data->he.nr_samples = hits;
+		data->percent[PERCENT_HITS_LOCAL] = 100.0 * hits / sym_hist->nr_samples;
 	}
+
+	if (hists->stats.nr_non_filtered_samples)
+		data->percent[PERCENT_HITS_GLOBAL] = 100.0 * hits / hists->stats.nr_non_filtered_samples;
+
+	if (sym_hist->period)
+		data->percent[PERCENT_PERIOD_LOCAL] = 100.0 * period / sym_hist->period;
+
+	if (hists->stats.total_period)
+		data->percent[PERCENT_PERIOD_GLOBAL] = 100.0 * period / hists->stats.total_period;
 }
 
 static void annotation__calc_percent(struct annotation *notes,
-				     struct perf_evsel *evsel, s64 len)
+				     struct perf_evsel *leader, s64 len)
 {
 	struct annotation_line *al, *next;
+	struct perf_evsel *evsel;
 
 	list_for_each_entry(al, &notes->src->source, node) {
 		s64 end;
-		int i;
+		int i = 0;
 
 		if (al->offset == -1)
 			continue;
@@ -1734,14 +1843,17 @@ static void annotation__calc_percent(struct annotation *notes,
 		next = annotation_line__next(al, &notes->src->source);
 		end  = next ? next->offset : len;
 
-		for (i = 0; i < al->samples_nr; i++) {
-			struct annotation_data *sample;
-			struct sym_hist *hist;
+		for_each_group_evsel(evsel, leader) {
+			struct hists *hists = evsel__hists(evsel);
+			struct annotation_data *data;
+			struct sym_hist *sym_hist;
 
-			hist   = annotation__histogram(notes, evsel->idx + i);
-			sample = &al->samples[i];
+			BUG_ON(i >= al->data_nr);
 
-			calc_percent(hist, sample, al->offset, end);
+			sym_hist = annotation__histogram(notes, evsel->idx);
+			data = &al->data[i++];
+
+			calc_percent(sym_hist, hists, data, al->offset, end);
 		}
 	}
 }
@@ -1755,11 +1867,13 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel)
 
 int symbol__annotate(struct symbol *sym, struct map *map,
 		     struct perf_evsel *evsel, size_t privsize,
+		     struct annotation_options *options,
 		     struct arch **parch)
 {
 	struct annotate_args args = {
 		.privsize	= privsize,
 		.evsel		= evsel,
+		.options	= options,
 	};
 	struct perf_env *env = perf_evsel__env(evsel);
 	const char *arch_name = perf_env__arch(env);
@@ -1790,7 +1904,8 @@ int symbol__annotate(struct symbol *sym, struct map *map,
 	return symbol__disassemble(sym, &args);
 }
 
-static void insert_source_line(struct rb_root *root, struct annotation_line *al)
+static void insert_source_line(struct rb_root *root, struct annotation_line *al,
+			       struct annotation_options *opts)
 {
 	struct annotation_line *iter;
 	struct rb_node **p = &root->rb_node;
@@ -1803,8 +1918,10 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al)
 
 		ret = strcmp(iter->path, al->path);
 		if (ret == 0) {
-			for (i = 0; i < al->samples_nr; i++)
-				iter->samples[i].percent_sum += al->samples[i].percent;
+			for (i = 0; i < al->data_nr; i++) {
+				iter->data[i].percent_sum += annotation_data__percent(&al->data[i],
+										      opts->percent_type);
+			}
 			return;
 		}
 
@@ -1814,8 +1931,10 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al)
 			p = &(*p)->rb_right;
 	}
 
-	for (i = 0; i < al->samples_nr; i++)
-		al->samples[i].percent_sum = al->samples[i].percent;
+	for (i = 0; i < al->data_nr; i++) {
+		al->data[i].percent_sum = annotation_data__percent(&al->data[i],
+								   opts->percent_type);
+	}
 
 	rb_link_node(&al->rb_node, parent, p);
 	rb_insert_color(&al->rb_node, root);
@@ -1825,10 +1944,10 @@ static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)
 {
 	int i;
 
-	for (i = 0; i < a->samples_nr; i++) {
-		if (a->samples[i].percent_sum == b->samples[i].percent_sum)
+	for (i = 0; i < a->data_nr; i++) {
+		if (a->data[i].percent_sum == b->data[i].percent_sum)
 			continue;
-		return a->samples[i].percent_sum > b->samples[i].percent_sum;
+		return a->data[i].percent_sum > b->data[i].percent_sum;
 	}
 
 	return 0;
@@ -1893,8 +2012,8 @@ static void print_summary(struct rb_root *root, const char *filename)
 		int i;
 
 		al = rb_entry(node, struct annotation_line, rb_node);
-		for (i = 0; i < al->samples_nr; i++) {
-			percent = al->samples[i].percent_sum;
+		for (i = 0; i < al->data_nr; i++) {
+			percent = al->data[i].percent_sum;
 			color = get_percent_color(percent);
 			color_fprintf(stdout, color, " %7.2f", percent);
 
@@ -1937,8 +2056,8 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
 }
 
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
-			    struct perf_evsel *evsel, bool full_paths,
-			    int min_pcnt, int max_lines, int context)
+			    struct perf_evsel *evsel,
+			    struct annotation_options *opts)
 {
 	struct dso *dso = map->dso;
 	char *filename;
@@ -1950,28 +2069,35 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
 	u64 start = map__rip_2objdump(map, sym->start);
 	int printed = 2, queue_len = 0, addr_fmt_width;
 	int more = 0;
+	bool context = opts->context;
 	u64 len;
 	int width = symbol_conf.show_total_period ? 12 : 8;
 	int graph_dotted_len;
+	char buf[512];
 
 	filename = strdup(dso->long_name);
 	if (!filename)
 		return -ENOMEM;
 
-	if (full_paths)
+	if (opts->full_path)
 		d_filename = filename;
 	else
 		d_filename = basename(filename);
 
 	len = symbol__size(sym);
 
-	if (perf_evsel__is_group_event(evsel))
+	if (perf_evsel__is_group_event(evsel)) {
 		width *= evsel->nr_members;
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		evsel_name = buf;
+	}
 
-	graph_dotted_len = printf(" %-*.*s|	Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
+	graph_dotted_len = printf(" %-*.*s|	Source code & Disassembly of %s for %s (%" PRIu64 " samples, "
+				  "percent: %s)\n",
 				  width, width, symbol_conf.show_total_period ? "Period" :
 				  symbol_conf.show_nr_samples ? "Samples" : "Percent",
-				  d_filename, evsel_name, h->nr_samples);
+				  d_filename, evsel_name, h->nr_samples,
+				  percent_type_str(opts->percent_type));
 
 	printf("%-*.*s----\n",
 	       graph_dotted_len, graph_dotted_len, graph_dotted_line);
@@ -1990,8 +2116,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
 		}
 
 		err = annotation_line__print(pos, sym, start, evsel, len,
-					     min_pcnt, printed, max_lines,
-					     queue, addr_fmt_width);
+					     opts->min_pcnt, printed, opts->max_lines,
+					     queue, addr_fmt_width, opts->percent_type);
 
 		switch (err) {
 		case 0:
@@ -2068,10 +2194,11 @@ static void FILE__write_graph(void *fp, int graph)
 	fputs(s, fp);
 }
 
-int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp)
+static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
+				     struct annotation_options *opts)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	struct annotation_write_ops ops = {
+	struct annotation_write_ops wops = {
 		.first_line		 = true,
 		.obj			 = fp,
 		.set_color		 = FILE__set_color,
@@ -2085,15 +2212,16 @@ int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp)
 	list_for_each_entry(al, &notes->src->source, node) {
 		if (annotation_line__filter(al, notes))
 			continue;
-		annotation_line__write(al, notes, &ops);
+		annotation_line__write(al, notes, &wops, opts);
 		fputc('\n', fp);
-		ops.first_line = false;
+		wops.first_line = false;
 	}
 
 	return 0;
 }
 
-int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel)
+int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel,
+				struct annotation_options *opts)
 {
 	const char *ev_name = perf_evsel__name(evsel);
 	char buf[1024];
@@ -2115,7 +2243,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel)
 
 	fprintf(fp, "%s() %s\nEvent: %s\n\n",
 		ms->sym->name, ms->map->dso->long_name, ev_name);
-	symbol__annotate_fprintf2(ms->sym, fp);
+	symbol__annotate_fprintf2(ms->sym, fp, opts);
 
 	fclose(fp);
 	err = 0;
@@ -2285,7 +2413,8 @@ void annotation__update_column_widths(struct annotation *notes)
 }
 
 static void annotation__calc_lines(struct annotation *notes, struct map *map,
-				  struct rb_root *root)
+				   struct rb_root *root,
+				   struct annotation_options *opts)
 {
 	struct annotation_line *al;
 	struct rb_root tmp_root = RB_ROOT;
@@ -2294,13 +2423,14 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
 		double percent_max = 0.0;
 		int i;
 
-		for (i = 0; i < al->samples_nr; i++) {
-			struct annotation_data *sample;
+		for (i = 0; i < al->data_nr; i++) {
+			double percent;
 
-			sample = &al->samples[i];
+			percent = annotation_data__percent(&al->data[i],
+							   opts->percent_type);
 
-			if (sample->percent > percent_max)
-				percent_max = sample->percent;
+			if (percent > percent_max)
+				percent_max = percent;
 		}
 
 		if (percent_max <= 0.5)
@@ -2308,42 +2438,43 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
 
 		al->path = get_srcline(map->dso, notes->start + al->offset, NULL,
 				       false, true, notes->start + al->offset);
-		insert_source_line(&tmp_root, al);
+		insert_source_line(&tmp_root, al, opts);
 	}
 
 	resort_source_line(root, &tmp_root);
 }
 
 static void symbol__calc_lines(struct symbol *sym, struct map *map,
-			      struct rb_root *root)
+			       struct rb_root *root,
+			       struct annotation_options *opts)
 {
 	struct annotation *notes = symbol__annotation(sym);
 
-	annotation__calc_lines(notes, map, root);
+	annotation__calc_lines(notes, map, root, opts);
 }
 
 int symbol__tty_annotate2(struct symbol *sym, struct map *map,
-			  struct perf_evsel *evsel, bool print_lines,
-			  bool full_paths)
+			  struct perf_evsel *evsel,
+			  struct annotation_options *opts)
 {
 	struct dso *dso = map->dso;
 	struct rb_root source_line = RB_ROOT;
-	struct annotation_options opts = annotation__default_options;
-	struct annotation *notes = symbol__annotation(sym);
+	struct hists *hists = evsel__hists(evsel);
 	char buf[1024];
 
-	if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0)
+	if (symbol__annotate2(sym, map, evsel, opts, NULL) < 0)
 		return -1;
 
-	if (print_lines) {
-		srcline_full_filename = full_paths;
-		symbol__calc_lines(sym, map, &source_line);
+	if (opts->print_lines) {
+		srcline_full_filename = opts->full_path;
+		symbol__calc_lines(sym, map, &source_line, opts);
 		print_summary(&source_line, dso->long_name);
 	}
 
-	annotation__scnprintf_samples_period(notes, buf, sizeof(buf), evsel);
-	fprintf(stdout, "%s\n%s() %s\n", buf, sym->name, dso->long_name);
-	symbol__annotate_fprintf2(sym, stdout);
+	hists__scnprintf_title(hists, buf, sizeof(buf));
+	fprintf(stdout, "%s, [percent: %s]\n%s() %s\n",
+		buf, percent_type_str(opts->percent_type), sym->name, dso->long_name);
+	symbol__annotate_fprintf2(sym, stdout, opts);
 
 	annotated_source__purge(symbol__annotation(sym)->src);
 
@@ -2351,25 +2482,24 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map,
 }
 
 int symbol__tty_annotate(struct symbol *sym, struct map *map,
-			 struct perf_evsel *evsel, bool print_lines,
-			 bool full_paths, int min_pcnt, int max_lines)
+			 struct perf_evsel *evsel,
+			 struct annotation_options *opts)
 {
 	struct dso *dso = map->dso;
 	struct rb_root source_line = RB_ROOT;
 
-	if (symbol__annotate(sym, map, evsel, 0, NULL) < 0)
+	if (symbol__annotate(sym, map, evsel, 0, opts, NULL) < 0)
 		return -1;
 
 	symbol__calc_percent(sym, evsel);
 
-	if (print_lines) {
-		srcline_full_filename = full_paths;
-		symbol__calc_lines(sym, map, &source_line);
+	if (opts->print_lines) {
+		srcline_full_filename = opts->full_path;
+		symbol__calc_lines(sym, map, &source_line, opts);
 		print_summary(&source_line, dso->long_name);
 	}
 
-	symbol__annotate_printf(sym, map, evsel, full_paths,
-				min_pcnt, max_lines, 0);
+	symbol__annotate_printf(sym, map, evsel, opts);
 
 	annotated_source__purge(symbol__annotation(sym)->src);
 
@@ -2382,14 +2512,21 @@ bool ui__has_annotation(void)
 }
 
 
-double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes)
+static double annotation_line__max_percent(struct annotation_line *al,
+					   struct annotation *notes,
+					   unsigned int percent_type)
 {
 	double percent_max = 0.0;
 	int i;
 
 	for (i = 0; i < notes->nr_events; i++) {
-		if (al->samples[i].percent > percent_max)
-			percent_max = al->samples[i].percent;
+		double percent;
+
+		percent = annotation_data__percent(&al->data[i],
+						   percent_type);
+
+		if (percent > percent_max)
+			percent_max = percent;
 	}
 
 	return percent_max;
@@ -2428,7 +2565,7 @@ call_like:
 
 static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
 				     bool first_line, bool current_entry, bool change_color, int width,
-				     void *obj,
+				     void *obj, unsigned int percent_type,
 				     int  (*obj__set_color)(void *obj, int color),
 				     void (*obj__set_percent_color)(void *obj, double percent, bool current),
 				     int  (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
@@ -2436,7 +2573,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 				     void (*obj__write_graph)(void *obj, int graph))
 
 {
-	double percent_max = annotation_line__max_percent(al, notes);
+	double percent_max = annotation_line__max_percent(al, notes, percent_type);
 	int pcnt_width = annotation__pcnt_width(notes),
 	    cycles_width = annotation__cycles_width(notes);
 	bool show_title = false;
@@ -2455,15 +2592,18 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 		int i;
 
 		for (i = 0; i < notes->nr_events; i++) {
-			obj__set_percent_color(obj, al->samples[i].percent, current_entry);
+			double percent;
+
+			percent = annotation_data__percent(&al->data[i], percent_type);
+
+			obj__set_percent_color(obj, percent, current_entry);
 			if (notes->options->show_total_period) {
-				obj__printf(obj, "%11" PRIu64 " ", al->samples[i].he.period);
+				obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
 			} else if (notes->options->show_nr_samples) {
 				obj__printf(obj, "%6" PRIu64 " ",
-						   al->samples[i].he.nr_samples);
+						   al->data[i].he.nr_samples);
 			} else {
-				obj__printf(obj, "%6.2f ",
-						   al->samples[i].percent);
+				obj__printf(obj, "%6.2f ", percent);
 			}
 		}
 	} else {
@@ -2486,13 +2626,38 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 		else
 			obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
 
-		if (al->cycles)
-			obj__printf(obj, "%*" PRIu64 " ",
+		if (!notes->options->show_minmax_cycle) {
+			if (al->cycles)
+				obj__printf(obj, "%*" PRIu64 " ",
 					   ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
-		else if (!show_title)
-			obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " ");
-		else
-			obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle");
+			else if (!show_title)
+				obj__printf(obj, "%*s",
+					    ANNOTATION__CYCLES_WIDTH, " ");
+			else
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__CYCLES_WIDTH - 1,
+					    "Cycle");
+		} else {
+			if (al->cycles) {
+				char str[32];
+
+				scnprintf(str, sizeof(str),
+					"%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
+					al->cycles, al->cycles_min,
+					al->cycles_max);
+
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+					    str);
+			} else if (!show_title)
+				obj__printf(obj, "%*s",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH,
+					    " ");
+			else
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+					    "Cycle(min/max)");
+		}
 	}
 
 	obj__printf(obj, " ");
@@ -2556,13 +2721,15 @@ print_addr:
 }
 
 void annotation_line__write(struct annotation_line *al, struct annotation *notes,
-			    struct annotation_write_ops *ops)
+			    struct annotation_write_ops *wops,
+			    struct annotation_options *opts)
 {
-	__annotation_line__write(al, notes, ops->first_line, ops->current_entry,
-				 ops->change_color, ops->width, ops->obj,
-				 ops->set_color, ops->set_percent_color,
-				 ops->set_jumps_percent_color, ops->printf,
-				 ops->write_graph);
+	__annotation_line__write(al, notes, wops->first_line, wops->current_entry,
+				 wops->change_color, wops->width, wops->obj,
+				 opts->percent_type,
+				 wops->set_color, wops->set_percent_color,
+				 wops->set_jumps_percent_color, wops->printf,
+				 wops->write_graph);
 }
 
 int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel,
@@ -2579,7 +2746,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev
 	if (perf_evsel__is_group_event(evsel))
 		nr_pcnt = evsel->nr_members;
 
-	err = symbol__annotate(sym, map, evsel, 0, parch);
+	err = symbol__annotate(sym, map, evsel, 0, options, parch);
 	if (err)
 		goto out_free_offsets;
 
@@ -2604,46 +2771,6 @@ out_free_offsets:
 	return -1;
 }
 
-int __annotation__scnprintf_samples_period(struct annotation *notes,
-					   char *bf, size_t size,
-					   struct perf_evsel *evsel,
-					   bool show_freq)
-{
-	const char *ev_name = perf_evsel__name(evsel);
-	char buf[1024], ref[30] = " show reference callgraph, ";
-	char sample_freq_str[64] = "";
-	unsigned long nr_samples = 0;
-	int nr_members = 1;
-	bool enable_ref = false;
-	u64 nr_events = 0;
-	char unit;
-	int i;
-
-	if (perf_evsel__is_group_event(evsel)) {
-		perf_evsel__group_desc(evsel, buf, sizeof(buf));
-		ev_name = buf;
-                nr_members = evsel->nr_members;
-	}
-
-	for (i = 0; i < nr_members; i++) {
-		struct sym_hist *ah = annotation__histogram(notes, evsel->idx + i);
-
-		nr_samples += ah->nr_samples;
-		nr_events  += ah->period;
-	}
-
-	if (symbol_conf.show_ref_callgraph && strstr(ev_name, "call-graph=no"))
-		enable_ref = true;
-
-	if (show_freq)
-		scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq);
-
-	nr_samples = convert_unit(nr_samples, &unit);
-	return scnprintf(bf, size, "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
-			 nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
-			 ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
-}
-
 #define ANNOTATION__CFG(n) \
 	{ .name = #n, .value = &annotation__default_options.n, }
 
@@ -2708,3 +2835,55 @@ void annotation_config__init(void)
 	annotation__default_options.show_total_period = symbol_conf.show_total_period;
 	annotation__default_options.show_nr_samples   = symbol_conf.show_nr_samples;
 }
+
+static unsigned int parse_percent_type(char *str1, char *str2)
+{
+	unsigned int type = (unsigned int) -1;
+
+	if (!strcmp("period", str1)) {
+		if (!strcmp("local", str2))
+			type = PERCENT_PERIOD_LOCAL;
+		else if (!strcmp("global", str2))
+			type = PERCENT_PERIOD_GLOBAL;
+	}
+
+	if (!strcmp("hits", str1)) {
+		if (!strcmp("local", str2))
+			type = PERCENT_HITS_LOCAL;
+		else if (!strcmp("global", str2))
+			type = PERCENT_HITS_GLOBAL;
+	}
+
+	return type;
+}
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+				int unset __maybe_unused)
+{
+	struct annotation_options *opts = opt->value;
+	unsigned int type;
+	char *str1, *str2;
+	int err = -1;
+
+	str1 = strdup(_str);
+	if (!str1)
+		return -ENOMEM;
+
+	str2 = strchr(str1, '-');
+	if (!str2)
+		goto out;
+
+	*str2++ = 0;
+
+	type = parse_percent_type(str1, str2);
+	if (type == (unsigned int) -1)
+		type = parse_percent_type(str2, str1);
+	if (type != (unsigned int) -1) {
+		opts->percent_type = type;
+		err = 0;
+	}
+
+out:
+	free(str1);
+	return err;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f28a9e43421d..5399ba2321bb 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <pthread.h>
+#include <asm/bug.h>
 
 struct ins_ops;
 
@@ -21,6 +22,7 @@ struct ins {
 
 struct ins_operands {
 	char	*raw;
+	char	*raw_comment;
 	struct {
 		char	*raw;
 		char	*name;
@@ -61,16 +63,28 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
 
 #define ANNOTATION__IPC_WIDTH 6
 #define ANNOTATION__CYCLES_WIDTH 6
+#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
 
 struct annotation_options {
 	bool hide_src_code,
 	     use_offset,
 	     jump_arrows,
+	     print_lines,
+	     full_path,
 	     show_linenr,
 	     show_nr_jumps,
 	     show_nr_samples,
-	     show_total_period;
+	     show_total_period,
+	     show_minmax_cycle,
+	     show_asm_raw,
+	     annotate_src;
 	u8   offset_level;
+	int  min_pcnt;
+	int  max_lines;
+	int  context;
+	const char *objdump_path;
+	const char *disassembler_style;
+	unsigned int percent_type;
 };
 
 enum {
@@ -90,8 +104,16 @@ struct sym_hist_entry {
 	u64		period;
 };
 
+enum {
+	PERCENT_HITS_LOCAL,
+	PERCENT_HITS_GLOBAL,
+	PERCENT_PERIOD_LOCAL,
+	PERCENT_PERIOD_GLOBAL,
+	PERCENT_MAX,
+};
+
 struct annotation_data {
-	double			 percent;
+	double			 percent[PERCENT_MAX];
 	double			 percent_sum;
 	struct sym_hist_entry	 he;
 };
@@ -105,12 +127,14 @@ struct annotation_line {
 	int			 jump_sources;
 	float			 ipc;
 	u64			 cycles;
+	u64			 cycles_max;
+	u64			 cycles_min;
 	size_t			 privsize;
 	char			*path;
 	u32			 idx;
 	int			 idx_asm;
-	int			 samples_nr;
-	struct annotation_data	 samples[0];
+	int			 data_nr;
+	struct annotation_data	 data[0];
 };
 
 struct disasm_line {
@@ -121,6 +145,27 @@ struct disasm_line {
 	struct annotation_line	 al;
 };
 
+static inline double annotation_data__percent(struct annotation_data *data,
+					      unsigned int which)
+{
+	return which < PERCENT_MAX ? data->percent[which] : -1;
+}
+
+static inline const char *percent_type_str(unsigned int type)
+{
+	static const char *str[PERCENT_MAX] = {
+		"local hits",
+		"global hits",
+		"local period",
+		"global period",
+	};
+
+	if (WARN_ON(type >= PERCENT_MAX))
+		return "N/A";
+
+	return str[type];
+}
+
 static inline struct disasm_line *disasm_line(struct annotation_line *al)
 {
 	return al ? container_of(al, struct disasm_line, al) : NULL;
@@ -156,22 +201,15 @@ struct annotation_write_ops {
 	void (*write_graph)(void *obj, int graph);
 };
 
-double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes);
 void annotation_line__write(struct annotation_line *al, struct annotation *notes,
-			    struct annotation_write_ops *ops);
+			    struct annotation_write_ops *ops,
+			    struct annotation_options *opts);
 
 int __annotation__scnprintf_samples_period(struct annotation *notes,
 					   char *bf, size_t size,
 					   struct perf_evsel *evsel,
 					   bool show_freq);
 
-static inline int annotation__scnprintf_samples_period(struct annotation *notes,
-						       char *bf, size_t size,
-						       struct perf_evsel *evsel)
-{
-	return __annotation__scnprintf_samples_period(notes, bf, size, evsel, true);
-}
-
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);
@@ -186,6 +224,8 @@ struct cyc_hist {
 	u64	start;
 	u64	cycles;
 	u64	cycles_aggr;
+	u64	cycles_max;
+	u64	cycles_min;
 	u32	num;
 	u32	num_aggr;
 	u8	have_start;
@@ -195,7 +235,11 @@ struct cyc_hist {
 
 /** struct annotated_source - symbols with hits have this attached as in sannotation
  *
- * @histogram: Array of addr hit histograms per event being monitored
+ * @histograms: Array of addr hit histograms per event being monitored
+ * nr_histograms: This may not be the same as evsel->evlist->nr_entries if
+ * 		  we have more than a group in a evlist, where we will want
+ * 		  to see each group separately, that is why symbol__annotate2()
+ * 		  sets src->nr_histograms to evsel->nr_members.
  * @lines: If 'print_lines' is specified, per source code line percentages
  * @source: source parsed from a disassembler like objdump -dS
  * @cyc_hist: Average cycles per basic block
@@ -211,7 +255,7 @@ struct annotated_source {
 	int    		   nr_histograms;
 	size_t		   sizeof_sym_hist;
 	struct cyc_hist	   *cycles_hist;
-	struct sym_hist	   histograms[0];
+	struct sym_hist	   *histograms;
 };
 
 struct annotation {
@@ -239,6 +283,9 @@ struct annotation {
 
 static inline int annotation__cycles_width(struct annotation *notes)
 {
+	if (notes->have_cycles && notes->options->show_minmax_cycle)
+		return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
+
 	return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
 }
 
@@ -258,10 +305,14 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
 void annotation__update_column_widths(struct annotation *notes);
 void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
 
+static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
+{
+	return ((void *)src->histograms) + (src->sizeof_sym_hist * idx);
+}
+
 static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
 {
-	return (((void *)&notes->src->histograms) +
-	 	(notes->src->sizeof_sym_hist * idx));
+	return annotated_source__histogram(notes->src, idx);
 }
 
 static inline struct annotation *symbol__annotation(struct symbol *sym)
@@ -270,20 +321,21 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
 }
 
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
-				 int evidx);
+				 struct perf_evsel *evsel);
 
 int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
 				    struct addr_map_symbol *start,
 				    unsigned cycles);
 
 int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
-				 int evidx, u64 addr);
+				 struct perf_evsel *evsel, u64 addr);
 
-int symbol__alloc_hist(struct symbol *sym);
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
 int symbol__annotate(struct symbol *sym, struct map *map,
 		     struct perf_evsel *evsel, size_t privsize,
+		     struct annotation_options *options,
 		     struct arch **parch);
 int symbol__annotate2(struct symbol *sym, struct map *map,
 		      struct perf_evsel *evsel,
@@ -311,42 +363,41 @@ int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
 				 int errnum, char *buf, size_t buflen);
 
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
-			    struct perf_evsel *evsel, bool full_paths,
-			    int min_pcnt, int max_lines, int context);
-int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp);
+			    struct perf_evsel *evsel,
+			    struct annotation_options *options);
 void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
 void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
 void annotated_source__purge(struct annotated_source *as);
 
-int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel);
+int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel,
+				struct annotation_options *opts);
 
 bool ui__has_annotation(void);
 
 int symbol__tty_annotate(struct symbol *sym, struct map *map,
-			 struct perf_evsel *evsel, bool print_lines,
-			 bool full_paths, int min_pcnt, int max_lines);
+			 struct perf_evsel *evsel, struct annotation_options *opts);
 
 int symbol__tty_annotate2(struct symbol *sym, struct map *map,
-			  struct perf_evsel *evsel, bool print_lines,
-			  bool full_paths);
+			  struct perf_evsel *evsel, struct annotation_options *opts);
 
 #ifdef HAVE_SLANG_SUPPORT
 int symbol__tui_annotate(struct symbol *sym, struct map *map,
 			 struct perf_evsel *evsel,
-			 struct hist_browser_timer *hbt);
+			 struct hist_browser_timer *hbt,
+			 struct annotation_options *opts);
 #else
 static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
 				struct map *map __maybe_unused,
 				struct perf_evsel *evsel  __maybe_unused,
-				struct hist_browser_timer *hbt
-				__maybe_unused)
+				struct hist_browser_timer *hbt __maybe_unused,
+				struct annotation_options *opts __maybe_unused)
 {
 	return 0;
 }
 #endif
 
-extern const char	*disassembler_style;
-
 void annotation_config__init(void);
 
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+				int unset);
 #endif	/* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 857de69a5361..72d5ba2479bf 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -56,6 +56,7 @@
 #include "intel-pt.h"
 #include "intel-bts.h"
 #include "arm-spe.h"
+#include "s390-cpumsf.h"
 
 #include "sane_ctype.h"
 #include "symbol/kallsyms.h"
@@ -202,6 +203,9 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues,
 	for (i = 0; i < queues->nr_queues; i++) {
 		list_splice_tail(&queues->queue_array[i].head,
 				 &queue_array[i].head);
+		queue_array[i].tid = queues->queue_array[i].tid;
+		queue_array[i].cpu = queues->queue_array[i].cpu;
+		queue_array[i].set = queues->queue_array[i].set;
 		queue_array[i].priv = queues->queue_array[i].priv;
 	}
 
@@ -902,9 +906,8 @@ out_free:
 	return err;
 }
 
-int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
-				      union perf_event *event,
-				      struct perf_session *session)
+int perf_event__process_auxtrace_info(struct perf_session *session,
+				      union perf_event *event)
 {
 	enum auxtrace_type type = event->auxtrace_info.type;
 
@@ -920,15 +923,16 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
 		return arm_spe_process_auxtrace_info(event, session);
 	case PERF_AUXTRACE_CS_ETM:
 		return cs_etm__process_auxtrace_info(event, session);
+	case PERF_AUXTRACE_S390_CPUMSF:
+		return s390_cpumsf_process_auxtrace_info(event, session);
 	case PERF_AUXTRACE_UNKNOWN:
 	default:
 		return -EINVAL;
 	}
 }
 
-s64 perf_event__process_auxtrace(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session)
+s64 perf_event__process_auxtrace(struct perf_session *session,
+				 union perf_event *event)
 {
 	s64 err;
 
@@ -944,7 +948,7 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool,
 	if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
 		return -EINVAL;
 
-	err = session->auxtrace->process_auxtrace_event(session, event, tool);
+	err = session->auxtrace->process_auxtrace_event(session, event, session->tool);
 	if (err < 0)
 		return err;
 
@@ -958,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool,
 #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
 
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample)
 {
-	synth_opts->instructions = true;
 	synth_opts->branches = true;
 	synth_opts->transactions = true;
 	synth_opts->ptwrites = true;
 	synth_opts->pwr_events = true;
 	synth_opts->errors = true;
-	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
-	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	if (no_sample) {
+		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+		synth_opts->period = 1;
+		synth_opts->calls = true;
+	} else {
+		synth_opts->instructions = true;
+		synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
 	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
 	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
 	synth_opts->initial_skip = 0;
@@ -995,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	}
 
 	if (!str) {
-		itrace_synth_opts__set_default(synth_opts);
+		itrace_synth_opts__set_default(synth_opts, false);
 		return 0;
 	}
 
@@ -1179,9 +1190,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats)
 	}
 }
 
-int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
-				       union perf_event *event,
-				       struct perf_session *session)
+int perf_event__process_auxtrace_error(struct perf_session *session,
+				       union perf_event *event)
 {
 	if (auxtrace__dont_decode(session))
 		return 0;
@@ -1190,11 +1200,12 @@ int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+static int __auxtrace_mmap__read(struct perf_mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
 				 bool snapshot, size_t snapshot_size)
 {
+	struct auxtrace_mmap *mm = &map->auxtrace_mmap;
 	u64 head, old = mm->prev, offset, ref;
 	unsigned char *data = mm->base;
 	size_t size, head_off, old_off, len1, len2, padding;
@@ -1281,7 +1292,7 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
 	ev.auxtrace.tid = mm->tid;
 	ev.auxtrace.cpu = mm->cpu;
 
-	if (fn(tool, &ev, data1, len1, data2, len2))
+	if (fn(tool, map, &ev, data1, len1, data2, len2))
 		return -1;
 
 	mm->prev = head;
@@ -1300,18 +1311,18 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
 	return 1;
 }
 
-int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn)
 {
-	return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
+	return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
 }
 
-int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
 				 size_t snapshot_size)
 {
-	return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+	return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
 }
 
 /**
@@ -1679,7 +1690,7 @@ struct sym_args {
 static bool kern_sym_match(struct sym_args *args, const char *name, char type)
 {
 	/* A function with the same name, and global or the n'th found or any */
-	return symbol_type__is_a(type, MAP__FUNCTION) &&
+	return kallsyms__is_function(type) &&
 	       !strcmp(name, args->name) &&
 	       ((args->global && isupper(type)) ||
 		(args->selected && ++(args->cnt) == args->idx) ||
@@ -1784,7 +1795,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
 {
 	struct sym_args *args = arg;
 
-	if (!symbol_type__is_a(type, MAP__FUNCTION))
+	if (!kallsyms__is_function(type))
 		return 0;
 
 	if (!args->started) {
@@ -1915,7 +1926,7 @@ static void print_duplicate_syms(struct dso *dso, const char *sym_name)
 
 	pr_err("Multiple symbols with name '%s'\n", sym_name);
 
-	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	sym = dso__first_symbol(dso);
 	while (sym) {
 		if (dso_sym_match(sym, sym_name, &cnt, -1)) {
 			pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
@@ -1945,7 +1956,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
 	*start = 0;
 	*size = 0;
 
-	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	sym = dso__first_symbol(dso);
 	while (sym) {
 		if (*start) {
 			if (!*size)
@@ -1972,8 +1983,8 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
 
 static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
 {
-	struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION);
-	struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION);
+	struct symbol *first_sym = dso__first_symbol(dso);
+	struct symbol *last_sym = dso__last_symbol(dso);
 
 	if (!first_sym || !last_sym) {
 		pr_err("Failed to determine filter for %s\nNo symbols found.\n",
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e731f55da072..8e50f96d4b23 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -23,6 +23,7 @@
 #include <linux/list.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
+#include <asm/bitsperlong.h>
 
 #include "../perf.h"
 #include "event.h"
@@ -33,6 +34,7 @@ union perf_event;
 struct perf_session;
 struct perf_evlist;
 struct perf_tool;
+struct perf_mmap;
 struct option;
 struct record_opts;
 struct auxtrace_info_event;
@@ -44,6 +46,7 @@ enum auxtrace_type {
 	PERF_AUXTRACE_INTEL_BTS,
 	PERF_AUXTRACE_CS_ETM,
 	PERF_AUXTRACE_ARM_SPE,
+	PERF_AUXTRACE_S390_CPUMSF,
 };
 
 enum itrace_period_type {
@@ -55,6 +58,7 @@ enum itrace_period_type {
 /**
  * struct itrace_synth_opts - AUX area tracing synthesis options.
  * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
  * @inject: indicates the event (not just the sample) must be fully synthesized
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
@@ -79,6 +83,7 @@ enum itrace_period_type {
  */
 struct itrace_synth_opts {
 	bool			set;
+	bool			default_no_sample;
 	bool			inject;
 	bool			instructions;
 	bool			branches;
@@ -433,13 +438,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 				   bool per_cpu);
 
 typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+				  struct perf_mmap *map,
 				  union perf_event *event, void *data1,
 				  size_t len1, void *data2, size_t len2);
 
-int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn);
 
-int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
 				 size_t snapshot_size);
@@ -516,18 +522,16 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_tool *tool,
 					 struct perf_session *session,
 					 perf_event__handler_t process);
-int perf_event__process_auxtrace_info(struct perf_tool *tool,
-				      union perf_event *event,
-				      struct perf_session *session);
-s64 perf_event__process_auxtrace(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session);
-int perf_event__process_auxtrace_error(struct perf_tool *tool,
-				       union perf_event *event,
-				       struct perf_session *session);
+int perf_event__process_auxtrace_info(struct perf_session *session,
+				      union perf_event *event);
+s64 perf_event__process_auxtrace(struct perf_session *session,
+				 union perf_event *event);
+int perf_event__process_auxtrace_error(struct perf_session *session,
+				       union perf_event *event);
 int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			    int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample);
 
 size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
 void perf_session__auxtrace_error_inc(struct perf_session *session,
@@ -576,6 +580,23 @@ static inline void auxtrace__free(struct perf_session *session)
 	return session->auxtrace->free(session);
 }
 
+#define ITRACE_HELP \
+"				i:	    		synthesize instructions events\n"		\
+"				b:	    		synthesize branches events\n"		\
+"				c:	    		synthesize branches events (calls only)\n"	\
+"				r:	    		synthesize branches events (returns only)\n" \
+"				x:	    		synthesize transactions events\n"		\
+"				w:	    		synthesize ptwrite events\n"		\
+"				p:	    		synthesize power events\n"			\
+"				e:	    		synthesize error events\n"			\
+"				d:	    		create a debug log\n"			\
+"				g[len]:     		synthesize a call chain (use with i or x)\n" \
+"				l[len]:     		synthesize last branch entries (use with i or x)\n" \
+"				sNUMBER:    		skip initial number of events\n"		\
+"				PERIOD[ns|us|ms|i|t]:   specify period to sample stream\n" \
+"				concatenate multiple options. Default is ibxwpe or cewp\n"
+
+
 #else
 
 static inline struct auxtrace_record *
@@ -716,6 +737,8 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 				   struct perf_evlist *evlist, int idx,
 				   bool per_cpu);
 
+#define ITRACE_HELP ""
+
 #endif
 
 #endif
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index cee658733e2c..f9ae1a993806 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -747,7 +747,9 @@ int bpf__load(struct bpf_object *obj)
 
 	err = bpf_object__load(obj);
 	if (err) {
-		pr_debug("bpf: load objects failed\n");
+		char bf[128];
+		libbpf_strerror(err, bf, sizeof(bf));
+		pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
 		return err;
 	}
 	return 0;
@@ -1527,13 +1529,13 @@ int bpf__apply_obj_config(void)
 	bpf_object__for_each_safe(obj, objtmp)	\
 		bpf_map__for_each(pos, obj)
 
-#define bpf__for_each_stdout_map(pos, obj, objtmp)	\
+#define bpf__for_each_map_named(pos, obj, objtmp, name)	\
 	bpf__for_each_map(pos, obj, objtmp) 		\
 		if (bpf_map__name(pos) && 		\
-			(strcmp("__bpf_stdout__", 	\
+			(strcmp(name, 			\
 				bpf_map__name(pos)) == 0))
 
-int bpf__setup_stdout(struct perf_evlist *evlist)
+struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const char *name)
 {
 	struct bpf_map_priv *tmpl_priv = NULL;
 	struct bpf_object *obj, *tmp;
@@ -1542,11 +1544,11 @@ int bpf__setup_stdout(struct perf_evlist *evlist)
 	int err;
 	bool need_init = false;
 
-	bpf__for_each_stdout_map(map, obj, tmp) {
+	bpf__for_each_map_named(map, obj, tmp, name) {
 		struct bpf_map_priv *priv = bpf_map__priv(map);
 
 		if (IS_ERR(priv))
-			return -BPF_LOADER_ERRNO__INTERNAL;
+			return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
 
 		/*
 		 * No need to check map type: type should have been
@@ -1559,49 +1561,61 @@ int bpf__setup_stdout(struct perf_evlist *evlist)
 	}
 
 	if (!need_init)
-		return 0;
+		return NULL;
 
 	if (!tmpl_priv) {
-		err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/",
-				   NULL);
+		char *event_definition = NULL;
+
+		if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0)
+			return ERR_PTR(-ENOMEM);
+
+		err = parse_events(evlist, event_definition, NULL);
+		free(event_definition);
+
 		if (err) {
-			pr_debug("ERROR: failed to create bpf-output event\n");
-			return -err;
+			pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name);
+			return ERR_PTR(-err);
 		}
 
 		evsel = perf_evlist__last(evlist);
 	}
 
-	bpf__for_each_stdout_map(map, obj, tmp) {
+	bpf__for_each_map_named(map, obj, tmp, name) {
 		struct bpf_map_priv *priv = bpf_map__priv(map);
 
 		if (IS_ERR(priv))
-			return -BPF_LOADER_ERRNO__INTERNAL;
+			return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
 		if (priv)
 			continue;
 
 		if (tmpl_priv) {
 			priv = bpf_map_priv__clone(tmpl_priv);
 			if (!priv)
-				return -ENOMEM;
+				return ERR_PTR(-ENOMEM);
 
 			err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
 			if (err) {
 				bpf_map_priv__clear(map, priv);
-				return err;
+				return ERR_PTR(err);
 			}
 		} else if (evsel) {
 			struct bpf_map_op *op;
 
 			op = bpf_map__add_newop(map, NULL);
 			if (IS_ERR(op))
-				return PTR_ERR(op);
+				return ERR_PTR(PTR_ERR(op));
 			op->op_type = BPF_MAP_OP_SET_EVSEL;
 			op->v.evsel = evsel;
 		}
 	}
 
-	return 0;
+	return evsel;
+}
+
+int bpf__setup_stdout(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__");
+	return PTR_ERR_OR_ZERO(evsel);
 }
 
 #define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
@@ -1778,8 +1792,8 @@ int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
 	return 0;
 }
 
-int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
-			       int err, char *buf, size_t size)
+int bpf__strerror_setup_output_event(struct perf_evlist *evlist __maybe_unused,
+				     int err, char *buf, size_t size)
 {
 	bpf__strerror_head(err, buf, size);
 	bpf__strerror_end(buf, size);
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 5d3aefd6fae7..62d245a90e1d 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -43,6 +43,7 @@ enum bpf_loader_errno {
 	__BPF_LOADER_ERRNO__END,
 };
 
+struct perf_evsel;
 struct bpf_object;
 struct parse_events_term;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
@@ -82,9 +83,8 @@ int bpf__apply_obj_config(void);
 int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
 
 int bpf__setup_stdout(struct perf_evlist *evlist);
-int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err,
-			       char *buf, size_t size);
-
+struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const char *name);
+int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size);
 #else
 #include <errno.h>
 
@@ -138,6 +138,12 @@ bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
 	return 0;
 }
 
+static inline struct perf_evsel *
+bpf__setup_output_event(struct perf_evlist *evlist __maybe_unused, const char *name __maybe_unused)
+{
+	return NULL;
+}
+
 static inline int
 __bpf_strerror(char *buf, size_t size)
 {
@@ -193,11 +199,16 @@ bpf__strerror_apply_obj_config(int err __maybe_unused,
 }
 
 static inline int
-bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
-			   int err __maybe_unused, char *buf,
-			   size_t size)
+bpf__strerror_setup_output_event(struct perf_evlist *evlist __maybe_unused,
+				 int err __maybe_unused, char *buf, size_t size)
 {
 	return __bpf_strerror(buf, size);
 }
+
 #endif
+
+static inline int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, char *buf, size_t size)
+{
+	return bpf__strerror_setup_output_event(evlist, err, buf, size);
+}
 #endif
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
index 29347756b0af..77e4891e17b0 100644
--- a/tools/perf/util/bpf-prologue.c
+++ b/tools/perf/util/bpf-prologue.c
@@ -61,7 +61,7 @@ check_pos(struct bpf_insn_pos *pos)
 
 /*
  * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
- * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM
+ * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM
  * instruction (BPF_{B,H,W,DW}).
  */
 static int
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 537eadd81914..04b1d53e4bf9 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -47,9 +47,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
-
-	if (al.map != NULL)
+	if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
 		al.map->dso->hit = 1;
 
 	thread__put(thread);
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index bf31ceab33bd..89512504551b 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -146,8 +146,15 @@ getBPFObjectFromModule(llvm::Module *Module)
 	raw_svector_ostream ostream(*Buffer);
 
 	legacy::PassManager PM;
-	if (TargetMachine->addPassesToEmitFile(PM, ostream,
-					       TargetMachine::CGFT_ObjectFile)) {
+	bool NotAdded;
+#if CLANG_VERSION_MAJOR < 7
+	NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream,
+						      TargetMachine::CGFT_ObjectFile);
+#else
+	NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr,
+						      TargetMachine::CGFT_ObjectFile);
+#endif
+	if (NotAdded) {
 		llvm::errs() << "TargetMachine can't emit a file of this type\n";
 		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);;
 	}
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index decb91f9da82..ccd02634a616 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -93,20 +93,17 @@ static int open_cgroup(const char *name)
 static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
 {
 	struct perf_evsel *counter;
-	struct cgroup *cgrp = NULL;
 	/*
 	 * check if cgrp is already defined, if so we reuse it
 	 */
 	evlist__for_each_entry(evlist, counter) {
 		if (!counter->cgrp)
 			continue;
-		if (!strcmp(counter->cgrp->name, str)) {
-			cgrp = cgroup__get(counter->cgrp);
-			break;
-		}
+		if (!strcmp(counter->cgrp->name, str))
+			return cgroup__get(counter->cgrp);
 	}
 
-	return cgrp;
+	return NULL;
 }
 
 static struct cgroup *cgroup__new(const char *name)
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 7798a2cc8a86..31279a7bd919 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -20,9 +20,10 @@ static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,}
 
 static struct comm_str *comm_str__get(struct comm_str *cs)
 {
-	if (cs)
-		refcount_inc(&cs->refcnt);
-	return cs;
+	if (cs && refcount_inc_not_zero(&cs->refcnt))
+		return cs;
+
+	return NULL;
 }
 
 static void comm_str__put(struct comm_str *cs)
@@ -67,9 +68,14 @@ struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
 		parent = *p;
 		iter = rb_entry(parent, struct comm_str, rb_node);
 
+		/*
+		 * If we race with comm_str__put, iter->refcnt is 0
+		 * and it will be removed within comm_str__put call
+		 * shortly, ignore it in this search.
+		 */
 		cmp = strcmp(str, iter->str);
-		if (!cmp)
-			return comm_str__get(iter);
+		if (!cmp && comm_str__get(iter))
+			return iter;
 
 		if (cmp < 0)
 			p = &(*p)->rb_left;
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index ecca688a25fb..892e92e7e7fc 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -4,10 +4,12 @@
 
 #ifdef HAVE_ZLIB_SUPPORT
 int gzip_decompress_to_file(const char *input, int output_fd);
+bool gzip_is_compressed(const char *input);
 #endif
 
 #ifdef HAVE_LZMA_SUPPORT
 int lzma_decompress_to_file(const char *input, int output_fd);
+bool lzma_is_compressed(const char *input);
 #endif
 
 #endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 84eb9393c7db..5ac157056cdf 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -707,6 +707,14 @@ struct perf_config_set *perf_config_set__new(void)
 	return set;
 }
 
+static int perf_config__init(void)
+{
+	if (config_set == NULL)
+		config_set = perf_config_set__new();
+
+	return config_set == NULL;
+}
+
 int perf_config(config_fn_t fn, void *data)
 {
 	int ret = 0;
@@ -714,7 +722,7 @@ int perf_config(config_fn_t fn, void *data)
 	struct perf_config_section *section;
 	struct perf_config_item *item;
 
-	if (config_set == NULL)
+	if (config_set == NULL && perf_config__init())
 		return -1;
 
 	perf_config_set__for_each_entry(config_set, section, item) {
@@ -735,12 +743,6 @@ int perf_config(config_fn_t fn, void *data)
 	return ret;
 }
 
-void perf_config__init(void)
-{
-	if (config_set == NULL)
-		config_set = perf_config_set__new();
-}
-
 void perf_config__exit(void)
 {
 	perf_config_set__delete(config_set);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index baf82bf227ac..bd0a5897c76a 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -38,7 +38,6 @@ struct perf_config_set *perf_config_set__new(void);
 void perf_config_set__delete(struct perf_config_set *set);
 int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
 			     const char *var, const char *value);
-void perf_config__init(void);
 void perf_config__exit(void);
 void perf_config__refresh(void);
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 4d5fc374e730..938def6d0bb9 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -31,6 +31,8 @@
 #endif
 #endif
 
+#define CS_ETM_INVAL_ADDR	0xdeadbeefdeadbeefUL
+
 struct cs_etm_decoder {
 	void *data;
 	void (*packet_printer)(const char *msg);
@@ -261,8 +263,8 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
 	decoder->tail = 0;
 	decoder->packet_count = 0;
 	for (i = 0; i < MAX_BUFFER; i++) {
-		decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL;
-		decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
+		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
+		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
 		decoder->packet_buffer[i].last_instr_taken_branch = false;
 		decoder->packet_buffer[i].exc = false;
 		decoder->packet_buffer[i].exc_ret = false;
@@ -295,8 +297,8 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
 	decoder->packet_buffer[et].exc = false;
 	decoder->packet_buffer[et].exc_ret = false;
 	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
-	decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL;
-	decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL;
+	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
+	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
 
 	if (decoder->packet_count == MAX_BUFFER - 1)
 		return OCSD_RESP_WAIT;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 743f5f444304..612b5755f742 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -23,6 +23,7 @@ struct cs_etm_buffer {
 };
 
 enum cs_etm_sample_type {
+	CS_ETM_EMPTY = 0,
 	CS_ETM_RANGE = 1 << 0,
 	CS_ETM_TRACE_ON = 1 << 1,
 };
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index bf16dc9ee507..73430b73570d 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session)
 	zfree(&aux);
 }
 
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+	struct machine *machine;
+
+	machine = etmq->etm->machine;
+
+	if (address >= etmq->etm->kernel_start) {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_KERNEL;
+		else
+			return PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_USER;
+		else if (perf_guest)
+			return PERF_RECORD_MISC_GUEST_USER;
+		else
+			return PERF_RECORD_MISC_HYPERVISOR;
+	}
+}
+
 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 			      size_t size, u8 *buffer)
 {
@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 		return -1;
 
 	machine = etmq->etm->machine;
-	if (address >= etmq->etm->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = cs_etm__cpu_mode(etmq, address);
 
 	thread = etmq->thread;
 	if (!thread) {
@@ -270,9 +288,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 		thread = etmq->etm->unknown_thread;
 	}
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al);
-
-	if (!al.map || !al.map->dso)
+	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
 		return 0;
 
 	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -496,6 +512,10 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
 
 static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
 {
+	/* Returns 0 for the CS_ETM_TRACE_ON packet */
+	if (packet->sample_type == CS_ETM_TRACE_ON)
+		return 0;
+
 	/*
 	 * The packet records the execution range with an exclusive end address
 	 *
@@ -507,6 +527,15 @@ static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
 	return packet->end_addr - A64_INSTR_SIZE;
 }
 
+static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
+{
+	/* Returns 0 for the CS_ETM_TRACE_ON packet */
+	if (packet->sample_type == CS_ETM_TRACE_ON)
+		return 0;
+
+	return packet->start_addr;
+}
+
 static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
 {
 	/*
@@ -548,7 +577,7 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
 
 	be       = &bs->entries[etmq->last_branch_pos];
 	be->from = cs_etm__last_executed_instr(etmq->prev_packet);
-	be->to	 = etmq->packet->start_addr;
+	be->to	 = cs_etm__first_executed_instr(etmq->packet);
 	/* No support for mispredict */
 	be->flags.mispred = 0;
 	be->flags.predicted = 1;
@@ -642,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	struct perf_sample sample = {.ip = 0,};
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
 	sample.ip = addr;
@@ -654,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
 	sample.insn_len = 1;
-	sample.cpumode = event->header.misc;
+	sample.cpumode = event->sample.header.misc;
 
 	if (etm->synth_opts.last_branch) {
 		cs_etm__copy_last_branch_rb(etmq);
@@ -695,21 +724,24 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 		u64			nr;
 		struct branch_entry	entries;
 	} dummy_bs;
+	u64 ip;
+
+	ip = cs_etm__last_executed_instr(etmq->prev_packet);
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
-	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+	sample.ip = ip;
 	sample.pid = etmq->pid;
 	sample.tid = etmq->tid;
-	sample.addr = etmq->packet->start_addr;
+	sample.addr = cs_etm__first_executed_instr(etmq->packet);
 	sample.id = etmq->etm->branches_id;
 	sample.stream_id = etmq->etm->branches_id;
 	sample.period = 1;
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
-	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.cpumode = event->sample.header.misc;
 
 	/*
 	 * perf report cannot handle events without a branch stack
@@ -899,13 +931,23 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 		etmq->period_instructions = instrs_over;
 	}
 
-	if (etm->sample_branches &&
-	    etmq->prev_packet &&
-	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
-	    etmq->prev_packet->last_instr_taken_branch) {
-		ret = cs_etm__synth_branch_sample(etmq);
-		if (ret)
-			return ret;
+	if (etm->sample_branches && etmq->prev_packet) {
+		bool generate_sample = false;
+
+		/* Generate sample for tracing on packet */
+		if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON)
+			generate_sample = true;
+
+		/* Generate sample for branch taken packet */
+		if (etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+		    etmq->prev_packet->last_instr_taken_branch)
+			generate_sample = true;
+
+		if (generate_sample) {
+			ret = cs_etm__synth_branch_sample(etmq);
+			if (ret)
+				return ret;
+		}
 	}
 
 	if (etm->sample_branches || etm->synth_opts.last_branch) {
@@ -924,10 +966,17 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 static int cs_etm__flush(struct cs_etm_queue *etmq)
 {
 	int err = 0;
+	struct cs_etm_auxtrace *etm = etmq->etm;
 	struct cs_etm_packet *tmp;
 
+	if (!etmq->prev_packet)
+		return 0;
+
+	/* Handle start tracing packet */
+	if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
+		goto swap_packet;
+
 	if (etmq->etm->synth_opts.last_branch &&
-	    etmq->prev_packet &&
 	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
 		/*
 		 * Generate a last branch event for the branches left in the
@@ -941,8 +990,22 @@ static int cs_etm__flush(struct cs_etm_queue *etmq)
 		err = cs_etm__synth_instruction_sample(
 			etmq, addr,
 			etmq->period_instructions);
+		if (err)
+			return err;
+
 		etmq->period_instructions = 0;
 
+	}
+
+	if (etm->sample_branches &&
+	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+		err = cs_etm__synth_branch_sample(etmq);
+		if (err)
+			return err;
+	}
+
+swap_packet:
+	if (etmq->etm->synth_opts.last_branch) {
 		/*
 		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
 		 * the next incoming packet.
@@ -1022,6 +1085,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
 					 */
 					cs_etm__flush(etmq);
 					break;
+				case CS_ETM_EMPTY:
+					/*
+					 * Should not receive empty packet,
+					 * report error.
+					 */
+					pr_err("CS ETM Trace: empty packet\n");
+					return -EINVAL;
 				default:
 					break;
 				}
@@ -1383,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		etm->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&etm->synth_opts);
+		itrace_synth_opts__set_default(&etm->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		etm->synth_opts.callchain = false;
 	}
 
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5744c12641a5..2a36fab76994 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -182,20 +182,20 @@ err_put_field:
 }
 
 static struct bt_ctf_field_type*
-get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
+get_tracepoint_field_type(struct ctf_writer *cw, struct tep_format_field *field)
 {
 	unsigned long flags = field->flags;
 
-	if (flags & FIELD_IS_STRING)
+	if (flags & TEP_FIELD_IS_STRING)
 		return cw->data.string;
 
-	if (!(flags & FIELD_IS_SIGNED)) {
+	if (!(flags & TEP_FIELD_IS_SIGNED)) {
 		/* unsigned long are mostly pointers */
-		if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER)
+		if (flags & TEP_FIELD_IS_LONG || flags & TEP_FIELD_IS_POINTER)
 			return cw->data.u64_hex;
 	}
 
-	if (flags & FIELD_IS_SIGNED) {
+	if (flags & TEP_FIELD_IS_SIGNED) {
 		if (field->size == 8)
 			return cw->data.s64;
 		else
@@ -287,7 +287,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 				      struct bt_ctf_event_class *event_class,
 				      struct bt_ctf_event *event,
 				      struct perf_sample *sample,
-				      struct format_field *fmtf)
+				      struct tep_format_field *fmtf)
 {
 	struct bt_ctf_field_type *type;
 	struct bt_ctf_field *array_field;
@@ -304,20 +304,20 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	name = fmtf->alias;
 	offset = fmtf->offset;
 	len = fmtf->size;
-	if (flags & FIELD_IS_STRING)
-		flags &= ~FIELD_IS_ARRAY;
+	if (flags & TEP_FIELD_IS_STRING)
+		flags &= ~TEP_FIELD_IS_ARRAY;
 
-	if (flags & FIELD_IS_DYNAMIC) {
+	if (flags & TEP_FIELD_IS_DYNAMIC) {
 		unsigned long long tmp_val;
 
-		tmp_val = pevent_read_number(fmtf->event->pevent,
-				data + offset, len);
+		tmp_val = tep_read_number(fmtf->event->pevent,
+					  data + offset, len);
 		offset = tmp_val;
 		len = offset >> 16;
 		offset &= 0xffff;
 	}
 
-	if (flags & FIELD_IS_ARRAY) {
+	if (flags & TEP_FIELD_IS_ARRAY) {
 
 		type = bt_ctf_event_class_get_field_by_name(
 				event_class, name);
@@ -338,7 +338,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	type = get_tracepoint_field_type(cw, fmtf);
 
 	for (i = 0; i < n_items; i++) {
-		if (flags & FIELD_IS_ARRAY)
+		if (flags & TEP_FIELD_IS_ARRAY)
 			field = bt_ctf_field_array_get_field(array_field, i);
 		else
 			field = bt_ctf_field_create(type);
@@ -348,16 +348,16 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 			return -1;
 		}
 
-		if (flags & FIELD_IS_STRING)
+		if (flags & TEP_FIELD_IS_STRING)
 			ret = string_set_value(field, data + offset + i * len);
 		else {
 			unsigned long long value_int;
 
-			value_int = pevent_read_number(
+			value_int = tep_read_number(
 					fmtf->event->pevent,
 					data + offset + i * len, len);
 
-			if (!(flags & FIELD_IS_SIGNED))
+			if (!(flags & TEP_FIELD_IS_SIGNED))
 				ret = bt_ctf_field_unsigned_integer_set_value(
 						field, value_int);
 			else
@@ -369,7 +369,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 			pr_err("failed to set file value %s\n", name);
 			goto err_put_field;
 		}
-		if (!(flags & FIELD_IS_ARRAY)) {
+		if (!(flags & TEP_FIELD_IS_ARRAY)) {
 			ret = bt_ctf_event_set_payload(event, name, field);
 			if (ret) {
 				pr_err("failed to set payload %s\n", name);
@@ -378,7 +378,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 		}
 		bt_ctf_field_put(field);
 	}
-	if (flags & FIELD_IS_ARRAY) {
+	if (flags & TEP_FIELD_IS_ARRAY) {
 		ret = bt_ctf_event_set_payload(event, name, array_field);
 		if (ret) {
 			pr_err("Failed add payload array %s\n", name);
@@ -396,10 +396,10 @@ err_put_field:
 static int add_tracepoint_fields_values(struct ctf_writer *cw,
 					struct bt_ctf_event_class *event_class,
 					struct bt_ctf_event *event,
-					struct format_field *fields,
+					struct tep_format_field *fields,
 					struct perf_sample *sample)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	int ret;
 
 	for (field = fields; field; field = field->next) {
@@ -417,8 +417,8 @@ static int add_tracepoint_values(struct ctf_writer *cw,
 				 struct perf_evsel *evsel,
 				 struct perf_sample *sample)
 {
-	struct format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct format_field *fields        = evsel->tp_format->format.fields;
+	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct tep_format_field *fields        = evsel->tp_format->format.fields;
 	int ret;
 
 	ret = add_tracepoint_fields_values(cw, event_class, event,
@@ -970,7 +970,7 @@ out:
 
 static int event_class_add_field(struct bt_ctf_event_class *event_class,
 		struct bt_ctf_field_type *type,
-		struct format_field *field)
+		struct tep_format_field *field)
 {
 	struct bt_ctf_field_type *t = NULL;
 	char *name;
@@ -1009,10 +1009,10 @@ static int event_class_add_field(struct bt_ctf_event_class *event_class,
 }
 
 static int add_tracepoint_fields_types(struct ctf_writer *cw,
-				       struct format_field *fields,
+				       struct tep_format_field *fields,
 				       struct bt_ctf_event_class *event_class)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	int ret;
 
 	for (field = fields; field; field = field->next) {
@@ -1030,15 +1030,15 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw,
 		 * type and don't care that it is an array. What we don't
 		 * support is an array of strings.
 		 */
-		if (flags & FIELD_IS_STRING)
-			flags &= ~FIELD_IS_ARRAY;
+		if (flags & TEP_FIELD_IS_STRING)
+			flags &= ~TEP_FIELD_IS_ARRAY;
 
-		if (flags & FIELD_IS_ARRAY)
+		if (flags & TEP_FIELD_IS_ARRAY)
 			type = bt_ctf_field_type_array_create(type, field->arraylen);
 
 		ret = event_class_add_field(event_class, type, field);
 
-		if (flags & FIELD_IS_ARRAY)
+		if (flags & TEP_FIELD_IS_ARRAY)
 			bt_ctf_field_type_put(type);
 
 		if (ret) {
@@ -1055,8 +1055,8 @@ static int add_tracepoint_types(struct ctf_writer *cw,
 				struct perf_evsel *evsel,
 				struct bt_ctf_event_class *class)
 {
-	struct format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct format_field *fields        = evsel->tp_format->format.fields;
+	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct tep_format_field *fields        = evsel->tp_format->format.fields;
 	int ret;
 
 	ret = add_tracepoint_fields_types(cw, common_fields, class);
@@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 {
 	struct perf_session *session;
 	struct perf_data data = {
-		.file.path = input,
+		.file      = { .path = input, .fd = -1 },
 		.mode      = PERF_DATA_MODE_READ,
 		.force     = opts->force,
 	};
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index b0c2b5c5d337..69fbb0a72d0c 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -247,9 +247,9 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
 		*dso_db_id = dso->db_id;
 
 		if (!al->sym) {
-			al->sym = symbol__new(al->addr, 0, 0, "unknown");
+			al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
 			if (al->sym)
-				dso__insert_symbol(dso, al->map->type, al->sym);
+				dso__insert_symbol(dso, al->sym);
 		}
 
 		if (al->sym) {
@@ -315,8 +315,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
 		al.addr = node->ip;
 
 		if (al.map && !al.sym)
-			al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION,
-						  al.addr);
+			al.sym = dso__find_symbol(al.map->dso, al.addr);
 
 		db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
 
@@ -464,6 +463,28 @@ int db_export__branch_types(struct db_export *dbe)
 		if (err)
 			break;
 	}
+
+	/* Add trace begin / end variants */
+	for (i = 0; branch_types[i].name ; i++) {
+		const char *name = branch_types[i].name;
+		u32 type = branch_types[i].branch_type;
+		char buf[64];
+
+		if (type == PERF_IP_FLAG_BRANCH ||
+		    (type & (PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END)))
+			continue;
+
+		snprintf(buf, sizeof(buf), "trace begin / %s", name);
+		err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_BEGIN, buf);
+		if (err)
+			break;
+
+		snprintf(buf, sizeof(buf), "%s / trace end", name);
+		err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_END, buf);
+		if (err)
+			break;
+	}
+
 	return err;
 }
 
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 36ef45b2e89d..bbed90e5d9bb 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -189,28 +189,34 @@ int dso__read_binary_type_filename(const struct dso *dso,
 	return ret;
 }
 
+enum {
+	COMP_ID__NONE = 0,
+};
+
 static const struct {
 	const char *fmt;
 	int (*decompress)(const char *input, int output);
+	bool (*is_compressed)(const char *input);
 } compressions[] = {
+	[COMP_ID__NONE] = { .fmt = NULL, },
 #ifdef HAVE_ZLIB_SUPPORT
-	{ "gz", gzip_decompress_to_file },
+	{ "gz", gzip_decompress_to_file, gzip_is_compressed },
 #endif
 #ifdef HAVE_LZMA_SUPPORT
-	{ "xz", lzma_decompress_to_file },
+	{ "xz", lzma_decompress_to_file, lzma_is_compressed },
 #endif
-	{ NULL, NULL },
+	{ NULL, NULL, NULL },
 };
 
-bool is_supported_compression(const char *ext)
+static int is_supported_compression(const char *ext)
 {
 	unsigned i;
 
-	for (i = 0; compressions[i].fmt; i++) {
+	for (i = 1; compressions[i].fmt; i++) {
 		if (!strcmp(ext, compressions[i].fmt))
-			return true;
+			return i;
 	}
-	return false;
+	return COMP_ID__NONE;
 }
 
 bool is_kernel_module(const char *pathname, int cpumode)
@@ -239,80 +245,73 @@ bool is_kernel_module(const char *pathname, int cpumode)
 	return m.kmod;
 }
 
-bool decompress_to_file(const char *ext, const char *filename, int output_fd)
-{
-	unsigned i;
-
-	for (i = 0; compressions[i].fmt; i++) {
-		if (!strcmp(ext, compressions[i].fmt))
-			return !compressions[i].decompress(filename,
-							   output_fd);
-	}
-	return false;
-}
-
 bool dso__needs_decompress(struct dso *dso)
 {
 	return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
 		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 }
 
-static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf)
+static int decompress_kmodule(struct dso *dso, const char *name,
+			      char *pathname, size_t len)
 {
+	char tmpbuf[] = KMOD_DECOMP_NAME;
 	int fd = -1;
-	struct kmod_path m;
 
 	if (!dso__needs_decompress(dso))
 		return -1;
 
-	if (kmod_path__parse_ext(&m, dso->long_name))
+	if (dso->comp == COMP_ID__NONE)
 		return -1;
 
-	if (!m.comp)
-		goto out;
+	/*
+	 * We have proper compression id for DSO and yet the file
+	 * behind the 'name' can still be plain uncompressed object.
+	 *
+	 * The reason is behind the logic we open the DSO object files,
+	 * when we try all possible 'debug' objects until we find the
+	 * data. So even if the DSO is represented by 'krava.xz' module,
+	 * we can end up here opening ~/.debug/....23432432/debug' file
+	 * which is not compressed.
+	 *
+	 * To keep this transparent, we detect this and return the file
+	 * descriptor to the uncompressed file.
+	 */
+	if (!compressions[dso->comp].is_compressed(name))
+		return open(name, O_RDONLY);
 
 	fd = mkstemp(tmpbuf);
 	if (fd < 0) {
 		dso->load_errno = errno;
-		goto out;
+		return -1;
 	}
 
-	if (!decompress_to_file(m.ext, name, fd)) {
+	if (compressions[dso->comp].decompress(name, fd)) {
 		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
 		close(fd);
 		fd = -1;
 	}
 
-out:
-	free(m.ext);
+	if (!pathname || (fd < 0))
+		unlink(tmpbuf);
+
+	if (pathname && (fd >= 0))
+		strncpy(pathname, tmpbuf, len);
+
 	return fd;
 }
 
 int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
 {
-	char tmpbuf[] = KMOD_DECOMP_NAME;
-	int fd;
-
-	fd = decompress_kmodule(dso, name, tmpbuf);
-	unlink(tmpbuf);
-	return fd;
+	return decompress_kmodule(dso, name, NULL, 0);
 }
 
 int dso__decompress_kmodule_path(struct dso *dso, const char *name,
 				 char *pathname, size_t len)
 {
-	char tmpbuf[] = KMOD_DECOMP_NAME;
-	int fd;
-
-	fd = decompress_kmodule(dso, name, tmpbuf);
-	if (fd < 0) {
-		unlink(tmpbuf);
-		return -1;
-	}
+	int fd = decompress_kmodule(dso, name, pathname, len);
 
-	strncpy(pathname, tmpbuf, len);
 	close(fd);
-	return 0;
+	return fd >= 0 ? 0 : -1;
 }
 
 /*
@@ -332,7 +331,7 @@ int dso__decompress_kmodule_path(struct dso *dso, const char *name,
  * Returns 0 if there's no strdup error, -ENOMEM otherwise.
  */
 int __kmod_path__parse(struct kmod_path *m, const char *path,
-		       bool alloc_name, bool alloc_ext)
+		       bool alloc_name)
 {
 	const char *name = strrchr(path, '/');
 	const char *ext  = strrchr(path, '.');
@@ -354,6 +353,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 		if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
 		    (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
 		    (strncmp(name, "[vdso]", 6) == 0) ||
+		    (strncmp(name, "[vdso32]", 8) == 0) ||
+		    (strncmp(name, "[vdsox32]", 9) == 0) ||
 		    (strncmp(name, "[vsyscall]", 10) == 0)) {
 			m->kmod = false;
 
@@ -370,10 +371,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 		return 0;
 	}
 
-	if (is_supported_compression(ext + 1)) {
-		m->comp = true;
+	m->comp = is_supported_compression(ext + 1);
+	if (m->comp > COMP_ID__NONE)
 		ext -= 3;
-	}
 
 	/* Check .ko extension only if there's enough name left. */
 	if (ext > name)
@@ -391,14 +391,6 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 		strxfrchar(m->name, '-', '_');
 	}
 
-	if (alloc_ext && m->comp) {
-		m->ext = strdup(ext + 4);
-		if (!m->ext) {
-			free((void *) m->name);
-			return -ENOMEM;
-		}
-	}
-
 	return 0;
 }
 
@@ -411,8 +403,10 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
 		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
 
 	/* _KMODULE_COMP should be next to _KMODULE */
-	if (m->kmod && m->comp)
+	if (m->kmod && m->comp) {
 		dso->symtab_type++;
+		dso->comp = m->comp;
+	}
 
 	dso__set_short_name(dso, strdup(m->name), true);
 }
@@ -466,6 +460,7 @@ static int __open_dso(struct dso *dso, struct machine *machine)
 	int fd = -EINVAL;
 	char *root_dir = (char *)"";
 	char *name = malloc(PATH_MAX);
+	bool decomp = false;
 
 	if (!name)
 		return -ENOMEM;
@@ -489,12 +484,13 @@ static int __open_dso(struct dso *dso, struct machine *machine)
 			goto out;
 		}
 
+		decomp = true;
 		strcpy(name, newpath);
 	}
 
 	fd = do_open(name);
 
-	if (dso__needs_decompress(dso))
+	if (decomp)
 		unlink(name);
 
 out:
@@ -1014,7 +1010,7 @@ struct map *dso__new_map(const char *name)
 	struct dso *dso = dso__new(name);
 
 	if (dso)
-		map = map__new2(0, dso, MAP__FUNCTION);
+		map = map__new2(0, dso);
 
 	return map;
 }
@@ -1176,19 +1172,19 @@ int dso__name_len(const struct dso *dso)
 	return dso->short_name_len;
 }
 
-bool dso__loaded(const struct dso *dso, enum map_type type)
+bool dso__loaded(const struct dso *dso)
 {
-	return dso->loaded & (1 << type);
+	return dso->loaded;
 }
 
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
+bool dso__sorted_by_name(const struct dso *dso)
 {
-	return dso->sorted_by_name & (1 << type);
+	return dso->sorted_by_name;
 }
 
-void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
+void dso__set_sorted_by_name(struct dso *dso)
 {
-	dso->sorted_by_name |= (1 << type);
+	dso->sorted_by_name = true;
 }
 
 struct dso *dso__new(const char *name)
@@ -1196,12 +1192,10 @@ struct dso *dso__new(const char *name)
 	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
 
 	if (dso != NULL) {
-		int i;
 		strcpy(dso->name, name);
 		dso__set_long_name(dso, dso->name, false);
 		dso__set_short_name(dso, dso->name, false);
-		for (i = 0; i < MAP__NR_TYPES; ++i)
-			dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
+		dso->symbols = dso->symbol_names = RB_ROOT;
 		dso->data.cache = RB_ROOT;
 		dso->inlined_nodes = RB_ROOT;
 		dso->srclines = RB_ROOT;
@@ -1218,6 +1212,7 @@ struct dso *dso__new(const char *name)
 		dso->a2l_fails = 1;
 		dso->kernel = DSO_TYPE_USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
+		dso->comp = COMP_ID__NONE;
 		RB_CLEAR_NODE(&dso->rb_node);
 		dso->root = NULL;
 		INIT_LIST_HEAD(&dso->node);
@@ -1231,8 +1226,6 @@ struct dso *dso__new(const char *name)
 
 void dso__delete(struct dso *dso)
 {
-	int i;
-
 	if (!RB_EMPTY_NODE(&dso->rb_node))
 		pr_err("DSO %s is still in rbtree when being deleted!\n",
 		       dso->long_name);
@@ -1240,8 +1233,7 @@ void dso__delete(struct dso *dso)
 	/* free inlines first, as they reference symbols */
 	inlines__tree_delete(&dso->inlined_nodes);
 	srcline__tree_delete(&dso->srclines);
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		symbols__delete(&dso->symbols[i]);
+	symbols__delete(&dso->symbols);
 
 	if (dso->short_name_allocated) {
 		zfree((char **)&dso->short_name);
@@ -1451,9 +1443,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp)
 	size_t ret = 0;
 
 	list_for_each_entry(pos, head, node) {
-		int i;
-		for (i = 0; i < MAP__NR_TYPES; ++i)
-			ret += dso__fprintf(pos, i, fp);
+		ret += dso__fprintf(pos, fp);
 	}
 
 	return ret;
@@ -1467,18 +1457,17 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
 	return fprintf(fp, "%s", sbuild_id);
 }
 
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
+size_t dso__fprintf(struct dso *dso, FILE *fp)
 {
 	struct rb_node *nd;
 	size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
 
 	if (dso->short_name != dso->long_name)
 		ret += fprintf(fp, "%s, ", dso->long_name);
-	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
-		       dso__loaded(dso, type) ? "" : "NOT ");
+	ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
 	ret += dso__fprintf_buildid(dso, fp);
 	ret += fprintf(fp, ")\n");
-	for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) {
 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
 		ret += symbol__fprintf(pos, fp);
 	}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c229dbe0277a..c5380500bed4 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -140,14 +140,14 @@ struct dso {
 	struct list_head node;
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 *root;		/* root of rbtree that rb_node is in */
-	struct rb_root	 symbols[MAP__NR_TYPES];
-	struct rb_root	 symbol_names[MAP__NR_TYPES];
+	struct rb_root	 symbols;
+	struct rb_root	 symbol_names;
 	struct rb_root	 inlined_nodes;
 	struct rb_root	 srclines;
 	struct {
 		u64		addr;
 		struct symbol	*symbol;
-	} last_find_result[MAP__NR_TYPES];
+	} last_find_result;
 	void		 *a2l;
 	char		 *symsrc_filename;
 	unsigned int	 a2l_fails;
@@ -164,8 +164,8 @@ struct dso {
 	u8		 short_name_allocated:1;
 	u8		 long_name_allocated:1;
 	u8		 is_64_bit:1;
-	u8		 sorted_by_name;
-	u8		 loaded;
+	bool		 sorted_by_name;
+	bool		 loaded;
 	u8		 rel;
 	u8		 build_id[BUILD_ID_SIZE];
 	u64		 text_offset;
@@ -175,6 +175,7 @@ struct dso {
 	u16		 short_name_len;
 	void		*dwfl;			/* DWARF debug info */
 	struct auxtrace_cache *auxtrace_cache;
+	int		 comp;
 
 	/* dso data file */
 	struct {
@@ -202,14 +203,13 @@ struct dso {
  * @dso: the 'struct dso *' in which symbols itereated
  * @pos: the 'struct symbol *' to use as a loop cursor
  * @n: the 'struct rb_node *' to use as a temporary storage
- * @type: the 'enum map_type' type of symbols
  */
-#define dso__for_each_symbol(dso, pos, n, type)	\
-	symbols__for_each_entry(&(dso)->symbols[(type)], pos, n)
+#define dso__for_each_symbol(dso, pos, n)	\
+	symbols__for_each_entry(&(dso)->symbols, pos, n)
 
-static inline void dso__set_loaded(struct dso *dso, enum map_type type)
+static inline void dso__set_loaded(struct dso *dso)
 {
-	dso->loaded |= (1 << type);
+	dso->loaded = true;
 }
 
 struct dso *dso__new(const char *name);
@@ -231,11 +231,16 @@ static inline void __dso__zput(struct dso **dso)
 
 #define dso__zput(dso) __dso__zput(&dso)
 
-bool dso__loaded(const struct dso *dso, enum map_type type);
+bool dso__loaded(const struct dso *dso);
 
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
-void dso__set_sorted_by_name(struct dso *dso, enum map_type type);
-void dso__sort_by_name(struct dso *dso, enum map_type type);
+static inline bool dso__has_symbols(const struct dso *dso)
+{
+	return !RB_EMPTY_ROOT(&dso->symbols);
+}
+
+bool dso__sorted_by_name(const struct dso *dso);
+void dso__set_sorted_by_name(struct dso *dso);
+void dso__sort_by_name(struct dso *dso);
 
 void dso__set_build_id(struct dso *dso, void *build_id);
 bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
@@ -246,9 +251,7 @@ int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
 char dso__symtab_origin(const struct dso *dso);
 int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
 				   char *root_dir, char *filename, size_t size);
-bool is_supported_compression(const char *ext);
 bool is_kernel_module(const char *pathname, int cpumode);
-bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
 int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
 int dso__decompress_kmodule_path(struct dso *dso, const char *name,
@@ -259,17 +262,15 @@ int dso__decompress_kmodule_path(struct dso *dso, const char *name,
 
 struct kmod_path {
 	char *name;
-	char *ext;
-	bool  comp;
+	int   comp;
 	bool  kmod;
 };
 
 int __kmod_path__parse(struct kmod_path *m, const char *path,
-		     bool alloc_name, bool alloc_ext);
+		     bool alloc_name);
 
-#define kmod_path__parse(__m, __p)      __kmod_path__parse(__m, __p, false, false)
-#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false)
-#define kmod_path__parse_ext(__m, __p)  __kmod_path__parse(__m, __p, false, true)
+#define kmod_path__parse(__m, __p)      __kmod_path__parse(__m, __p, false)
+#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true)
 
 void dso__set_module_info(struct dso *dso, struct kmod_path *m,
 			  struct machine *machine);
@@ -349,9 +350,8 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
 size_t __dsos__fprintf(struct list_head *head, FILE *fp);
 
 size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
-				    enum map_type type, FILE *fp);
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
+size_t dso__fprintf(struct dso *dso, FILE *fp);
 
 static inline bool dso__is_vmlinux(struct dso *dso)
 {
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4c842762e3f2..59f38c7693f8 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -93,6 +93,37 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
 	return 0;
 }
 
+static int perf_env__read_arch(struct perf_env *env)
+{
+	struct utsname uts;
+
+	if (env->arch)
+		return 0;
+
+	if (!uname(&uts))
+		env->arch = strdup(uts.machine);
+
+	return env->arch ? 0 : -ENOMEM;
+}
+
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+	if (env->nr_cpus_avail == 0)
+		env->nr_cpus_avail = cpu__max_present_cpu();
+
+	return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
+const char *perf_env__raw_arch(struct perf_env *env)
+{
+	return env && !perf_env__read_arch(env) ? env->arch : "unknown";
+}
+
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+	return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
 void cpu_cache_level__free(struct cpu_cache_level *cache)
 {
 	free(cache->type);
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index c4ef2e523367..d01b8355f4ca 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -63,6 +63,7 @@ struct perf_env {
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
+	u64                     clockid_res_ns;
 };
 
 extern struct perf_env perf_env;
@@ -76,4 +77,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
 void cpu_cache_level__free(struct cpu_cache_level *cache);
 
 const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
+
 #endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 98ff3a6a3d50..e9c108a6b1c3 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -88,10 +88,10 @@ static const char *perf_ns__name(unsigned int id)
 	return perf_ns__names[id];
 }
 
-static int perf_tool__process_synth_event(struct perf_tool *tool,
-					  union perf_event *event,
-					  struct machine *machine,
-					  perf_event__handler_t process)
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process)
 {
 	struct perf_sample synth_sample = {
 	.pid	   = -1,
@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	event->fork.pid  = tgid;
 	event->fork.tid  = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
+	event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
 
 	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
 
@@ -464,8 +465,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 {
 	int rc = 0;
 	struct map *pos;
-	struct map_groups *kmaps = &machine->kmaps;
-	struct maps *maps = &kmaps->maps[MAP__FUNCTION];
+	struct maps *maps = machine__kernel_maps(machine);
 	union perf_event *event = zalloc((sizeof(event->mmap) +
 					  machine->id_hdr_size));
 	if (event == NULL) {
@@ -488,7 +488,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		size_t size;
 
-		if (__map__is_kernel(pos))
+		if (!__map__is_kmodule(pos))
 			continue;
 
 		size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
@@ -542,10 +542,17 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 						      tgid, process, machine) < 0)
 			return -1;
 
+		/*
+		 * send mmap only for thread group leader
+		 * see thread__init_map_groups
+		 */
+		if (pid == tgid &&
+		    perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+						       process, machine, mmap_data,
+						       proc_map_timeout))
+			return -1;
 
-		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-							  process, machine, mmap_data,
-							  proc_map_timeout);
+		return 0;
 	}
 
 	if (machine__is_default_guest(machine))
@@ -869,7 +876,7 @@ static int find_symbol_cb(void *arg, const char *name, char type,
 	 * Must be a function or at least an alias, as in PARISC64, where "_text" is
 	 * an 'A' to the same address as "_stext".
 	 */
-	if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
+	if (!(kallsyms__is_function(type) ||
 	      type == 'A') || strcmp(name, args->name))
 		return 0;
 
@@ -889,9 +896,16 @@ int kallsyms__get_function_start(const char *kallsyms_filename,
 	return 0;
 }
 
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-				       perf_event__handler_t process,
-				       struct machine *machine)
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+					      perf_event__handler_t process __maybe_unused,
+					      struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+						perf_event__handler_t process,
+						struct machine *machine)
 {
 	size_t size;
 	struct map *map = machine__kernel_map(machine);
@@ -944,6 +958,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 	return err;
 }
 
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int err;
+
+	err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+	if (err < 0)
+		return err;
+
+	return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
 int perf_event__synthesize_thread_map2(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
@@ -1055,6 +1082,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max
 	}
 
 	*size += sizeof(struct cpu_map_data);
+	*size = PERF_ALIGN(*size, sizeof(u64));
 	return zalloc(*size);
 }
 
@@ -1489,9 +1517,8 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
 	return machine__process_event(machine, event, sample);
 }
 
-void thread__find_addr_map(struct thread *thread, u8 cpumode,
-			   enum map_type type, u64 addr,
-			   struct addr_location *al)
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al)
 {
 	struct map_groups *mg = thread->mg;
 	struct machine *machine = mg->machine;
@@ -1505,7 +1532,7 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode,
 
 	if (machine == NULL) {
 		al->map = NULL;
-		return;
+		return NULL;
 	}
 
 	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
@@ -1533,28 +1560,11 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode,
 			!perf_host)
 			al->filtered |= (1 << HIST_FILTER__HOST);
 
-		return;
+		return NULL;
 	}
-try_again:
-	al->map = map_groups__find(mg, type, al->addr);
-	if (al->map == NULL) {
-		/*
-		 * If this is outside of all known maps, and is a negative
-		 * address, try to look it up in the kernel dso, as it might be
-		 * a vsyscall or vdso (which executes in user-mode).
-		 *
-		 * XXX This is nasty, we should have a symbol list in the
-		 * "[vdso]" dso, but for now lets use the old trick of looking
-		 * in the whole kernel symbol list.
-		 */
-		if (cpumode == PERF_RECORD_MISC_USER && machine &&
-		    mg != &machine->kmaps &&
-		    machine__kernel_ip(machine, al->addr)) {
-			mg = &machine->kmaps;
-			load_map = true;
-			goto try_again;
-		}
-	} else {
+
+	al->map = map_groups__find(mg, al->addr);
+	if (al->map != NULL) {
 		/*
 		 * Kernel maps might be changed when loading symbols so loading
 		 * must be done prior to using kernel maps.
@@ -1563,17 +1573,17 @@ try_again:
 			map__load(al->map);
 		al->addr = al->map->map_ip(al->map, al->addr);
 	}
+
+	return al->map;
 }
 
-void thread__find_addr_location(struct thread *thread,
-				u8 cpumode, enum map_type type, u64 addr,
-				struct addr_location *al)
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+				   u64 addr, struct addr_location *al)
 {
-	thread__find_addr_map(thread, cpumode, type, addr, al);
-	if (al->map != NULL)
+	al->sym = NULL;
+	if (thread__find_map(thread, cpumode, addr, al))
 		al->sym = map__find_symbol(al->map, al->addr);
-	else
-		al->sym = NULL;
+	return al->sym;
 }
 
 /*
@@ -1590,7 +1600,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
 		return -1;
 
 	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
+	thread__find_map(thread, sample->cpumode, sample->ip, al);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1669,10 +1679,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr)
 void thread__resolve(struct thread *thread, struct addr_location *al,
 		     struct perf_sample *sample)
 {
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al);
-	if (!al->map)
-		thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
-				      sample->addr, al);
+	thread__find_map(thread, sample->cpumode, sample->addr, al);
 
 	al->cpu = sample->cpu;
 	al->sym = NULL;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 0f794744919c..bfa60bcafbde 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -750,6 +750,10 @@ int perf_event__process_exit(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process);
 int perf_event__process(struct perf_tool *tool,
 			union perf_event *event,
 			struct perf_sample *sample,
@@ -796,6 +800,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       bool mmap_data,
 				       unsigned int proc_map_timeout);
 
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine);
+
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a59281d64368..668d2a9ef0f4 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 	struct perf_evsel *pos;
 
 	evlist__for_each_entry(evlist, pos) {
-		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
 			continue;
 		perf_evsel__disable(pos);
 	}
@@ -803,7 +803,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
 		if (*output == -1) {
 			*output = fd;
 
-			if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)
+			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
 				return -1;
 		} else {
 			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
@@ -1795,3 +1795,45 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
 
 	return true;
 }
+
+/*
+ * Events in data file are not collect in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void perf_evlist__force_leader(struct perf_evlist *evlist)
+{
+	if (!evlist->nr_groups) {
+		struct perf_evsel *leader = perf_evlist__first(evlist);
+
+		perf_evlist__set_leader(evlist);
+		leader->forced_leader = true;
+	}
+}
+
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
+						 struct perf_evsel *evsel)
+{
+	struct perf_evsel *c2, *leader;
+	bool is_open = true;
+
+	leader = evsel->leader;
+	pr_debug("Weak group for %s/%d failed\n",
+			leader->name, leader->nr_members);
+
+	/*
+	 * for_each_group_member doesn't work here because it doesn't
+	 * include the first entry.
+	 */
+	evlist__for_each_entry(evsel_list, c2) {
+		if (c2 == evsel)
+			is_open = false;
+		if (c2->leader == leader) {
+			if (is_open)
+				perf_evsel__close(c2);
+			c2->leader = c2;
+			c2->nr_members = 0;
+		}
+	}
+	return leader;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 6c41b2f78713..9919eed6d15b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -309,4 +309,10 @@ struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
 					    union perf_event *event);
 
 bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
+
+void perf_evlist__force_leader(struct perf_evlist *evlist);
+
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist,
+						 struct perf_evsel *evsel);
+
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4cd2cf93f726..dbc0466db368 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	evsel->leader	   = evsel;
 	evsel->unit	   = "";
 	evsel->scale	   = 1.0;
+	evsel->max_events  = ULONG_MAX;
 	evsel->evlist	   = NULL;
 	evsel->bpf_fd	   = -1;
 	INIT_LIST_HEAD(&evsel->node);
@@ -251,8 +252,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 {
 	struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
 
-	if (evsel != NULL)
-		perf_evsel__init(evsel, attr, idx);
+	if (!evsel)
+		return NULL;
+	perf_evsel__init(evsel, attr, idx);
 
 	if (perf_evsel__is_bpf_output(evsel)) {
 		evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
@@ -260,6 +262,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 		evsel->attr.sample_period = 1;
 	}
 
+	if (perf_evsel__is_clock(evsel)) {
+		/*
+		 * The evsel->unit points to static alias->unit
+		 * so it's ok to use static string in here.
+		 */
+		static const char *unit = "msec";
+
+		evsel->unit = unit;
+		evsel->scale = 1e-6;
+	}
+
 	return evsel;
 }
 
@@ -781,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
 		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
 			max_stack = term->val.max_stack;
 			break;
+		case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+			evsel->max_events = term->val.max_events;
+			break;
 		case PERF_EVSEL__CONFIG_TERM_INHERIT:
 			/*
 			 * attr->inherit should has already been set by
@@ -848,6 +864,12 @@ static void apply_config_terms(struct perf_evsel *evsel,
 	}
 }
 
+static bool is_dummy_event(struct perf_evsel *evsel)
+{
+	return (evsel->attr.type == PERF_TYPE_SOFTWARE) &&
+	       (evsel->attr.config == PERF_COUNT_SW_DUMMY);
+}
+
 /*
  * The enable_on_exec/disabled value strategy:
  *
@@ -934,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->sample_freq    = 0;
 		attr->sample_period  = 0;
 		attr->write_backward = 0;
-		attr->sample_id_all  = 0;
 	}
 
 	if (opts->no_samples)
@@ -1071,6 +1092,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->exclude_user   = 1;
 	}
 
+	if (evsel->own_cpus || evsel->unit)
+		evsel->attr.read_format |= PERF_FORMAT_ID;
+
 	/*
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
@@ -1086,6 +1110,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		else
 			perf_evsel__reset_sample_bit(evsel, PERIOD);
 	}
+
+	/*
+	 * For initial_delay, a dummy event is added implicitly.
+	 * The software event will trigger -EOPNOTSUPP error out,
+	 * if BRANCH_STACK bit is set.
+	 */
+	if (opts->initial_delay && is_dummy_event(evsel))
+		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
 }
 
 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -1174,16 +1206,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
 
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_ENABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+	if (!err)
+		evsel->disabled = false;
+
+	return err;
 }
 
 int perf_evsel__disable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_DISABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+	/*
+	 * We mark it disabled here so that tools that disable a event can
+	 * ignore events after they disable it. I.e. the ring buffer may have
+	 * already a few more events queued up before the kernel got the stop
+	 * request.
+	 */
+	if (!err)
+		evsel->disabled = true;
+
+	return err;
 }
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -2197,7 +2240,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		}
 	}
 
-	if (type & PERF_SAMPLE_CALLCHAIN) {
+	if (evsel__has_callchain(evsel)) {
 		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
 
 		OVERFLOW_CHECK_u64(array);
@@ -2656,15 +2699,15 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 	return 0;
 }
 
-struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
+struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
 {
-	return pevent_find_field(evsel->tp_format, name);
+	return tep_find_field(evsel->tp_format, name);
 }
 
 void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
 			 const char *name)
 {
-	struct format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = perf_evsel__field(evsel, name);
 	int offset;
 
 	if (!field)
@@ -2672,7 +2715,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
 
 	offset = field->offset;
 
-	if (field->flags & FIELD_IS_DYNAMIC) {
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 		offset = *(int *)(sample->raw_data + field->offset);
 		offset &= 0xffff;
 	}
@@ -2680,7 +2723,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
 	return sample->raw_data + offset;
 }
 
-u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
 			 bool needs_swap)
 {
 	u64 value;
@@ -2722,7 +2765,7 @@ u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
 u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
 		       const char *name)
 {
-	struct format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = perf_evsel__field(evsel, name);
 
 	if (!field)
 		return 0;
@@ -2857,12 +2900,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 			 "Hint: Try again after reducing the number of events.\n"
 			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
 	case ENOMEM:
-		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 &&
+		if (evsel__has_callchain(evsel) &&
 		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
 			return scnprintf(msg, size,
 					 "Not enough memory to setup event with callchain.\n"
 					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
-					 "Hint: Current value: %d", sysctl_perf_event_max_stack);
+					 "Hint: Current value: %d", sysctl__max_stack());
 		break;
 	case ENODEV:
 		if (target->cpu_list)
@@ -2914,3 +2957,32 @@ struct perf_env *perf_evsel__env(struct perf_evsel *evsel)
 		return evsel->evlist->env;
 	return NULL;
 }
+
+static int store_evsel_ids(struct perf_evsel *evsel, struct perf_evlist *evlist)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+		for (thread = 0; thread < xyarray__max_y(evsel->fd);
+		     thread++) {
+			int fd = FD(evsel, cpu, thread);
+
+			if (perf_evlist__id_add_fd(evlist, evsel,
+						   cpu, thread, fd) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist)
+{
+	struct cpu_map *cpus = evsel->cpus;
+	struct thread_map *threads = evsel->threads;
+
+	if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr))
+		return -ENOMEM;
+
+	return store_evsel_ids(evsel, evlist);
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b13f5f234c8f..3147ca76c6fc 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -46,6 +46,7 @@ enum term_type {
 	PERF_EVSEL__CONFIG_TERM_STACK_USER,
 	PERF_EVSEL__CONFIG_TERM_INHERIT,
 	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+	PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
 	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
 	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
 	PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
 		bool	inherit;
 		bool	overwrite;
 		char	*branch;
+		unsigned long max_events;
 	} val;
 	bool weak;
 };
@@ -99,10 +101,12 @@ struct perf_evsel {
 	struct perf_counts	*prev_raw_counts;
 	int			idx;
 	u32			ids;
+	unsigned long		max_events;
+	unsigned long		nr_events_printed;
 	char			*name;
 	double			scale;
 	const char		*unit;
-	struct event_format	*tp_format;
+	struct tep_event_format	*tp_format;
 	off_t			id_offset;
 	struct perf_stat_evsel  *stats;
 	void			*priv;
@@ -119,6 +123,7 @@ struct perf_evsel {
 	bool			snapshot;
 	bool 			supported;
 	bool 			needs_swap;
+	bool 			disabled;
 	bool			no_aux_samples;
 	bool			immediate;
 	bool			system_wide;
@@ -211,7 +216,7 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *
 
 struct perf_evsel *perf_evsel__new_cycles(bool precise);
 
-struct event_format *event_format__new(const char *sys, const char *name);
+struct tep_event_format *event_format__new(const char *sys, const char *name);
 
 void perf_evsel__init(struct perf_evsel *evsel,
 		      struct perf_event_attr *attr, int idx);
@@ -296,11 +301,11 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel,
 	return perf_evsel__rawptr(evsel, sample, name);
 }
 
-struct format_field;
+struct tep_format_field;
 
-u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
 
-struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
+struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
 
 #define perf_evsel__match(evsel, t, c)		\
 	(evsel->attr.type == PERF_TYPE_##t &&	\
@@ -402,10 +407,13 @@ bool perf_evsel__is_function_event(struct perf_evsel *evsel);
 
 static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
 {
-	struct perf_event_attr *attr = &evsel->attr;
+	return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+}
 
-	return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
-		(attr->type == PERF_TYPE_SOFTWARE);
+static inline bool perf_evsel__is_clock(struct perf_evsel *evsel)
+{
+	return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+	       perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
 }
 
 struct perf_attr_details {
@@ -449,16 +457,28 @@ static inline int perf_evsel__group_idx(struct perf_evsel *evsel)
 	return evsel->idx - evsel->leader->idx;
 }
 
+/* Iterates group WITHOUT the leader. */
 #define for_each_group_member(_evsel, _leader) 					\
 for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); 	\
      (_evsel) && (_evsel)->leader == (_leader);					\
      (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
 
+/* Iterates group WITH the leader. */
+#define for_each_group_evsel(_evsel, _leader) 					\
+for ((_evsel) = _leader; 							\
+     (_evsel) && (_evsel)->leader == (_leader);					\
+     (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
+
 static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel)
 {
 	return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
 }
 
+static inline bool evsel__has_callchain(const struct perf_evsel *evsel)
+{
+	return (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
+}
+
 typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
 
 int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
@@ -466,4 +486,5 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 
 struct perf_env *perf_evsel__env(struct perf_evsel *evsel);
 
+int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist);
 #endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 06dfb027879d..0d0a4c6f368b 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -73,7 +73,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
 	}
 
 	if (details->trace_fields) {
-		struct format_field *field;
+		struct tep_format_field *field;
 
 		if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
 			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c540d47583e7..aafbe54fd3fa 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -114,7 +114,7 @@ gen_build_id(struct buildid_note *note,
 
 	fd = open("/dev/urandom", O_RDONLY);
 	if (fd == -1)
-		err(1, "cannot access /dev/urandom for builid");
+		err(1, "cannot access /dev/urandom for buildid");
 
 	sret = read(fd, note->build_id, sz);
 
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index de322d51c7fe..b72440bf9a79 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__powerpc__)
 #define GEN_ELF_ARCH	EM_PPC
 #define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH	EM_SPARCV9
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH	EM_SPARC
+#define GEN_ELF_CLASS	ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 000000000000..267aa609a582
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "util.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdlib.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+
+char *get_current_dir_name(void)
+{
+	char pwd[PATH_MAX];
+
+	return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a8bff2178fbc..4fd45be95a43 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -279,8 +279,6 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
 	if (!set)
 		return -ENOMEM;
 
-	bitmap_zero(set, size);
-
 	p = (u64 *) set;
 
 	for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
@@ -1036,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
 	return err;
 }
 
+static int write_clockid(struct feat_fd *ff,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	return do_write(ff, &ff->ph->env.clockid_res_ns,
+			sizeof(ff->ph->env.clockid_res_ns));
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
 	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1285,7 +1290,6 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
 		return -ENOMEM;
 	}
 
-	bitmap_zero(n->set, size);
 	n->node = idx;
 	n->size = size;
 
@@ -1459,8 +1463,24 @@ static void print_cmdline(struct feat_fd *ff, FILE *fp)
 
 	fprintf(fp, "# cmdline : ");
 
-	for (i = 0; i < nr; i++)
-		fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]);
+	for (i = 0; i < nr; i++) {
+		char *argv_i = strdup(ff->ph->env.cmdline_argv[i]);
+		if (!argv_i) {
+			fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]);
+		} else {
+			char *mem = argv_i;
+			do {
+				char *quote = strchr(argv_i, '\'');
+				if (!quote)
+					break;
+				*quote++ = '\0';
+				fprintf(fp, "%s\\\'", argv_i);
+				argv_i = quote;
+			} while (1);
+			fprintf(fp, "%s ", argv_i);
+			free(mem);
+		}
+	}
 	fputc('\n', fp);
 }
 
@@ -1495,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
 		fprintf(fp, "# Core ID and Socket ID information is not available\n");
 }
 
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+		ff->ph->env.clockid_res_ns * 1000);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
 	struct perf_evsel *evsel;
@@ -2113,6 +2139,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
 	int cpu_nr = ff->ph->env.nr_cpus_avail;
 	u64 size = 0;
 	struct perf_header *ph = ff->ph;
+	bool do_core_id_test = true;
 
 	ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
 	if (!ph->env.cpu)
@@ -2167,6 +2194,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
 		return 0;
 	}
 
+	/* On s390 the socket_id number is not related to the numbers of cpus.
+	 * The socket_id number might be higher than the numbers of cpus.
+	 * This depends on the configuration.
+	 */
+	if (ph->env.arch && !strncmp(ph->env.arch, "s390", 4))
+		do_core_id_test = false;
+
 	for (i = 0; i < (u32)cpu_nr; i++) {
 		if (do_read_u32(ff, &nr))
 			goto free_cpu;
@@ -2176,7 +2210,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
 		if (do_read_u32(ff, &nr))
 			goto free_cpu;
 
-		if (nr != (u32)-1 && nr > (u32)cpu_nr) {
+		if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) {
 			pr_debug("socket_id number is too big."
 				 "You may need to upgrade the perf tool.\n");
 			goto free_cpu;
@@ -2510,6 +2544,15 @@ out:
 	return ret;
 }
 
+static int process_clockid(struct feat_fd *ff,
+			   void *data __maybe_unused)
+{
+	if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+		return -1;
+
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2563,12 +2606,13 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPR(NUMA_TOPOLOGY,	numa_topology,	true),
 	FEAT_OPN(BRANCH_STACK,	branch_stack,	false),
 	FEAT_OPR(PMU_MAPPINGS,	pmu_mappings,	false),
-	FEAT_OPN(GROUP_DESC,	group_desc,	false),
+	FEAT_OPR(GROUP_DESC,	group_desc,	false),
 	FEAT_OPN(AUXTRACE,	auxtrace,	false),
 	FEAT_OPN(STAT,		stat,		false),
 	FEAT_OPN(CACHE,		cache,		true),
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
 	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
+	FEAT_OPR(CLOCKID,       clockid,        false)
 };
 
 struct header_print_data {
@@ -3183,9 +3227,9 @@ static int read_attr(int fd, struct perf_header *ph,
 }
 
 static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
-						struct pevent *pevent)
+						struct tep_handle *pevent)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	char bf[128];
 
 	/* already prepared */
@@ -3197,7 +3241,7 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
 		return -1;
 	}
 
-	event = pevent_find_event(pevent, evsel->attr.config);
+	event = tep_find_event(pevent, evsel->attr.config);
 	if (event == NULL) {
 		pr_debug("cannot find event format for %d\n", (int)evsel->attr.config);
 		return -1;
@@ -3215,7 +3259,7 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
 }
 
 static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
-						  struct pevent *pevent)
+						  struct tep_handle *pevent)
 {
 	struct perf_evsel *pos;
 
@@ -3312,8 +3356,6 @@ int perf_session__read_header(struct perf_session *session)
 		lseek(fd, tmp, SEEK_SET);
 	}
 
-	symbol_conf.nr_events = nr_attrs;
-
 	perf_header__process_sections(header, fd, &session->tevent,
 				      perf_file_section__process);
 
@@ -3429,10 +3471,10 @@ int perf_event__synthesize_features(struct perf_tool *tool,
 	return ret;
 }
 
-int perf_event__process_feature(struct perf_tool *tool,
-				union perf_event *event,
-				struct perf_session *session __maybe_unused)
+int perf_event__process_feature(struct perf_session *session,
+				union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct feat_fd ff = { .fd = 0 };
 	struct feature_event *fe = (struct feature_event *)event;
 	int type = fe->header.type;
@@ -3442,7 +3484,7 @@ int perf_event__process_feature(struct perf_tool *tool,
 		pr_warning("invalid record type %d in pipe-mode\n", type);
 		return 0;
 	}
-	if (feat == HEADER_RESERVED || feat > HEADER_LAST_FEATURE) {
+	if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) {
 		pr_warning("invalid record type %d in pipe-mode\n", type);
 		return -1;
 	}
@@ -3618,13 +3660,13 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
 }
 
 int perf_event__synthesize_attrs(struct perf_tool *tool,
-				   struct perf_session *session,
-				   perf_event__handler_t process)
+				 struct perf_evlist *evlist,
+				 perf_event__handler_t process)
 {
 	struct perf_evsel *evsel;
 	int err = 0;
 
-	evlist__for_each_entry(session->evlist, evsel) {
+	evlist__for_each_entry(evlist, evsel) {
 		err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
 						  evsel->id, process);
 		if (err) {
@@ -3739,8 +3781,6 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
 		perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
 	}
 
-	symbol_conf.nr_events = evlist->nr_entries;
-
 	return 0;
 }
 
@@ -3839,9 +3879,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
 	return aligned_size;
 }
 
-int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
-				     union perf_event *event,
-				     struct perf_session *session)
+int perf_event__process_tracing_data(struct perf_session *session,
+				     union perf_event *event)
 {
 	ssize_t size_read, padding, size = event->tracing_data.size;
 	int fd = perf_data__fd(session->data);
@@ -3907,9 +3946,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool,
 	return err;
 }
 
-int perf_event__process_build_id(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event,
-				 struct perf_session *session)
+int perf_event__process_build_id(struct perf_session *session,
+				 union perf_event *event)
 {
 	__event_process_build_id(&event->build_id,
 				 event->build_id.filename,
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 90d4577a92dc..0d553ddca0a3 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -2,6 +2,7 @@
 #ifndef __PERF_HEADER_H
 #define __PERF_HEADER_H
 
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <sys/types.h>
 #include <stdbool.h>
@@ -37,6 +38,7 @@ enum {
 	HEADER_CACHE,
 	HEADER_SAMPLE_TIME,
 	HEADER_MEM_TOPOLOGY,
+	HEADER_CLOCKID,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
@@ -115,15 +117,14 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool,
 				      perf_event__handler_t process,
 				      bool is_pipe);
 
-int perf_event__process_feature(struct perf_tool *tool,
-				union perf_event *event,
-				struct perf_session *session);
+int perf_event__process_feature(struct perf_session *session,
+				union perf_event *event);
 
 int perf_event__synthesize_attr(struct perf_tool *tool,
 				struct perf_event_attr *attr, u32 ids, u64 *id,
 				perf_event__handler_t process);
 int perf_event__synthesize_attrs(struct perf_tool *tool,
-				 struct perf_session *session,
+				 struct perf_evlist *evlist,
 				 perf_event__handler_t process);
 int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
 					     struct perf_evsel *evsel,
@@ -147,17 +148,15 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
 int perf_event__synthesize_tracing_data(struct perf_tool *tool,
 					int fd, struct perf_evlist *evlist,
 					perf_event__handler_t process);
-int perf_event__process_tracing_data(struct perf_tool *tool,
-				     union perf_event *event,
-				     struct perf_session *session);
+int perf_event__process_tracing_data(struct perf_session *session,
+				     union perf_event *event);
 
 int perf_event__synthesize_build_id(struct perf_tool *tool,
 				    struct dso *pos, u16 misc,
 				    perf_event__handler_t process,
 				    struct machine *machine);
-int perf_event__process_build_id(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session);
+int perf_event__process_build_id(struct perf_session *session,
+				 union perf_event *event);
 bool is_perf_magic(u64 magic);
 
 #define NAME_ALIGN 64
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 4d602fba40b2..828cb9794c76 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -370,9 +370,11 @@ void hists__delete_entries(struct hists *hists)
 
 static int hist_entry__init(struct hist_entry *he,
 			    struct hist_entry *template,
-			    bool sample_self)
+			    bool sample_self,
+			    size_t callchain_size)
 {
 	*he = *template;
+	he->callchain_size = callchain_size;
 
 	if (symbol_conf.cumulate_callchain) {
 		he->stat_acc = malloc(sizeof(he->stat));
@@ -410,7 +412,7 @@ static int hist_entry__init(struct hist_entry *he,
 		map__get(he->mem_info->daddr.map);
 	}
 
-	if (symbol_conf.use_callchain)
+	if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
 		callchain_init(he->callchain);
 
 	if (he->raw_data) {
@@ -473,7 +475,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 
 	he = ops->new(callchain_size);
 	if (he) {
-		err = hist_entry__init(he, template, sample_self);
+		err = hist_entry__init(he, template, sample_self, callchain_size);
 		if (err) {
 			ops->free(he);
 			he = NULL;
@@ -492,7 +494,7 @@ static u8 symbol__parent_filter(const struct symbol *parent)
 
 static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
 {
-	if (!symbol_conf.use_callchain)
+	if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
 		return;
 
 	he->hists->callchain_period += period;
@@ -619,9 +621,11 @@ __hists__add_entry(struct hists *hists,
 		.raw_data = sample->raw_data,
 		.raw_size = sample->raw_size,
 		.ops = ops,
-	};
+	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
 
-	return hists__findnew_entry(hists, &entry, al, sample_self);
+	if (!hists->has_callchains && he && he->callchain_size != 0)
+		hists->has_callchains = true;
+	return he;
 }
 
 struct hist_entry *hists__add_entry(struct hists *hists,
@@ -986,7 +990,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
 	iter->he = he;
 	he_cache[iter->curr++] = he;
 
-	if (symbol_conf.use_callchain)
+	if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
 		callchain_append(he->callchain, &cursor, sample->period);
 	return 0;
 }
@@ -1039,7 +1043,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 	int err, err2;
 	struct map *alm = NULL;
 
-	if (al && al->map)
+	if (al)
 		alm = map__get(al->map);
 
 	err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
@@ -1373,7 +1377,8 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
 	if (new_he) {
 		new_he->leaf = true;
 
-		if (symbol_conf.use_callchain) {
+		if (hist_entry__has_callchains(new_he) &&
+		    symbol_conf.use_callchain) {
 			callchain_cursor_reset(&callchain_cursor);
 			if (callchain_merge(&callchain_cursor,
 					    new_he->callchain,
@@ -1414,7 +1419,7 @@ static int hists__collapse_insert_entry(struct hists *hists,
 			if (symbol_conf.cumulate_callchain)
 				he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
-			if (symbol_conf.use_callchain) {
+			if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
 				callchain_cursor_reset(&callchain_cursor);
 				if (callchain_merge(&callchain_cursor,
 						    iter->callchain,
@@ -1757,7 +1762,7 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro
 	bool use_callchain;
 
 	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
-		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+		use_callchain = evsel__has_callchain(evsel);
 	else
 		use_callchain = symbol_conf.use_callchain;
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index fbabfd8a215d..3badd7f1e1b8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -85,6 +85,7 @@ struct hists {
 	struct events_stats	stats;
 	u64			event_stream;
 	u16			col_len[HISTC_NR_COLS];
+	bool			has_callchains;
 	int			socket_filter;
 	struct perf_hpp_list	*hpp_list;
 	struct list_head	hpp_formats;
@@ -180,7 +181,7 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
 
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp,
-		      bool use_callchain);
+		      bool ignore_callchains);
 size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp);
 
 void hists__filter_by_dso(struct hists *hists);
@@ -220,6 +221,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
 	return &hevsel->hists;
 }
 
+static __pure inline bool hists__has_callchains(struct hists *hists)
+{
+	return hists->has_callchains;
+}
+
 int hists__init(void);
 int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
@@ -419,19 +425,24 @@ struct hist_browser_timer {
 	int refresh;
 };
 
+struct annotation_options;
+
 #ifdef HAVE_SLANG_SUPPORT
 #include "../ui/keysyms.h"
 int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
-			     struct hist_browser_timer *hbt);
+			     struct hist_browser_timer *hbt,
+			     struct annotation_options *annotation_opts);
 
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
-			     struct hist_browser_timer *hbt);
+			     struct hist_browser_timer *hbt,
+			     struct annotation_options *annotation_opts);
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  float min_pcnt,
 				  struct perf_env *env,
-				  bool warn_lost_event);
+				  bool warn_lost_event,
+				  struct annotation_options *annotation_options);
 int script_browse(const char *script_opt);
 #else
 static inline
@@ -440,20 +451,23 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  struct hist_browser_timer *hbt __maybe_unused,
 				  float min_pcnt __maybe_unused,
 				  struct perf_env *env __maybe_unused,
-				  bool warn_lost_event __maybe_unused)
+				  bool warn_lost_event __maybe_unused,
+				  struct annotation_options *annotation_options __maybe_unused)
 {
 	return 0;
 }
 static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
 					   struct perf_evsel *evsel __maybe_unused,
-					   struct hist_browser_timer *hbt __maybe_unused)
+					   struct hist_browser_timer *hbt __maybe_unused,
+					   struct annotation_options *annotation_options __maybe_unused)
 {
 	return 0;
 }
 
 static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
 					   struct perf_evsel *evsel __maybe_unused,
-					   struct hist_browser_timer *hbt __maybe_unused)
+					   struct hist_browser_timer *hbt __maybe_unused,
+					   struct annotation_options *annotation_opts __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 72db2744876d..7b27d77306c2 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
 	return 0;
 }
 
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+	return machine__kernel_ip(bts->machine, ip) ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 					 struct branch *branch)
 {
@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	    bts->num_events++ <= bts->synth_opts.initial_skip)
 		return 0;
 
-	event.sample.header.type = PERF_RECORD_SAMPLE;
-	event.sample.header.misc = PERF_RECORD_MISC_USER;
-	event.sample.header.size = sizeof(struct perf_event_header);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = le64_to_cpu(branch->from);
+	sample.cpumode = intel_bts_cpumode(bts, sample.ip);
 	sample.pid = btsq->pid;
 	sample.tid = btsq->tid;
 	sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	sample.insn_len = btsq->intel_pt_insn.length;
 	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
 
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
 	if (bts->synth_opts.inject) {
 		event.sample.header.size = bts->branches_event_size;
 		ret = perf_event__synthesize_sample(&event,
@@ -335,8 +342,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
 	if (!thread)
 		return -1;
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
-	if (!al.map || !al.map->dso)
+	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
 		goto out_put;
 
 	len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
@@ -911,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		bts->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&bts->synth_opts);
+		itrace_synth_opts__set_default(&bts->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (session->itrace_synth_opts)
 			bts->synth_opts.thread_stack =
 				session->itrace_synth_opts->thread_stack;
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index e23578c7b1be..2669c9f748e4 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states;
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+	return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+		(insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+		 X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
 #endif /* _ASM_X86_INSN_H */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f9157aed1289..4503f3ca45ab 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -113,6 +113,7 @@ struct intel_pt_decoder {
 	bool have_cyc;
 	bool fixup_last_mtc;
 	bool have_last_ip;
+	enum intel_pt_param_flags flags;
 	uint64_t pos;
 	uint64_t last_ip;
 	uint64_t ip;
@@ -226,6 +227,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->return_compression = params->return_compression;
 	decoder->branch_enable      = params->branch_enable;
 
+	decoder->flags              = params->flags;
+
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
 
@@ -1097,6 +1100,15 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
 	return ret;
 }
 
+static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
+					  struct intel_pt_insn *intel_pt_insn,
+					  uint64_t ip, int err)
+{
+	return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
+	       intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
+	       ip == decoder->ip + intel_pt_insn->length;
+}
+
 static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_insn intel_pt_insn;
@@ -1109,10 +1121,11 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
 		if (err == INTEL_PT_RETURN)
 			return 0;
-		if (err == -EAGAIN) {
+		if (err == -EAGAIN ||
+		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
 			if (intel_pt_fup_event(decoder))
 				return 0;
-			return err;
+			return -EAGAIN;
 		}
 		decoder->set_fup_tx_flags = false;
 		if (err)
@@ -1152,7 +1165,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 		decoder->pge = false;
 		decoder->continuous_period = false;
 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
-		decoder->state.to_ip = 0;
+		decoder->state.type |= INTEL_PT_TRACE_END;
 		return 0;
 	}
 	if (err == INTEL_PT_RETURN)
@@ -1166,9 +1179,13 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 			decoder->continuous_period = false;
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->state.from_ip = decoder->ip;
-			decoder->state.to_ip = 0;
-			if (decoder->packet.count != 0)
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				decoder->state.to_ip = decoder->last_ip;
 				decoder->ip = decoder->last_ip;
+			}
+			decoder->state.type |= INTEL_PT_TRACE_END;
 		} else {
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->state.from_ip = decoder->ip;
@@ -1195,7 +1212,8 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->ip = to_ip;
 			decoder->state.from_ip = decoder->ip;
-			decoder->state.to_ip = 0;
+			decoder->state.to_ip = to_ip;
+			decoder->state.type |= INTEL_PT_TRACE_END;
 			return 0;
 		}
 		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
@@ -1376,7 +1394,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
 {
 	intel_pt_log("ERROR: Buffer overflow\n");
 	intel_pt_clear_tx_flags(decoder);
-	decoder->have_tma = false;
 	decoder->cbr = 0;
 	decoder->timestamp_insn_cnt = 0;
 	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
@@ -1457,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->have_calc_cyc_to_tsc = false;
 		intel_pt_calc_cyc_to_tsc(decoder, true);
 	}
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
@@ -1497,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->timestamp = timestamp;
 
 	decoder->timestamp_insn_cnt = 0;
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
 /* Walk PSB+ packets when already in sync. */
@@ -1604,7 +1625,6 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_PSB:
 		case INTEL_PT_TSC:
 		case INTEL_PT_TMA:
-		case INTEL_PT_CBR:
 		case INTEL_PT_MODE_TSX:
 		case INTEL_PT_BAD:
 		case INTEL_PT_PSBEND:
@@ -1620,19 +1640,24 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 			decoder->pkt_step = 0;
 			return -ENOENT;
 
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
 		case INTEL_PT_OVF:
 			return intel_pt_overflow(decoder);
 
 		case INTEL_PT_TIP_PGD:
 			decoder->state.from_ip = decoder->ip;
-			decoder->state.to_ip = 0;
-			if (decoder->packet.count != 0) {
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
 				intel_pt_set_ip(decoder);
-				intel_pt_log("Omitting PGD ip " x64_fmt "\n",
-					     decoder->ip);
+				decoder->state.to_ip = decoder->ip;
 			}
 			decoder->pge = false;
 			decoder->continuous_period = false;
+			decoder->state.type |= INTEL_PT_TRACE_END;
 			return 0;
 
 		case INTEL_PT_TIP_PGE:
@@ -1646,6 +1671,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 				intel_pt_set_ip(decoder);
 				decoder->state.to_ip = decoder->ip;
 			}
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
 			return 0;
 
 		case INTEL_PT_TIP:
@@ -1724,6 +1750,7 @@ next:
 			intel_pt_set_ip(decoder);
 			decoder->state.from_ip = 0;
 			decoder->state.to_ip = decoder->ip;
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
 			return 0;
 		}
 
@@ -2062,9 +2089,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
 			if (intel_pt_have_ip(decoder))
 				intel_pt_set_ip(decoder);
-			if (decoder->ip)
-				return 0;
-			break;
+			if (!decoder->ip)
+				break;
+			if (decoder->packet.type == INTEL_PT_TIP_PGE)
+				decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+			if (decoder->packet.type == INTEL_PT_TIP_PGD)
+				decoder->state.type |= INTEL_PT_TRACE_END;
+			return 0;
 
 		case INTEL_PT_FUP:
 			if (intel_pt_have_ip(decoder))
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index fc1752d50019..ed088d4726ba 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -37,6 +37,8 @@ enum intel_pt_sample_type {
 	INTEL_PT_EX_STOP	= 1 << 6,
 	INTEL_PT_PWR_EXIT	= 1 << 7,
 	INTEL_PT_CBR_CHG	= 1 << 8,
+	INTEL_PT_TRACE_BEGIN	= 1 << 9,
+	INTEL_PT_TRACE_END	= 1 << 10,
 };
 
 enum intel_pt_period_type {
@@ -60,6 +62,14 @@ enum {
 	INTEL_PT_ERR_MAX,
 };
 
+enum intel_pt_param_flags {
+	/*
+	 * FUP packet can contain next linear instruction pointer instead of
+	 * current linear instruction pointer.
+	 */
+	INTEL_PT_FUP_WITH_NLIP	= 1 << 0,
+};
+
 struct intel_pt_state {
 	enum intel_pt_sample_type type;
 	int err;
@@ -106,6 +116,7 @@ struct intel_pt_params {
 	unsigned int mtc_period;
 	uint32_t tsc_ctc_ratio_n;
 	uint32_t tsc_ctc_ratio_d;
+	enum intel_pt_param_flags flags;
 };
 
 struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
index e02bc7b166a0..5e64da270f97 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -31,6 +31,11 @@ static FILE *f;
 static char log_name[MAX_LOG_NAME];
 bool intel_pt_enable_logging;
 
+void *intel_pt_log_fp(void)
+{
+	return f;
+}
+
 void intel_pt_log_enable(void)
 {
 	intel_pt_enable_logging = true;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index 45b64f93f358..cc084937f701 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -22,6 +22,7 @@
 
 struct intel_pt_pkt;
 
+void *intel_pt_log_fp(void);
 void intel_pt_log_enable(void);
 void intel_pt_log_disable(void);
 void intel_pt_log_set_name(const char *name);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index ba4c9dd18643..d426761a549d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -366,7 +366,7 @@ static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
 		if (len < offs)
 			return INTEL_PT_NEED_MORE_BYTES;
 		byte = buf[offs++];
-		payload |= (byte >> 1) << shift;
+		payload |= ((uint64_t)byte >> 1) << shift;
 	}
 
 	packet->type = INTEL_PT_CYC;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 0effaff57020..149ff361ca78 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 	intel_pt_dump(pt, buf, len);
 }
 
+static void intel_pt_log_event(union perf_event *event)
+{
+	FILE *f = intel_pt_log_fp();
+
+	if (!intel_pt_enable_logging || !f)
+		return;
+
+	perf_event__fprintf(event, f);
+}
+
 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 				   struct auxtrace_buffer *b)
 {
@@ -407,6 +417,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 }
 
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+	return ip >= pt->kernel_start ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				   uint64_t *insn_cnt_ptr, uint64_t *ip,
 				   uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +446,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	if (to_ip && *ip == to_ip)
 		goto out_no_cache;
 
-	if (*ip >= ptq->pt->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = intel_pt_cpumode(ptq->pt, *ip);
 
 	thread = ptq->thread;
 	if (!thread) {
@@ -442,8 +456,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	}
 
 	while (1) {
-		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
-		if (!al.map || !al.map->dso)
+		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
 			return -EINVAL;
 
 		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -596,8 +609,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
 	if (!thread)
 		return -EINVAL;
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
-	if (!al.map || !al.map->dso)
+	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
 		return -EINVAL;
 
 	offset = al.map->map_ip(al.map, ip);
@@ -751,6 +763,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 						   unsigned int queue_nr)
 {
 	struct intel_pt_params params = { .get_trace = 0, };
+	struct perf_env *env = pt->machine->env;
 	struct intel_pt_queue *ptq;
 
 	ptq = zalloc(sizeof(struct intel_pt_queue));
@@ -760,7 +773,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	if (pt->synth_opts.callchain) {
 		size_t sz = sizeof(struct ip_callchain);
 
-		sz += pt->synth_opts.callchain_sz * sizeof(u64);
+		/* Add 1 to callchain_sz for callchain context */
+		sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
 		ptq->chain = zalloc(sz);
 		if (!ptq->chain)
 			goto out_free;
@@ -832,6 +846,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 		}
 	}
 
+	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
+		params.flags |= INTEL_PT_FUP_WITH_NLIP;
+
 	ptq->decoder = intel_pt_decoder_new(&params);
 	if (!ptq->decoder)
 		goto out_free;
@@ -906,6 +923,11 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 		ptq->insn_len = ptq->state->insn_len;
 		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
 	}
+
+	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
+		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
+	if (ptq->state->type & INTEL_PT_TRACE_END)
+		ptq->flags |= PERF_IP_FLAG_TRACE_END;
 }
 
 static int intel_pt_setup_queue(struct intel_pt *pt,
@@ -1051,15 +1073,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 				   union perf_event *event,
 				   struct perf_sample *sample)
 {
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
-
 	if (!pt->timeless_decoding)
 		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
-	sample->cpumode = PERF_RECORD_MISC_USER;
 	sample->ip = ptq->state->from_ip;
+	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
 	sample->pid = ptq->pid;
 	sample->tid = ptq->tid;
 	sample->addr = ptq->state->to_ip;
@@ -1068,6 +1086,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 	sample->flags = ptq->flags;
 	sample->insn_len = ptq->insn_len;
 	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
 }
 
 static int intel_pt_inject_event(union perf_event *event,
@@ -1153,7 +1175,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 
 	if (pt->synth_opts.callchain) {
 		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample->ip);
+				     pt->synth_opts.callchain_sz + 1,
+				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
 	}
 
@@ -1523,6 +1546,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 
 	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
 		switch (ptq->switch_state) {
+		case INTEL_PT_SS_NOT_TRACING:
 		case INTEL_PT_SS_UNKNOWN:
 		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
 			err = intel_pt_next_tid(pt, ptq);
@@ -1565,7 +1589,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
 	if (map__load(map))
 		return 0;
 
-	start = dso__first_symbol(map->dso, MAP__FUNCTION);
+	start = dso__first_symbol(map->dso);
 
 	for (sym = start; sym; sym = dso__next_symbol(sym)) {
 		if (sym->binding == STB_GLOBAL &&
@@ -1996,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session,
 		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
 		err = intel_pt_context_switch(pt, event, sample);
 
-	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
-		     perf_event__name(event->header.type), event->header.type,
-		     sample->cpu, sample->time, timestamp);
+	intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+		     event->header.type, sample->cpu, sample->time, timestamp);
+	intel_pt_log_event(event);
 
 	return err;
 }
@@ -2551,7 +2575,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		pt->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&pt->synth_opts);
+		itrace_synth_opts__set_default(&pt->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (use_browser != -1) {
 			pt->synth_opts.branches = false;
 			pt->synth_opts.callchain = true;
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 1cca0a2fa641..5b0b60f00275 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -14,19 +14,22 @@
 #include "config.h"
 #include "util.h"
 #include <sys/wait.h>
+#include <subcmd/exec-cmd.h>
 
 #define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
 		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
 		"-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "	\
-		"$CLANG_OPTIONS $KERNEL_INC_OPTIONS "		\
+		"$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
 		"-Wno-unused-value -Wno-pointer-sign "		\
 		"-working-directory $WORKING_DIR "		\
-		"-c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
+		"-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
 
 struct llvm_param llvm_param = {
 	.clang_path = "clang",
+	.llc_path = "llc",
 	.clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
 	.clang_opt = NULL,
+	.opts = NULL,
 	.kbuild_dir = NULL,
 	.kbuild_opts = NULL,
 	.user_set_param = false,
@@ -50,6 +53,8 @@ int perf_llvm_config(const char *var, const char *value)
 		llvm_param.kbuild_opts = strdup(value);
 	else if (!strcmp(var, "dump-obj"))
 		llvm_param.dump_obj = !!perf_config_bool(var, value);
+	else if (!strcmp(var, "opts"))
+		llvm_param.opts = strdup(value);
 	else {
 		pr_debug("Invalid LLVM config option: %s\n", value);
 		return -1;
@@ -212,7 +217,7 @@ version_notice(void)
 "     \t\thttp://llvm.org/apt\n\n"
 "     \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
 "     \toption in [llvm] section of ~/.perfconfig to:\n\n"
-"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n"
+"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
 "     \t     -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
 "     \t     -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
 "     \t(Replace /path/to/llc with path to your llc)\n\n"
@@ -265,16 +270,16 @@ static const char *kinc_fetch_script =
 "#!/usr/bin/env sh\n"
 "if ! test -d \"$KBUILD_DIR\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "TMPDIR=`mktemp -d`\n"
 "if test -z \"$TMPDIR\"\n"
 "then\n"
-"    exit -1\n"
+"    exit 1\n"
 "fi\n"
 "cat << EOF > $TMPDIR/Makefile\n"
 "obj-y := dummy.o\n"
@@ -429,11 +434,15 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	unsigned int kernel_version;
 	char linux_version_code_str[64];
 	const char *clang_opt = llvm_param.clang_opt;
-	char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+	char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
 	char serr[STRERR_BUFSIZE];
-	char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
+	char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
+	     *perf_bpf_include_opts = NULL;
 	const char *template = llvm_param.clang_bpf_cmd_template;
-	char *command_echo, *command_out;
+	char *pipe_template = NULL;
+	const char *opts = llvm_param.opts;
+	char *command_echo = NULL, *command_out;
+	char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
 
 	if (path[0] != '-' && realpath(path, abspath) == NULL) {
 		err = errno;
@@ -471,14 +480,36 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 
 	snprintf(linux_version_code_str, sizeof(linux_version_code_str),
 		 "0x%x", kernel_version);
-
+	if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+		goto errout;
 	force_set_env("NR_CPUS", nr_cpus_avail_str);
 	force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
 	force_set_env("CLANG_EXEC", clang_path);
 	force_set_env("CLANG_OPTIONS", clang_opt);
 	force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+	force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
 	force_set_env("WORKING_DIR", kbuild_dir ? : ".");
 
+	if (opts) {
+		err = search_program(llvm_param.llc_path, "llc", llc_path);
+		if (err) {
+			pr_err("ERROR:\tunable to find llc.\n"
+			       "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+			       "     \tand 'llc-path' option in [llvm] section of ~/.perfconfig.\n");
+			version_notice();
+			goto errout;
+		}
+
+		if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
+			      template, llc_path, opts) < 0) {
+			pr_err("ERROR:\tnot enough memory to setup command line\n");
+			goto errout;
+		}
+
+		template = pipe_template;
+
+	}
+
 	/*
 	 * Since we may reset clang's working dir, path of source file
 	 * should be transferred into absolute path, except we want
@@ -512,6 +543,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	free(command_out);
 	free(kbuild_dir);
 	free(kbuild_include_opts);
+	free(perf_bpf_include_opts);
+	free(perf_include_dir);
 
 	if (!p_obj_buf)
 		free(obj_buf);
@@ -526,6 +559,9 @@ errout:
 	free(kbuild_dir);
 	free(kbuild_include_opts);
 	free(obj_buf);
+	free(perf_bpf_include_opts);
+	free(perf_include_dir);
+	free(pipe_template);
 	if (p_obj_buf)
 		*p_obj_buf = NULL;
 	if (p_obj_buf_sz)
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
index d3ad8deb5db4..bf3f3f4c4fe2 100644
--- a/tools/perf/util/llvm-utils.h
+++ b/tools/perf/util/llvm-utils.h
@@ -11,6 +11,8 @@
 struct llvm_param {
 	/* Path of clang executable */
 	const char *clang_path;
+	/* Path of llc executable */
+	const char *llc_path;
 	/*
 	 * Template of clang bpf compiling. 5 env variables
 	 * can be used:
@@ -23,6 +25,13 @@ struct llvm_param {
 	const char *clang_bpf_cmd_template;
 	/* Will be filled in $CLANG_OPTIONS */
 	const char *clang_opt;
+	/*
+	 * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe
+	 * the clang output to llc, useful for new llvm options not
+	 * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris
+	 * in clang 6.0/llvm 7
+	 */
+	const char *opts;
 	/* Where to find kbuild system */
 	const char *kbuild_dir;
 	/*
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index 07498eaddc08..b1dd29a9d915 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -3,9 +3,13 @@
 #include <lzma.h>
 #include <stdio.h>
 #include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include "compress.h"
 #include "util.h"
 #include "debug.h"
+#include <unistd.h>
 
 #define BUFSIZE 8192
 
@@ -99,3 +103,19 @@ err_fclose:
 	fclose(infile);
 	return err;
 }
+
+bool lzma_is_compressed(const char *input)
+{
+	int fd = open(input, O_RDONLY);
+	const uint8_t magic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+	char buf[6] = { 0 };
+	ssize_t rc;
+
+	if (fd < 0)
+		return -1;
+
+	rc = read(fd, buf, sizeof(buf));
+	close(fd);
+	return rc == sizeof(buf) ?
+	       memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 32d50492505d..8f36ce813bc5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -24,6 +24,7 @@
 
 #include "sane_ctype.h"
 #include <symbol/kallsyms.h>
+#include <linux/mman.h>
 
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
 
@@ -81,8 +82,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 	machine->kptr_restrict_warned = false;
 	machine->comm_exec = false;
 	machine->kernel_start = 0;
-
-	memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+	machine->vmlinux_map = NULL;
 
 	machine->root_dir = strdup(root_dir);
 	if (machine->root_dir == NULL)
@@ -137,13 +137,11 @@ struct machine *machine__new_kallsyms(void)
 	struct machine *machine = machine__new_host();
 	/*
 	 * FIXME:
-	 * 1) MAP__FUNCTION will go away when we stop loading separate maps for
-	 *    functions and data objects.
-	 * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely
+	 * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely
 	 *    ask for not using the kcore parsing code, once this one is fixed
 	 *    to create a map per module.
 	 */
-	if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) {
+	if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
 		machine__delete(machine);
 		machine = NULL;
 	}
@@ -410,23 +408,16 @@ out_err:
 }
 
 /*
- * Caller must eventually drop thread->refcnt returned with a successful
- * lookup/new thread inserted.
+ * Front-end cache - TID lookups come in blocks,
+ * so most of the time we dont have to look up
+ * the full rbtree:
  */
-static struct thread *____machine__findnew_thread(struct machine *machine,
-						  struct threads *threads,
-						  pid_t pid, pid_t tid,
-						  bool create)
+static struct thread*
+__threads__get_last_match(struct threads *threads, struct machine *machine,
+			  int pid, int tid)
 {
-	struct rb_node **p = &threads->entries.rb_node;
-	struct rb_node *parent = NULL;
 	struct thread *th;
 
-	/*
-	 * Front-end cache - TID lookups come in blocks,
-	 * so most of the time we dont have to look up
-	 * the full rbtree:
-	 */
 	th = threads->last_match;
 	if (th != NULL) {
 		if (th->tid == tid) {
@@ -437,12 +428,57 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		threads->last_match = NULL;
 	}
 
+	return NULL;
+}
+
+static struct thread*
+threads__get_last_match(struct threads *threads, struct machine *machine,
+			int pid, int tid)
+{
+	struct thread *th = NULL;
+
+	if (perf_singlethreaded)
+		th = __threads__get_last_match(threads, machine, pid, tid);
+
+	return th;
+}
+
+static void
+__threads__set_last_match(struct threads *threads, struct thread *th)
+{
+	threads->last_match = th;
+}
+
+static void
+threads__set_last_match(struct threads *threads, struct thread *th)
+{
+	if (perf_singlethreaded)
+		__threads__set_last_match(threads, th);
+}
+
+/*
+ * Caller must eventually drop thread->refcnt returned with a successful
+ * lookup/new thread inserted.
+ */
+static struct thread *____machine__findnew_thread(struct machine *machine,
+						  struct threads *threads,
+						  pid_t pid, pid_t tid,
+						  bool create)
+{
+	struct rb_node **p = &threads->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	th = threads__get_last_match(threads, machine, pid, tid);
+	if (th)
+		return th;
+
 	while (*p != NULL) {
 		parent = *p;
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->tid == tid) {
-			threads->last_match = th;
+			threads__set_last_match(threads, th);
 			machine__update_thread_pid(machine, th, pid);
 			return thread__get(th);
 		}
@@ -479,7 +515,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * It is now in the rbtree, get a ref
 		 */
 		thread__get(th);
-		threads->last_match = th;
+		threads__set_last_match(threads, th);
 		++threads->nr;
 	}
 
@@ -673,8 +709,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 	if (kmod_path__parse_name(&m, filename))
 		return NULL;
 
-	map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
-				       m.name);
+	map = map_groups__find_by_name(&machine->kmaps, m.name);
 	if (map) {
 		/*
 		 * If the map's dso is an offline module, give dso__load()
@@ -689,7 +724,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 	if (dso == NULL)
 		goto out;
 
-	map = map__new2(start, dso, MAP__FUNCTION);
+	map = map__new2(start, dso);
 	if (map == NULL)
 		goto out;
 
@@ -810,8 +845,8 @@ struct process_args {
 	u64 start;
 };
 
-static void machine__get_kallsyms_filename(struct machine *machine, char *buf,
-					   size_t bufsz)
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+				    size_t bufsz)
 {
 	if (machine__is_default_guest(machine))
 		scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
@@ -854,65 +889,171 @@ static int machine__get_running_kernel_start(struct machine *machine,
 	return 0;
 }
 
+int machine__create_extra_kernel_map(struct machine *machine,
+				     struct dso *kernel,
+				     struct extra_kernel_map *xm)
+{
+	struct kmap *kmap;
+	struct map *map;
+
+	map = map__new2(xm->start, kernel);
+	if (!map)
+		return -1;
+
+	map->end   = xm->end;
+	map->pgoff = xm->pgoff;
+
+	kmap = map__kmap(map);
+
+	kmap->kmaps = &machine->kmaps;
+	strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
+
+	map_groups__insert(&machine->kmaps, map);
+
+	pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
+		  kmap->name, map->start, map->end);
+
+	map__put(map);
+
+	return 0;
+}
+
+static u64 find_entry_trampoline(struct dso *dso)
+{
+	/* Duplicates are removed so lookup all aliases */
+	const char *syms[] = {
+		"_entry_trampoline",
+		"__entry_trampoline_start",
+		"entry_SYSCALL_64_trampoline",
+	};
+	struct symbol *sym = dso__first_symbol(dso);
+	unsigned int i;
+
+	for (; sym; sym = dso__next_symbol(sym)) {
+		if (sym->binding != STB_GLOBAL)
+			continue;
+		for (i = 0; i < ARRAY_SIZE(syms); i++) {
+			if (!strcmp(sym->name, syms[i]))
+				return sym->start;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * These values can be used for kernels that do not have symbols for the entry
+ * trampolines in kallsyms.
+ */
+#define X86_64_CPU_ENTRY_AREA_PER_CPU	0xfffffe0000000000ULL
+#define X86_64_CPU_ENTRY_AREA_SIZE	0x2c000
+#define X86_64_ENTRY_TRAMPOLINE		0x6000
+
+/* Map x86_64 PTI entry trampolines */
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+					  struct dso *kernel)
+{
+	struct map_groups *kmaps = &machine->kmaps;
+	struct maps *maps = &kmaps->maps;
+	int nr_cpus_avail, cpu;
+	bool found = false;
+	struct map *map;
+	u64 pgoff;
+
+	/*
+	 * In the vmlinux case, pgoff is a virtual address which must now be
+	 * mapped to a vmlinux offset.
+	 */
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct kmap *kmap = __map__kmap(map);
+		struct map *dest_map;
+
+		if (!kmap || !is_entry_trampoline(kmap->name))
+			continue;
+
+		dest_map = map_groups__find(kmaps, map->pgoff);
+		if (dest_map != map)
+			map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
+		found = true;
+	}
+	if (found || machine->trampolines_mapped)
+		return 0;
+
+	pgoff = find_entry_trampoline(kernel);
+	if (!pgoff)
+		return 0;
+
+	nr_cpus_avail = machine__nr_cpus_avail(machine);
+
+	/* Add a 1 page map for each CPU's entry trampoline */
+	for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
+		u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
+			 cpu * X86_64_CPU_ENTRY_AREA_SIZE +
+			 X86_64_ENTRY_TRAMPOLINE;
+		struct extra_kernel_map xm = {
+			.start = va,
+			.end   = va + page_size,
+			.pgoff = pgoff,
+		};
+
+		strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
+
+		if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
+			return -1;
+	}
+
+	machine->trampolines_mapped = nr_cpus_avail;
+
+	return 0;
+}
+
+int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
+					     struct dso *kernel __maybe_unused)
+{
+	return 0;
+}
+
 static int
 __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
 {
-	int type;
+	struct kmap *kmap;
+	struct map *map;
 
 	/* In case of renewal the kernel map, destroy previous one */
 	machine__destroy_kernel_maps(machine);
 
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-		struct map *map;
-
-		machine->vmlinux_maps[type] = map__new2(0, kernel, type);
-		if (machine->vmlinux_maps[type] == NULL)
-			return -1;
+	machine->vmlinux_map = map__new2(0, kernel);
+	if (machine->vmlinux_map == NULL)
+		return -1;
 
-		machine->vmlinux_maps[type]->map_ip =
-			machine->vmlinux_maps[type]->unmap_ip =
-				identity__map_ip;
-		map = __machine__kernel_map(machine, type);
-		kmap = map__kmap(map);
-		if (!kmap)
-			return -1;
+	machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
+	map = machine__kernel_map(machine);
+	kmap = map__kmap(map);
+	if (!kmap)
+		return -1;
 
-		kmap->kmaps = &machine->kmaps;
-		map_groups__insert(&machine->kmaps, map);
-	}
+	kmap->kmaps = &machine->kmaps;
+	map_groups__insert(&machine->kmaps, map);
 
 	return 0;
 }
 
 void machine__destroy_kernel_maps(struct machine *machine)
 {
-	int type;
-
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-		struct map *map = __machine__kernel_map(machine, type);
-
-		if (map == NULL)
-			continue;
+	struct kmap *kmap;
+	struct map *map = machine__kernel_map(machine);
 
-		kmap = map__kmap(map);
-		map_groups__remove(&machine->kmaps, map);
-		if (kmap && kmap->ref_reloc_sym) {
-			/*
-			 * ref_reloc_sym is shared among all maps, so free just
-			 * on one of them.
-			 */
-			if (type == MAP__FUNCTION) {
-				zfree((char **)&kmap->ref_reloc_sym->name);
-				zfree(&kmap->ref_reloc_sym);
-			} else
-				kmap->ref_reloc_sym = NULL;
-		}
+	if (map == NULL)
+		return;
 
-		map__put(machine->vmlinux_maps[type]);
-		machine->vmlinux_maps[type] = NULL;
+	kmap = map__kmap(map);
+	map_groups__remove(&machine->kmaps, map);
+	if (kmap && kmap->ref_reloc_sym) {
+		zfree((char **)&kmap->ref_reloc_sym->name);
+		zfree(&kmap->ref_reloc_sym);
 	}
+
+	map__zput(machine->vmlinux_map);
 }
 
 int machines__create_guest_kernel_maps(struct machines *machines)
@@ -989,32 +1130,31 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
 	return machine__create_kernel_maps(machine);
 }
 
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			     enum map_type type)
+int machine__load_kallsyms(struct machine *machine, const char *filename)
 {
 	struct map *map = machine__kernel_map(machine);
 	int ret = __dso__load_kallsyms(map->dso, filename, map, true);
 
 	if (ret > 0) {
-		dso__set_loaded(map->dso, type);
+		dso__set_loaded(map->dso);
 		/*
 		 * Since /proc/kallsyms will have multiple sessions for the
 		 * kernel, with modules between them, fixup the end of all
 		 * sections.
 		 */
-		__map_groups__fixup_end(&machine->kmaps, type);
+		map_groups__fixup_end(&machine->kmaps);
 	}
 
 	return ret;
 }
 
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
+int machine__load_vmlinux_path(struct machine *machine)
 {
 	struct map *map = machine__kernel_map(machine);
 	int ret = dso__load_vmlinux_path(map->dso, map);
 
 	if (ret > 0)
-		dso__set_loaded(map->dso, type);
+		dso__set_loaded(map->dso);
 
 	return ret;
 }
@@ -1055,10 +1195,9 @@ static bool is_kmod_dso(struct dso *dso)
 static int map_groups__set_module_path(struct map_groups *mg, const char *path,
 				       struct kmod_path *m)
 {
-	struct map *map;
 	char *long_name;
+	struct map *map = map_groups__find_by_name(mg, m->name);
 
-	map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name);
 	if (map == NULL)
 		return 0;
 
@@ -1073,8 +1212,10 @@ static int map_groups__set_module_path(struct map_groups *mg, const char *path,
 	 * Full name could reveal us kmod compression, so
 	 * we need to update the symtab_type if needed.
 	 */
-	if (m->comp && is_kmod_dso(map->dso))
+	if (m->comp && is_kmod_dso(map->dso)) {
 		map->dso->symtab_type++;
+		map->dso->comp = m->comp;
+	}
 
 	return 0;
 }
@@ -1207,19 +1348,14 @@ static int machine__create_modules(struct machine *machine)
 static void machine__set_kernel_mmap(struct machine *machine,
 				     u64 start, u64 end)
 {
-	int i;
-
-	for (i = 0; i < MAP__NR_TYPES; i++) {
-		machine->vmlinux_maps[i]->start = start;
-		machine->vmlinux_maps[i]->end   = end;
-
-		/*
-		 * Be a bit paranoid here, some perf.data file came with
-		 * a zero sized synthesized MMAP event for the kernel.
-		 */
-		if (start == 0 && end == 0)
-			machine->vmlinux_maps[i]->end = ~0ULL;
-	}
+	machine->vmlinux_map->start = start;
+	machine->vmlinux_map->end   = end;
+	/*
+	 * Be a bit paranoid here, some perf.data file came with
+	 * a zero sized synthesized MMAP event for the kernel.
+	 */
+	if (start == 0 && end == 0)
+		machine->vmlinux_map->end = ~0ULL;
 }
 
 int machine__create_kernel_maps(struct machine *machine)
@@ -1234,9 +1370,8 @@ int machine__create_kernel_maps(struct machine *machine)
 		return -1;
 
 	ret = __machine__create_kernel_maps(machine, kernel);
-	dso__put(kernel);
 	if (ret < 0)
-		return -1;
+		goto out_put;
 
 	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
 		if (machine__is_host(machine))
@@ -1249,9 +1384,10 @@ int machine__create_kernel_maps(struct machine *machine)
 
 	if (!machine__get_running_kernel_start(machine, &name, &addr)) {
 		if (name &&
-		    maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+		    map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) {
 			machine__destroy_kernel_maps(machine);
-			return -1;
+			ret = -1;
+			goto out_put;
 		}
 
 		/* we have a real start address now, so re-order the kmaps */
@@ -1267,12 +1403,16 @@ int machine__create_kernel_maps(struct machine *machine)
 		map__put(map);
 	}
 
+	if (machine__create_extra_kernel_maps(machine, kernel))
+		pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
+
 	/* update end address of the kernel map using adjacent module address */
 	map = map__next(machine__kernel_map(machine));
 	if (map)
 		machine__set_kernel_mmap(machine, addr, map->start);
-
-	return 0;
+out_put:
+	dso__put(kernel);
+	return ret;
 }
 
 static bool machine__uses_kcore(struct machine *machine)
@@ -1287,6 +1427,32 @@ static bool machine__uses_kcore(struct machine *machine)
 	return false;
 }
 
+static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
+					     union perf_event *event)
+{
+	return machine__is(machine, "x86_64") &&
+	       is_entry_trampoline(event->mmap.filename);
+}
+
+static int machine__process_extra_kernel_map(struct machine *machine,
+					     union perf_event *event)
+{
+	struct map *kernel_map = machine__kernel_map(machine);
+	struct dso *kernel = kernel_map ? kernel_map->dso : NULL;
+	struct extra_kernel_map xm = {
+		.start = event->mmap.start,
+		.end   = event->mmap.start + event->mmap.len,
+		.pgoff = event->mmap.pgoff,
+	};
+
+	if (kernel == NULL)
+		return -1;
+
+	strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+
+	return machine__create_extra_kernel_map(machine, kernel, &xm);
+}
+
 static int machine__process_kernel_mmap_event(struct machine *machine,
 					      union perf_event *event)
 {
@@ -1379,9 +1545,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		 * time /proc/sys/kernel/kptr_restrict was non zero.
 		 */
 		if (event->mmap.pgoff != 0) {
-			maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
-							 symbol_name,
-							 event->mmap.pgoff);
+			map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
+							symbol_name,
+							event->mmap.pgoff);
 		}
 
 		if (machine__is_default_guest(machine)) {
@@ -1390,6 +1556,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 			 */
 			dso__load(kernel, machine__kernel_map(machine));
 		}
+	} else if (perf_event__is_extra_kernel_mmap(machine, event)) {
+		return machine__process_extra_kernel_map(machine, event);
 	}
 	return 0;
 out_problem:
@@ -1402,7 +1570,6 @@ int machine__process_mmap2_event(struct machine *machine,
 {
 	struct thread *thread;
 	struct map *map;
-	enum map_type type;
 	int ret = 0;
 
 	if (dump_trace)
@@ -1421,11 +1588,6 @@ int machine__process_mmap2_event(struct machine *machine,
 	if (thread == NULL)
 		goto out_problem;
 
-	if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
-		type = MAP__VARIABLE;
-	else
-		type = MAP__FUNCTION;
-
 	map = map__new(machine, event->mmap2.start,
 			event->mmap2.len, event->mmap2.pgoff,
 			event->mmap2.maj,
@@ -1433,7 +1595,7 @@ int machine__process_mmap2_event(struct machine *machine,
 			event->mmap2.ino_generation,
 			event->mmap2.prot,
 			event->mmap2.flags,
-			event->mmap2.filename, type, thread);
+			event->mmap2.filename, thread);
 
 	if (map == NULL)
 		goto out_problem_map;
@@ -1460,7 +1622,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 {
 	struct thread *thread;
 	struct map *map;
-	enum map_type type;
+	u32 prot = 0;
 	int ret = 0;
 
 	if (dump_trace)
@@ -1479,16 +1641,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 	if (thread == NULL)
 		goto out_problem;
 
-	if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
-		type = MAP__VARIABLE;
-	else
-		type = MAP__FUNCTION;
+	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+		prot = PROT_EXEC;
 
 	map = map__new(machine, event->mmap.start,
 			event->mmap.len, event->mmap.pgoff,
-			0, 0, 0, 0, 0, 0,
+			0, 0, 0, 0, prot, 0,
 			event->mmap.filename,
-			type, thread);
+			thread);
 
 	if (map == NULL)
 		goto out_problem_map;
@@ -1515,7 +1675,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
 	struct threads *threads = machine__threads(machine, th->tid);
 
 	if (threads->last_match == th)
-		threads->last_match = NULL;
+		threads__set_last_match(threads, NULL);
 
 	BUG_ON(refcount_read(&th->refcnt) == 0);
 	if (lock)
@@ -1548,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	struct thread *parent = machine__findnew_thread(machine,
 							event->fork.ppid,
 							event->fork.ptid);
+	bool do_maps_clone = true;
 	int err = 0;
 
 	if (dump_trace)
@@ -1576,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 
 	thread = machine__findnew_thread(machine, event->fork.pid,
 					 event->fork.tid);
+	/*
+	 * When synthesizing FORK events, we are trying to create thread
+	 * objects for the already running tasks on the machine.
+	 *
+	 * Normally, for a kernel FORK event, we want to clone the parent's
+	 * maps because that is what the kernel just did.
+	 *
+	 * But when synthesizing, this should not be done.  If we do, we end up
+	 * with overlapping maps as we process the sythesized MMAP2 events that
+	 * get delivered shortly thereafter.
+	 *
+	 * Use the FORK event misc flags in an internal way to signal this
+	 * situation, so we can elide the map clone when appropriate.
+	 */
+	if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+		do_maps_clone = false;
 
 	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent, sample->time) < 0) {
+	    thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		err = -1;
 	}
@@ -1664,7 +1841,7 @@ static void ip__resolve_ams(struct thread *thread,
 	 * Thus, we have to try consecutively until we find a match
 	 * or else, the symbol is unknown
 	 */
-	thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al);
+	thread__find_cpumode_addr_location(thread, ip, &al);
 
 	ams->addr = ip;
 	ams->al_addr = al.addr;
@@ -1681,15 +1858,7 @@ static void ip__resolve_data(struct thread *thread,
 
 	memset(&al, 0, sizeof(al));
 
-	thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al);
-	if (al.map == NULL) {
-		/*
-		 * some shared data regions have execute bit set which puts
-		 * their mapping in the MAP__FUNCTION type array.
-		 * Check there as a fallback option before dropping the sample.
-		 */
-		thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al);
-	}
+	thread__find_symbol(thread, m, addr, &al);
 
 	ams->addr = addr;
 	ams->al_addr = al.addr;
@@ -1758,8 +1927,7 @@ static int add_callchain_ip(struct thread *thread,
 	al.filtered = 0;
 	al.sym = NULL;
 	if (!cpumode) {
-		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
-						   ip, &al);
+		thread__find_cpumode_addr_location(thread, ip, &al);
 	} else {
 		if (ip >= PERF_CONTEXT_MAX) {
 			switch (ip) {
@@ -1784,8 +1952,7 @@ static int add_callchain_ip(struct thread *thread,
 			}
 			return 0;
 		}
-		thread__find_addr_location(thread, *cpumode, MAP__FUNCTION,
-					   ip, &al);
+		thread__find_symbol(thread, *cpumode, ip, &al);
 	}
 
 	if (al.sym != NULL) {
@@ -1810,7 +1977,7 @@ static int add_callchain_ip(struct thread *thread,
 	}
 
 	srcline = callchain_srcline(al.map, al.sym, al.addr);
-	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+	return callchain_cursor_append(cursor, ip, al.map, al.sym,
 				       branch, flags, nr_loop_iter,
 				       iter_cycles, branch_from, srcline);
 }
@@ -1990,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	return 0;
 }
 
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+			     struct callchain_cursor *cursor,
+			     struct symbol **parent,
+			     struct addr_location *root_al,
+			     u8 *cpumode, int ent)
+{
+	int err = 0;
+
+	while (--ent >= 0) {
+		u64 ip = chain->ips[ent];
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, cpumode, ip,
+					       false, NULL, NULL, 0);
+			break;
+		}
+	}
+	return err;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct callchain_cursor *cursor,
 					    struct perf_evsel *evsel,
@@ -2096,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	}
 
 check_calls:
+	if (callchain_param.order != ORDER_CALLEE) {
+		err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+					&cpumode, chain->nr - first_call);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
 	for (i = first_call, nr_entries = 0;
 	     i < chain_nr && nr_entries < max_stack; i++) {
 		u64 ip;
@@ -2110,9 +2304,15 @@ check_calls:
 			continue;
 #endif
 		ip = chain->ips[j];
-
 		if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
+		else if (callchain_param.order != ORDER_CALLEE) {
+			err = find_prev_cpumode(chain, thread, cursor, parent,
+						root_al, &cpumode, j);
+			if (err)
+				return (err < 0) ? err : 0;
+			continue;
+		}
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
@@ -2136,7 +2336,8 @@ static int append_inlines(struct callchain_cursor *cursor,
 	if (!symbol_conf.inline_name || !map || !sym)
 		return ret;
 
-	addr = map__rip_2objdump(map, ip);
+	addr = map__map_ip(map, ip);
+	addr = map__rip_2objdump(map, addr);
 
 	inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
 	if (!inline_node) {
@@ -2162,6 +2363,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 {
 	struct callchain_cursor *cursor = arg;
 	const char *srcline = NULL;
+	u64 addr = entry->ip;
 
 	if (symbol_conf.hide_unresolved && entry->sym == NULL)
 		return 0;
@@ -2169,7 +2371,14 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
 		return 0;
 
-	srcline = callchain_srcline(entry->map, entry->sym, entry->ip);
+	/*
+	 * Convert entry->ip from a virtual address to an offset in
+	 * its corresponding binary.
+	 */
+	if (entry->map)
+		addr = map__map_ip(entry->map, entry->ip);
+
+	srcline = callchain_srcline(entry->map, entry->sym, addr);
 	return callchain_cursor_append(cursor, entry->ip,
 				       entry->map, entry->sym,
 				       false, NULL, 0, 0, 0, srcline);
@@ -2342,6 +2551,20 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
 	return 0;
 }
 
+/*
+ * Compares the raw arch string. N.B. see instead perf_env__arch() if a
+ * normalized arch is needed.
+ */
+bool machine__is(struct machine *machine, const char *arch)
+{
+	return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
+}
+
+int machine__nr_cpus_avail(struct machine *machine)
+{
+	return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
+}
+
 int machine__get_kernel_start(struct machine *machine)
 {
 	struct map *map = machine__kernel_map(machine);
@@ -2358,7 +2581,12 @@ int machine__get_kernel_start(struct machine *machine)
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
 		err = map__load(map);
-		if (!err)
+		/*
+		 * On x86_64, PTI entry trampolines are less than the
+		 * start of kernel text, but still above 2^63. So leave
+		 * kernel_start = 1ULL << 63 for x86_64.
+		 */
+		if (!err && !machine__is(machine, "x86_64"))
 			machine->kernel_start = map->start;
 	}
 	return err;
@@ -2373,7 +2601,7 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch
 {
 	struct machine *machine = vmachine;
 	struct map *map;
-	struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map);
+	struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
 
 	if (sym == NULL)
 		return NULL;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 66cc200ef86f..d856b85862e2 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -49,13 +49,14 @@ struct machine {
 	struct perf_env   *env;
 	struct dsos	  dsos;
 	struct map_groups kmaps;
-	struct map	  *vmlinux_maps[MAP__NR_TYPES];
+	struct map	  *vmlinux_map;
 	u64		  kernel_start;
 	pid_t		  *current_tid;
 	union { /* Tool specific area */
 		void	  *priv;
 		u64	  db_id;
 	};
+	bool		  trampolines_mapped;
 };
 
 static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
@@ -64,16 +65,22 @@ static inline struct threads *machine__threads(struct machine *machine, pid_t ti
 	return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
 }
 
+/*
+ * The main kernel (vmlinux) map
+ */
 static inline
-struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
+struct map *machine__kernel_map(struct machine *machine)
 {
-	return machine->vmlinux_maps[type];
+	return machine->vmlinux_map;
 }
 
+/*
+ * kernel (the one returned by machine__kernel_map()) plus kernel modules maps
+ */
 static inline
-struct map *machine__kernel_map(struct machine *machine)
+struct maps *machine__kernel_maps(struct machine *machine)
 {
-	return __machine__kernel_map(machine, MAP__FUNCTION);
+	return &machine->kmaps.maps;
 }
 
 int machine__get_kernel_start(struct machine *machine);
@@ -182,6 +189,9 @@ static inline bool machine__is_host(struct machine *machine)
 	return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
+bool machine__is(struct machine *machine, const char *arch);
+int machine__nr_cpus_avail(struct machine *machine);
+
 struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 
@@ -190,44 +200,27 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
 static inline
-struct symbol *machine__find_kernel_symbol(struct machine *machine,
-					   enum map_type type, u64 addr,
+struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
 					   struct map **mapp)
 {
-	return map_groups__find_symbol(&machine->kmaps, type, addr, mapp);
+	return map_groups__find_symbol(&machine->kmaps, addr, mapp);
 }
 
 static inline
 struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
-						   enum map_type type, const char *name,
+						   const char *name,
 						   struct map **mapp)
 {
-	return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp);
-}
-
-static inline
-struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
-					     struct map **mapp)
-{
-	return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
-					   mapp);
-}
-
-static inline
-struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
-						     const char *name,
-						     struct map **mapp)
-{
-	return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
+	return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp);
 }
 
 struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 					const char *filename);
 int arch__fix_module_text_start(u64 *start, const char *name);
 
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type);
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
+int machine__load_kallsyms(struct machine *machine, const char *filename);
+
+int machine__load_vmlinux_path(struct machine *machine);
 
 size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm);
@@ -272,8 +265,29 @@ pid_t machine__get_current_tid(struct machine *machine, int cpu);
 int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
 			     pid_t tid);
 /*
- * For use with libtraceevent's pevent_set_function_resolver()
+ * For use with libtraceevent's tep_set_function_resolver()
  */
 char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
 
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+				    size_t bufsz);
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+				      struct dso *kernel);
+
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
+struct extra_kernel_map {
+	u64 start;
+	u64 end;
+	u64 pgoff;
+	char name[KMAP_NAME_LEN];
+};
+
+int machine__create_extra_kernel_map(struct machine *machine,
+				     struct dso *kernel,
+				     struct extra_kernel_map *xm);
+
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+					  struct dso *kernel);
+
 #endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8fe57031e1a8..354e54550d2b 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -22,11 +22,6 @@
 
 static void __maps__insert(struct maps *maps, struct map *map);
 
-const char *map_type__name[MAP__NR_TYPES] = {
-	[MAP__FUNCTION] = "Functions",
-	[MAP__VARIABLE] = "Variables",
-};
-
 static inline int is_anon_memory(const char *filename, u32 flags)
 {
 	return flags & MAP_HUGETLB ||
@@ -129,10 +124,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
 	return false;
 }
 
-void map__init(struct map *map, enum map_type type,
-	       u64 start, u64 end, u64 pgoff, struct dso *dso)
+void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
 {
-	map->type     = type;
 	map->start    = start;
 	map->end      = end;
 	map->pgoff    = pgoff;
@@ -149,7 +142,7 @@ void map__init(struct map *map, enum map_type type,
 struct map *map__new(struct machine *machine, u64 start, u64 len,
 		     u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
 		     u64 ino_gen, u32 prot, u32 flags, char *filename,
-		     enum map_type type, struct thread *thread)
+		     struct thread *thread)
 {
 	struct map *map = malloc(sizeof(*map));
 	struct nsinfo *nsi = NULL;
@@ -173,7 +166,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 		map->flags = flags;
 		nsi = nsinfo__get(thread->nsinfo);
 
-		if ((anon || no_dso) && nsi && type == MAP__FUNCTION) {
+		if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
 			snprintf(newfilename, sizeof(newfilename),
 				 "/tmp/perf-%d.map", nsi->pid);
 			filename = newfilename;
@@ -203,7 +196,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 		if (dso == NULL)
 			goto out_delete;
 
-		map__init(map, type, start, start + len, pgoff, dso);
+		map__init(map, start, start + len, pgoff, dso);
 
 		if (anon || no_dso) {
 			map->map_ip = map->unmap_ip = identity__map_ip;
@@ -213,8 +206,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 			 * functions still return NULL, and we avoid the
 			 * unnecessary map__load warning.
 			 */
-			if (type != MAP__FUNCTION)
-				dso__set_loaded(dso, map->type);
+			if (!(prot & PROT_EXEC))
+				dso__set_loaded(dso);
 		}
 		dso->nsinfo = nsi;
 		dso__put(dso);
@@ -231,7 +224,7 @@ out_delete:
  * they are loaded) and for vmlinux, where only after we load all the
  * symbols we'll know where it starts and ends.
  */
-struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
+struct map *map__new2(u64 start, struct dso *dso)
 {
 	struct map *map = calloc(1, (sizeof(*map) +
 				     (dso->kernel ? sizeof(struct kmap) : 0)));
@@ -239,7 +232,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 		/*
 		 * ->end will be filled after we load all the symbols
 		 */
-		map__init(map, type, start, 0, 0, dso);
+		map__init(map, start, 0, 0, dso);
 	}
 
 	return map;
@@ -256,7 +249,19 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
  */
 bool __map__is_kernel(const struct map *map)
 {
-	return __machine__kernel_map(map->groups->machine, map->type) == map;
+	return machine__kernel_map(map->groups->machine) == map;
+}
+
+bool __map__is_extra_kernel_map(const struct map *map)
+{
+	struct kmap *kmap = __map__kmap((struct map *)map);
+
+	return kmap && kmap->name[0];
+}
+
+bool map__has_symbols(const struct map *map)
+{
+	return dso__has_symbols(map->dso);
 }
 
 static void map__exit(struct map *map)
@@ -279,7 +284,7 @@ void map__put(struct map *map)
 
 void map__fixup_start(struct map *map)
 {
-	struct rb_root *symbols = &map->dso->symbols[map->type];
+	struct rb_root *symbols = &map->dso->symbols;
 	struct rb_node *nd = rb_first(symbols);
 	if (nd != NULL) {
 		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -289,7 +294,7 @@ void map__fixup_start(struct map *map)
 
 void map__fixup_end(struct map *map)
 {
-	struct rb_root *symbols = &map->dso->symbols[map->type];
+	struct rb_root *symbols = &map->dso->symbols;
 	struct rb_node *nd = rb_last(symbols);
 	if (nd != NULL) {
 		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -304,7 +309,7 @@ int map__load(struct map *map)
 	const char *name = map->dso->long_name;
 	int nr;
 
-	if (dso__loaded(map->dso, map->type))
+	if (dso__loaded(map->dso))
 		return 0;
 
 	nr = dso__load(map->dso, map);
@@ -315,12 +320,11 @@ int map__load(struct map *map)
 			build_id__sprintf(map->dso->build_id,
 					  sizeof(map->dso->build_id),
 					  sbuild_id);
-			pr_warning("%s with build id %s not found",
-				   name, sbuild_id);
+			pr_debug("%s with build id %s not found", name, sbuild_id);
 		} else
-			pr_warning("Failed to open %s", name);
+			pr_debug("Failed to open %s", name);
 
-		pr_warning(", continuing without symbols\n");
+		pr_debug(", continuing without symbols\n");
 		return -1;
 	} else if (nr == 0) {
 #ifdef HAVE_LIBELF_SUPPORT
@@ -329,12 +333,11 @@ int map__load(struct map *map)
 
 		if (len > sizeof(DSO__DELETED) &&
 		    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
-			pr_warning("%.*s was updated (is prelink enabled?). "
+			pr_debug("%.*s was updated (is prelink enabled?). "
 				"Restart the long running apps that use it!\n",
 				   (int)real_len, name);
 		} else {
-			pr_warning("no symbols found in %s, maybe install "
-				   "a debug package?\n", name);
+			pr_debug("no symbols found in %s, maybe install a debug package?\n", name);
 		}
 #endif
 		return -1;
@@ -348,7 +351,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr)
 	if (map__load(map) < 0)
 		return NULL;
 
-	return dso__find_symbol(map->dso, map->type, addr);
+	return dso__find_symbol(map->dso, addr);
 }
 
 struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
@@ -356,10 +359,10 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
 	if (map__load(map) < 0)
 		return NULL;
 
-	if (!dso__sorted_by_name(map->dso, map->type))
-		dso__sort_by_name(map->dso, map->type);
+	if (!dso__sorted_by_name(map->dso))
+		dso__sort_by_name(map->dso);
 
-	return dso__find_symbol_by_name(map->dso, map->type, name);
+	return dso__find_symbol_by_name(map->dso, name);
 }
 
 struct map *map__clone(struct map *from)
@@ -376,20 +379,6 @@ struct map *map__clone(struct map *from)
 	return map;
 }
 
-int map__overlap(struct map *l, struct map *r)
-{
-	if (l->start > r->start) {
-		struct map *t = l;
-		l = r;
-		r = t;
-	}
-
-	if (l->end > r->start)
-		return 1;
-
-	return 0;
-}
-
 size_t map__fprintf(struct map *map, FILE *fp)
 {
 	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
@@ -410,16 +399,20 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
 	return fprintf(fp, "%s", dsoname);
 }
 
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym)
+{
+	if (map == NULL)
+		return SRCLINE_UNKNOWN;
+	return get_srcline(map->dso, map__rip_2objdump(map, addr), sym, true, true, addr);
+}
+
 int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
 			 FILE *fp)
 {
-	char *srcline;
 	int ret = 0;
 
 	if (map && map->dso) {
-		srcline = get_srcline(map->dso,
-				      map__rip_2objdump(map, addr), NULL,
-				      true, true, addr);
+		char *srcline = map__srcline(map, addr, NULL);
 		if (srcline != SRCLINE_UNKNOWN)
 			ret = fprintf(fp, "%s%s", prefix, srcline);
 		free_srcline(srcline);
@@ -440,6 +433,20 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
  */
 u64 map__rip_2objdump(struct map *map, u64 rip)
 {
+	struct kmap *kmap = __map__kmap(map);
+
+	/*
+	 * vmlinux does not have program headers for PTI entry trampolines and
+	 * kcore may not either. However the trampoline object code is on the
+	 * main kernel map, so just use that instead.
+	 */
+	if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps && kmap->kmaps->machine) {
+		struct map *kernel_map = machine__kernel_map(kmap->kmaps->machine);
+
+		if (kernel_map)
+			map = kernel_map;
+	}
+
 	if (!map->dso->adjust_symbols)
 		return rip;
 
@@ -494,10 +501,7 @@ static void maps__init(struct maps *maps)
 
 void map_groups__init(struct map_groups *mg, struct machine *machine)
 {
-	int i;
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		maps__init(&mg->maps[i]);
-	}
+	maps__init(&mg->maps);
 	mg->machine = machine;
 	refcount_set(&mg->refcnt, 1);
 }
@@ -525,22 +529,12 @@ static void maps__exit(struct maps *maps)
 
 void map_groups__exit(struct map_groups *mg)
 {
-	int i;
-
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		maps__exit(&mg->maps[i]);
+	maps__exit(&mg->maps);
 }
 
 bool map_groups__empty(struct map_groups *mg)
 {
-	int i;
-
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		if (maps__first(&mg->maps[i]))
-			return false;
-	}
-
-	return true;
+	return !maps__first(&mg->maps);
 }
 
 struct map_groups *map_groups__new(struct machine *machine)
@@ -566,10 +560,9 @@ void map_groups__put(struct map_groups *mg)
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
-				       enum map_type type, u64 addr,
-				       struct map **mapp)
+				       u64 addr, struct map **mapp)
 {
-	struct map *map = map_groups__find(mg, type, addr);
+	struct map *map = map_groups__find(mg, addr);
 
 	/* Ensure map is loaded before using map->map_ip */
 	if (map != NULL && map__load(map) >= 0) {
@@ -581,6 +574,13 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg,
 	return NULL;
 }
 
+static bool map__contains_symbol(struct map *map, struct symbol *sym)
+{
+	u64 ip = map->unmap_ip(map, sym->start);
+
+	return ip >= map->start && ip < map->end;
+}
+
 struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 					 struct map **mapp)
 {
@@ -596,6 +596,10 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 
 		if (sym == NULL)
 			continue;
+		if (!map__contains_symbol(pos, sym)) {
+			sym = NULL;
+			continue;
+		}
 		if (mapp != NULL)
 			*mapp = pos;
 		goto out;
@@ -608,13 +612,10 @@ out:
 }
 
 struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
-					       enum map_type type,
 					       const char *name,
 					       struct map **mapp)
 {
-	struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp);
-
-	return sym;
+	return maps__find_symbol_by_name(&mg->maps, name, mapp);
 }
 
 int map_groups__find_ams(struct addr_map_symbol *ams)
@@ -622,8 +623,7 @@ int map_groups__find_ams(struct addr_map_symbol *ams)
 	if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
 		if (ams->map->groups == NULL)
 			return -1;
-		ams->map = map_groups__find(ams->map->groups, ams->map->type,
-					    ams->addr);
+		ams->map = map_groups__find(ams->map->groups, ams->addr);
 		if (ams->map == NULL)
 			return -1;
 	}
@@ -646,7 +646,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
 		printed += fprintf(fp, "Map:");
 		printed += map__fprintf(pos, fp);
 		if (verbose > 2) {
-			printed += dso__fprintf(pos->dso, pos->type, fp);
+			printed += dso__fprintf(pos->dso, fp);
 			printed += fprintf(fp, "--\n");
 		}
 	}
@@ -656,50 +656,61 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
 	return printed;
 }
 
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
-				  FILE *fp)
-{
-	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
-	return printed += maps__fprintf(&mg->maps[type], fp);
-}
-
 size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 {
-	size_t printed = 0, i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += __map_groups__fprintf_maps(mg, i, fp);
-	return printed;
+	return maps__fprintf(&mg->maps, fp);
 }
 
 static void __map_groups__insert(struct map_groups *mg, struct map *map)
 {
-	__maps__insert(&mg->maps[map->type], map);
+	__maps__insert(&mg->maps, map);
 	map->groups = mg;
 }
 
 static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 {
 	struct rb_root *root;
-	struct rb_node *next;
+	struct rb_node *next, *first;
 	int err = 0;
 
 	down_write(&maps->lock);
 
 	root = &maps->entries;
-	next = rb_first(root);
 
+	/*
+	 * Find first map where end > map->start.
+	 * Same as find_vma() in kernel.
+	 */
+	next = root->rb_node;
+	first = NULL;
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+
+		if (pos->end > map->start) {
+			first = next;
+			if (pos->start <= map->start)
+				break;
+			next = next->rb_left;
+		} else
+			next = next->rb_right;
+	}
+
+	next = first;
 	while (next) {
 		struct map *pos = rb_entry(next, struct map, rb_node);
 		next = rb_next(&pos->rb_node);
 
-		if (!map__overlap(pos, map))
-			continue;
+		/*
+		 * Stop if current map starts after map->end.
+		 * Maps are ordered by start: next will not overlap for sure.
+		 */
+		if (pos->start >= map->end)
+			break;
 
 		if (verbose >= 2) {
 
 			if (use_browser) {
-				pr_warning("overlapping maps in %s "
-					   "(disable tui for more info)\n",
+				pr_debug("overlapping maps in %s (disable tui for more info)\n",
 					   map->dso->name);
 			} else {
 				fputs("overlapping maps:\n", fp);
@@ -758,19 +769,18 @@ out:
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 				   FILE *fp)
 {
-	return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
+	return maps__fixup_overlappings(&mg->maps, map, fp);
 }
 
 /*
  * XXX This should not really _copy_ te maps, but refcount them.
  */
-int map_groups__clone(struct thread *thread,
-		      struct map_groups *parent, enum map_type type)
+int map_groups__clone(struct thread *thread, struct map_groups *parent)
 {
 	struct map_groups *mg = thread->mg;
 	int err = -ENOMEM;
 	struct map *map;
-	struct maps *maps = &parent->maps[type];
+	struct maps *maps = &parent->maps;
 
 	down_read(&maps->lock);
 
@@ -877,15 +887,22 @@ struct map *map__next(struct map *map)
 	return NULL;
 }
 
-struct kmap *map__kmap(struct map *map)
+struct kmap *__map__kmap(struct map *map)
 {
-	if (!map->dso || !map->dso->kernel) {
-		pr_err("Internal error: map__kmap with a non-kernel map\n");
+	if (!map->dso || !map->dso->kernel)
 		return NULL;
-	}
 	return (struct kmap *)(map + 1);
 }
 
+struct kmap *map__kmap(struct map *map)
+{
+	struct kmap *kmap = __map__kmap(map);
+
+	if (!kmap)
+		pr_err("Internal error: map__kmap with a non-kernel map\n");
+	return kmap;
+}
+
 struct map_groups *map__kmaps(struct map *map)
 {
 	struct kmap *kmap = map__kmap(map);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e9bbe01b0ab..e0f327b51e66 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -8,19 +8,11 @@
 #include <linux/rbtree.h>
 #include <pthread.h>
 #include <stdio.h>
+#include <string.h>
 #include <stdbool.h>
 #include <linux/types.h>
 #include "rwsem.h"
 
-enum map_type {
-	MAP__FUNCTION = 0,
-	MAP__VARIABLE,
-};
-
-#define MAP__NR_TYPES (MAP__VARIABLE + 1)
-
-extern const char *map_type__name[MAP__NR_TYPES];
-
 struct dso;
 struct ip_callchain;
 struct ref_reloc_sym;
@@ -35,7 +27,6 @@ struct map {
 	};
 	u64			start;
 	u64			end;
-	u8 /* enum map_type */	type;
 	bool			erange_warned;
 	u32			priv;
 	u32			prot;
@@ -56,9 +47,12 @@ struct map {
 	refcount_t		refcnt;
 };
 
+#define KMAP_NAME_LEN 256
+
 struct kmap {
 	struct ref_reloc_sym	*ref_reloc_sym;
 	struct map_groups	*kmaps;
+	char			name[KMAP_NAME_LEN];
 };
 
 struct maps {
@@ -67,7 +61,7 @@ struct maps {
 };
 
 struct map_groups {
-	struct maps	 maps[MAP__NR_TYPES];
+	struct maps	 maps;
 	struct machine	 *machine;
 	refcount_t	 refcnt;
 };
@@ -85,6 +79,7 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg)
 
 void map_groups__put(struct map_groups *mg);
 
+struct kmap *__map__kmap(struct map *map);
 struct kmap *map__kmap(struct map *map);
 struct map_groups *map__kmaps(struct map *map);
 
@@ -125,7 +120,7 @@ struct thread;
  * Note: caller must ensure map->dso is not NULL (map is loaded).
  */
 #define map__for_each_symbol(map, pos, n)	\
-	dso__for_each_symbol(map->dso, pos, n, map->type)
+	dso__for_each_symbol(map->dso, pos, n)
 
 /* map__for_each_symbol_with_name - iterate over the symbols in the given map
  *                                  that have the given name
@@ -144,13 +139,13 @@ struct thread;
 #define map__for_each_symbol_by_name(map, sym_name, pos)		\
 	__map__for_each_symbol_by_name(map, sym_name, (pos))
 
-void map__init(struct map *map, enum map_type type,
+void map__init(struct map *map,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct machine *machine, u64 start, u64 len,
 		     u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
 		     u64 ino_gen, u32 prot, u32 flags,
-		     char *filename, enum map_type type, struct thread *thread);
-struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
+		     char *filename, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso);
 void map__delete(struct map *map);
 struct map *map__clone(struct map *map);
 
@@ -171,9 +166,9 @@ static inline void __map__zput(struct map **map)
 
 #define map__zput(map) __map__zput(&map)
 
-int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *map, FILE *fp);
 size_t map__fprintf_dsoname(struct map *map, FILE *fp);
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
 int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
 			 FILE *fp);
 
@@ -185,8 +180,6 @@ void map__fixup_end(struct map *map);
 
 void map__reloc_vmlinux(struct map *map);
 
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
-				  FILE *fp);
 void maps__insert(struct maps *maps, struct map *map);
 void maps__remove(struct maps *maps, struct map *map);
 struct map *maps__find(struct maps *maps, u64 addr);
@@ -197,34 +190,29 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 void map_groups__init(struct map_groups *mg, struct machine *machine);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct thread *thread,
-		      struct map_groups *parent, enum map_type type);
+		      struct map_groups *parent);
 size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
 
-int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
-				     u64 addr);
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
+				    u64 addr);
 
 static inline void map_groups__insert(struct map_groups *mg, struct map *map)
 {
-	maps__insert(&mg->maps[map->type], map);
+	maps__insert(&mg->maps, map);
 	map->groups = mg;
 }
 
 static inline void map_groups__remove(struct map_groups *mg, struct map *map)
 {
-	maps__remove(&mg->maps[map->type], map);
+	maps__remove(&mg->maps, map);
 }
 
-static inline struct map *map_groups__find(struct map_groups *mg,
-					   enum map_type type, u64 addr)
+static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
 {
-	return maps__find(&mg->maps[type], addr);
+	return maps__find(&mg->maps, addr);
 }
 
-static inline struct map *map_groups__first(struct map_groups *mg,
-					    enum map_type type)
-{
-	return maps__first(&mg->maps[type]);
-}
+struct map *map_groups__first(struct map_groups *mg);
 
 static inline struct map *map_groups__next(struct map *map)
 {
@@ -232,11 +220,9 @@ static inline struct map *map_groups__next(struct map *map)
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
-				       enum map_type type, u64 addr,
-				       struct map **mapp);
+				       u64 addr, struct map **mapp);
 
 struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
-					       enum map_type type,
 					       const char *name,
 					       struct map **mapp);
 
@@ -244,24 +230,26 @@ struct addr_map_symbol;
 
 int map_groups__find_ams(struct addr_map_symbol *ams);
 
-static inline
-struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
-						 const char *name, struct map **mapp)
-{
-	return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
-}
-
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 				   FILE *fp);
 
-struct map *map_groups__find_by_name(struct map_groups *mg,
-				     enum map_type type, const char *name);
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
 
 bool __map__is_kernel(const struct map *map);
+bool __map__is_extra_kernel_map(const struct map *map);
 
 static inline bool __map__is_kmodule(const struct map *map)
 {
-	return !__map__is_kernel(map);
+	return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+}
+
+bool map__has_symbols(const struct map *map);
+
+#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
+
+static inline bool is_entry_trampoline(const char *name)
+{
+	return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
 }
 
 #endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 1ddc3d1d0147..a28f9b5cc4ff 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -326,8 +326,8 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 				if (raw)
 					s = (char *)pe->metric_name;
 				else {
-					if (asprintf(&s, "%s\n\t[%s]",
-						     pe->metric_name, pe->desc) < 0)
+					if (asprintf(&s, "%s\n%*s%s]",
+						     pe->metric_name, 8, "[", pe->desc) < 0)
 						return;
 				}
 
@@ -490,3 +490,25 @@ out:
 	metricgroup__free_egroups(&group_list);
 	return ret;
 }
+
+bool metricgroup__has_metric(const char *metric)
+{
+	struct pmu_events_map *map = perf_pmu__find_map(NULL);
+	struct pmu_event *pe;
+	int i;
+
+	if (!map)
+		return false;
+
+	for (i = 0; ; i++) {
+		pe = &map->table[i];
+
+		if (!pe->name && !pe->metric_group && !pe->metric_name)
+			break;
+		if (!pe->metric_expr)
+			continue;
+		if (match_metric(pe->metric_name, metric))
+			return true;
+	}
+	return false;
+}
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 06854e125ee7..8a155dba0581 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -28,4 +28,5 @@ int metricgroup__parse_groups(const struct option *opt,
 			struct rblist *metric_events);
 
 void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
+bool metricgroup__has_metric(const char *metric);
 #endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index fc832676a798..cdb95b3a1213 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -164,7 +164,7 @@ void perf_mmap__munmap(struct perf_mmap *map)
 	auxtrace_mmap__munmap(&map->auxtrace_mmap);
 }
 
-int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
 {
 	/*
 	 * The last one will be done at perf_mmap__consume(), so that we
@@ -191,6 +191,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
 		return -1;
 	}
 	map->fd = fd;
+	map->cpu = cpu;
 
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
 				&mp->auxtrace_mp, map->base, fd))
@@ -280,7 +281,7 @@ int perf_mmap__read_init(struct perf_mmap *map)
 }
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
-		    int push(void *to, void *buf, size_t size))
+		    int push(struct perf_mmap *map, void *to, void *buf, size_t size))
 {
 	u64 head = perf_mmap__read_head(md);
 	unsigned char *data = md->base + page_size;
@@ -299,7 +300,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 		size = md->mask + 1 - (md->start & md->mask);
 		md->start += size;
 
-		if (push(to, buf, size) < 0) {
+		if (push(md, to, buf, size) < 0) {
 			rc = -1;
 			goto out;
 		}
@@ -309,7 +310,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 	size = md->end - md->start;
 	md->start += size;
 
-	if (push(to, buf, size) < 0) {
+	if (push(md, to, buf, size) < 0) {
 		rc = -1;
 		goto out;
 	}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index d82294db1295..cc5e2d6d17a9 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -4,7 +4,7 @@
 #include <linux/compiler.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
-#include <asm/barrier.h>
+#include <linux/ring_buffer.h>
 #include <stdbool.h>
 #include "auxtrace.h"
 #include "event.h"
@@ -18,6 +18,7 @@ struct perf_mmap {
 	void		 *base;
 	int		 mask;
 	int		 fd;
+	int		 cpu;
 	refcount_t	 refcnt;
 	u64		 prev;
 	u64		 start;
@@ -60,7 +61,7 @@ struct mmap_params {
 	struct auxtrace_mmap_params auxtrace_mp;
 };
 
-int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd);
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu);
 void perf_mmap__munmap(struct perf_mmap *map);
 
 void perf_mmap__get(struct perf_mmap *map);
@@ -70,21 +71,12 @@ void perf_mmap__consume(struct perf_mmap *map);
 
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
-	struct perf_event_mmap_page *pc = mm->base;
-	u64 head = READ_ONCE(pc->data_head);
-	rmb();
-	return head;
+	return ring_buffer_read_head(mm->base);
 }
 
 static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 {
-	struct perf_event_mmap_page *pc = md->base;
-
-	/*
-	 * ensure all reads are done before we write the tail out.
-	 */
-	mb();
-	pc->data_tail = tail;
+	ring_buffer_write_tail(md->base, tail);
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
@@ -92,7 +84,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
 union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
-		    int push(void *to, void *buf, size_t size));
+		    int push(struct perf_mmap *map, void *to, void *buf, size_t size));
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index 5be021701f34..aed170bd4384 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <asm/bug.h>
 
 struct namespaces *namespaces__new(struct namespaces_event *event)
 {
@@ -139,6 +140,9 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
 {
 	struct nsinfo *nnsi;
 
+	if (nsi == NULL)
+		return NULL;
+
 	nnsi = calloc(1, sizeof(*nnsi));
 	if (nnsi != NULL) {
 		nnsi->pid = nsi->pid;
@@ -183,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	char curpath[PATH_MAX];
 	int oldns = -1;
 	int newns = -1;
+	char *oldcwd = NULL;
 
 	if (nc == NULL)
 		return;
@@ -196,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
 		return;
 
+	oldcwd = get_current_dir_name();
+	if (!oldcwd)
+		return;
+
 	oldns = open(curpath, O_RDONLY);
 	if (oldns < 0)
-		return;
+		goto errout;
 
 	newns = open(nsi->mntns_path, O_RDONLY);
 	if (newns < 0)
@@ -207,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	if (setns(newns, CLONE_NEWNS) < 0)
 		goto errout;
 
+	nc->oldcwd = oldcwd;
 	nc->oldns = oldns;
 	nc->newns = newns;
 	return;
 
 errout:
+	free(oldcwd);
 	if (oldns > -1)
 		close(oldns);
 	if (newns > -1)
@@ -220,11 +231,16 @@ errout:
 
 void nsinfo__mountns_exit(struct nscookie *nc)
 {
-	if (nc == NULL || nc->oldns == -1 || nc->newns == -1)
+	if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
 		return;
 
 	setns(nc->oldns, CLONE_NEWNS);
 
+	if (nc->oldcwd) {
+		WARN_ON_ONCE(chdir(nc->oldcwd));
+		zfree(&nc->oldcwd);
+	}
+
 	if (nc->oldns > -1) {
 		close(nc->oldns);
 		nc->oldns = -1;
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index 760558dcfd18..d5f46c09ea31 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -10,6 +10,7 @@
 #define __PERF_NAMESPACES_H
 
 #include <sys/types.h>
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
@@ -37,6 +38,7 @@ struct nsinfo {
 struct nscookie {
 	int			oldns;
 	int			newns;
+	char			*oldcwd;
 };
 
 int nsinfo__init(struct nsinfo *nsi);
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index bad9e0296e9a..1904e7f6ec84 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -80,14 +80,20 @@ static union perf_event *dup_event(struct ordered_events *oe,
 	return oe->copy_on_queue ? __dup_event(oe, event) : event;
 }
 
-static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+static void __free_dup_event(struct ordered_events *oe, union perf_event *event)
 {
-	if (event && oe->copy_on_queue) {
+	if (event) {
 		oe->cur_alloc_size -= event->header.size;
 		free(event);
 	}
 }
 
+static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+	if (oe->copy_on_queue)
+		__free_dup_event(oe, event);
+}
+
 #define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct ordered_event))
 static struct ordered_event *alloc_event(struct ordered_events *oe,
 					 union perf_event *event)
@@ -95,21 +101,49 @@ static struct ordered_event *alloc_event(struct ordered_events *oe,
 	struct list_head *cache = &oe->cache;
 	struct ordered_event *new = NULL;
 	union perf_event *new_event;
+	size_t size;
 
 	new_event = dup_event(oe, event);
 	if (!new_event)
 		return NULL;
 
+	/*
+	 * We maintain the following scheme of buffers for ordered
+	 * event allocation:
+	 *
+	 *   to_free list -> buffer1 (64K)
+	 *                   buffer2 (64K)
+	 *                   ...
+	 *
+	 * Each buffer keeps an array of ordered events objects:
+	 *    buffer -> event[0]
+	 *              event[1]
+	 *              ...
+	 *
+	 * Each allocated ordered event is linked to one of
+	 * following lists:
+	 *   - time ordered list 'events'
+	 *   - list of currently removed events 'cache'
+	 *
+	 * Allocation of the ordered event uses the following order
+	 * to get the memory:
+	 *   - use recently removed object from 'cache' list
+	 *   - use available object in current allocation buffer
+	 *   - allocate new buffer if the current buffer is full
+	 *
+	 * Removal of ordered event object moves it from events to
+	 * the cache list.
+	 */
+	size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new);
+
 	if (!list_empty(cache)) {
 		new = list_entry(cache->next, struct ordered_event, list);
 		list_del(&new->list);
 	} else if (oe->buffer) {
-		new = oe->buffer + oe->buffer_idx;
+		new = &oe->buffer->event[oe->buffer_idx];
 		if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
 			oe->buffer = NULL;
-	} else if (oe->cur_alloc_size < oe->max_alloc_size) {
-		size_t size = MAX_SAMPLE_BUFFER * sizeof(*new);
-
+	} else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) {
 		oe->buffer = malloc(size);
 		if (!oe->buffer) {
 			free_dup_event(oe, new_event);
@@ -122,11 +156,11 @@ static struct ordered_event *alloc_event(struct ordered_events *oe,
 		oe->cur_alloc_size += size;
 		list_add(&oe->buffer->list, &oe->to_free);
 
-		/* First entry is abused to maintain the to_free list. */
-		oe->buffer_idx = 2;
-		new = oe->buffer + 1;
+		oe->buffer_idx = 1;
+		new = &oe->buffer->event[0];
 	} else {
 		pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
+		return NULL;
 	}
 
 	new->event = new_event;
@@ -300,15 +334,38 @@ void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t d
 	oe->deliver	   = deliver;
 }
 
+static void
+ordered_events_buffer__free(struct ordered_events_buffer *buffer,
+			    unsigned int max, struct ordered_events *oe)
+{
+	if (oe->copy_on_queue) {
+		unsigned int i;
+
+		for (i = 0; i < max; i++)
+			__free_dup_event(oe, buffer->event[i].event);
+	}
+
+	free(buffer);
+}
+
 void ordered_events__free(struct ordered_events *oe)
 {
-	while (!list_empty(&oe->to_free)) {
-		struct ordered_event *event;
+	struct ordered_events_buffer *buffer, *tmp;
 
-		event = list_entry(oe->to_free.next, struct ordered_event, list);
-		list_del(&event->list);
-		free_dup_event(oe, event->event);
-		free(event);
+	if (list_empty(&oe->to_free))
+		return;
+
+	/*
+	 * Current buffer might not have all the events allocated
+	 * yet, we need to free only allocated ones ...
+	 */
+	list_del(&oe->buffer->list);
+	ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+
+	/* ... and continue with the rest */
+	list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
+		list_del(&buffer->list);
+		ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe);
 	}
 }
 
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 8c7a2948593e..1338d5c345dc 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -25,23 +25,28 @@ struct ordered_events;
 typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
 					 struct ordered_event *event);
 
+struct ordered_events_buffer {
+	struct list_head	list;
+	struct ordered_event	event[0];
+};
+
 struct ordered_events {
-	u64			last_flush;
-	u64			next_flush;
-	u64			max_timestamp;
-	u64			max_alloc_size;
-	u64			cur_alloc_size;
-	struct list_head	events;
-	struct list_head	cache;
-	struct list_head	to_free;
-	struct ordered_event	*buffer;
-	struct ordered_event	*last;
-	ordered_events__deliver_t deliver;
-	int			buffer_idx;
-	unsigned int		nr_events;
-	enum oe_flush		last_flush_type;
-	u32			nr_unordered_events;
-	bool                    copy_on_queue;
+	u64				 last_flush;
+	u64				 next_flush;
+	u64				 max_timestamp;
+	u64				 max_alloc_size;
+	u64				 cur_alloc_size;
+	struct list_head		 events;
+	struct list_head		 cache;
+	struct list_head		 to_free;
+	struct ordered_events_buffer	*buffer;
+	struct ordered_event		*last;
+	ordered_events__deliver_t	 deliver;
+	int				 buffer_idx;
+	unsigned int			 nr_events;
+	enum oe_flush			 last_flush_type;
+	u32				 nr_unordered_events;
+	bool				 copy_on_queue;
 };
 
 int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2fc4ee8b86c1..59be3466d64d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -156,13 +156,12 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		    (strcmp(sys_dirent->d_name, ".")) &&	\
 		    (strcmp(sys_dirent->d_name, "..")))
 
-static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
+static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
 {
 	char evt_path[MAXPATHLEN];
 	int fd;
 
-	snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
-			sys_dir->d_name, evt_dir->d_name);
+	snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
 	fd = open(evt_path, O_RDONLY);
 	if (fd < 0)
 		return -EINVAL;
@@ -171,12 +170,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
 	return 0;
 }
 
-#define for_each_event(sys_dirent, evt_dir, evt_dirent)		\
+#define for_each_event(dir_path, evt_dir, evt_dirent)		\
 	while ((evt_dirent = readdir(evt_dir)) != NULL)		\
 		if (evt_dirent->d_type == DT_DIR &&		\
 		    (strcmp(evt_dirent->d_name, ".")) &&	\
 		    (strcmp(evt_dirent->d_name, "..")) &&	\
-		    (!tp_event_has_id(sys_dirent, evt_dirent)))
+		    (!tp_event_has_id(dir_path, evt_dirent)))
 
 #define MAX_EVENT_LENGTH 512
 
@@ -190,21 +189,21 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 	int fd;
 	u64 id;
 	char evt_path[MAXPATHLEN];
-	char dir_path[MAXPATHLEN];
+	char *dir_path;
 
-	sys_dir = opendir(tracing_events_path);
+	sys_dir = tracing_events__opendir();
 	if (!sys_dir)
 		return NULL;
 
 	for_each_subsystem(sys_dir, sys_dirent) {
-
-		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-			 sys_dirent->d_name);
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
 		evt_dir = opendir(dir_path);
 		if (!evt_dir)
-			continue;
+			goto next;
 
-		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+		for_each_event(dir_path, evt_dir, evt_dirent) {
 
 			scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
 				  evt_dirent->d_name);
@@ -218,6 +217,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 			close(fd);
 			id = atoll(id_buf);
 			if (id == config) {
+				put_events_file(dir_path);
 				closedir(evt_dir);
 				closedir(sys_dir);
 				path = zalloc(sizeof(*path));
@@ -242,6 +242,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 			}
 		}
 		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
 	}
 
 	closedir(sys_dir);
@@ -512,14 +514,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 				      struct parse_events_error *err,
 				      struct list_head *head_config)
 {
-	char evt_path[MAXPATHLEN];
+	char *evt_path;
 	struct dirent *evt_ent;
 	DIR *evt_dir;
 	int ret = 0, found = 0;
 
-	snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
+	evt_path = get_events_file(sys_name);
+	if (!evt_path) {
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
 	evt_dir = opendir(evt_path);
 	if (!evt_dir) {
+		put_events_file(evt_path);
 		tracepoint_error(err, errno, sys_name, evt_name);
 		return -1;
 	}
@@ -545,6 +552,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 		ret = -1;
 	}
 
+	put_events_file(evt_path);
 	closedir(evt_dir);
 	return ret;
 }
@@ -570,7 +578,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 	DIR *events_dir;
 	int ret = 0;
 
-	events_dir = opendir(tracing_events_path);
+	events_dir = tracing_events__opendir();
 	if (!events_dir) {
 		tracepoint_error(err, errno, sys_name, evt_name);
 		return -1;
@@ -918,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
 	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
 	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
 	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
+	[PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]		= "nr",
 	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
 	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
 	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
@@ -1029,6 +1038,9 @@ do {									   \
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 		CHECK_TYPE_VAL(NUM);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+		CHECK_TYPE_VAL(NUM);
+		break;
 	default:
 		err->str = strdup("unknown term");
 		err->idx = term->err_term;
@@ -1076,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 	case PARSE_EVENTS__TERM_TYPE_INHERIT:
 	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 		return config_term_common(attr, term, err);
@@ -1154,6 +1167,9 @@ do {								\
 		case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 			ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+			ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+			break;
 		case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 			ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
 			break;
@@ -1983,8 +1999,11 @@ static int set_filter(struct perf_evsel *evsel, const void *arg)
 	int nr_addr_filters = 0;
 	struct perf_pmu *pmu = NULL;
 
-	if (evsel == NULL)
-		goto err;
+	if (evsel == NULL) {
+		fprintf(stderr,
+			"--filter option should follow a -e tracepoint or HW tracer option\n");
+		return -1;
+	}
 
 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
 		if (perf_evsel__append_tp_filter(evsel, str) < 0) {
@@ -2006,8 +2025,11 @@ static int set_filter(struct perf_evsel *evsel, const void *arg)
 		perf_pmu__scan_file(pmu, "nr_addr_filters",
 				    "%d", &nr_addr_filters);
 
-	if (!nr_addr_filters)
-		goto err;
+	if (!nr_addr_filters) {
+		fprintf(stderr,
+			"This CPU does not support address filtering\n");
+		return -1;
+	}
 
 	if (perf_evsel__append_addr_filter(evsel, str) < 0) {
 		fprintf(stderr,
@@ -2016,12 +2038,6 @@ static int set_filter(struct perf_evsel *evsel, const void *arg)
 	}
 
 	return 0;
-
-err:
-	fprintf(stderr,
-		"--filter option should follow a -e tracepoint or HW tracer option\n");
-
-	return -1;
 }
 
 int parse_filter(const struct option *opt, const char *str,
@@ -2092,13 +2108,13 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
 	DIR *sys_dir, *evt_dir;
 	struct dirent *sys_dirent, *evt_dirent;
 	char evt_path[MAXPATHLEN];
-	char dir_path[MAXPATHLEN];
+	char *dir_path;
 	char **evt_list = NULL;
 	unsigned int evt_i = 0, evt_num = 0;
 	bool evt_num_known = false;
 
 restart:
-	sys_dir = opendir(tracing_events_path);
+	sys_dir = tracing_events__opendir();
 	if (!sys_dir)
 		return;
 
@@ -2113,13 +2129,14 @@ restart:
 		    !strglobmatch(sys_dirent->d_name, subsys_glob))
 			continue;
 
-		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-			 sys_dirent->d_name);
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
 		evt_dir = opendir(dir_path);
 		if (!evt_dir)
-			continue;
+			goto next;
 
-		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+		for_each_event(dir_path, evt_dir, evt_dirent) {
 			if (event_glob != NULL &&
 			    !strglobmatch(evt_dirent->d_name, event_glob))
 				continue;
@@ -2133,11 +2150,15 @@ restart:
 				 sys_dirent->d_name, evt_dirent->d_name);
 
 			evt_list[evt_i] = strdup(evt_path);
-			if (evt_list[evt_i] == NULL)
+			if (evt_list[evt_i] == NULL) {
+				put_events_file(dir_path);
 				goto out_close_evt_dir;
+			}
 			evt_i++;
 		}
 		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
 	}
 	closedir(sys_dir);
 
@@ -2185,21 +2206,21 @@ int is_valid_tracepoint(const char *event_string)
 	DIR *sys_dir, *evt_dir;
 	struct dirent *sys_dirent, *evt_dirent;
 	char evt_path[MAXPATHLEN];
-	char dir_path[MAXPATHLEN];
+	char *dir_path;
 
-	sys_dir = opendir(tracing_events_path);
+	sys_dir = tracing_events__opendir();
 	if (!sys_dir)
 		return 0;
 
 	for_each_subsystem(sys_dir, sys_dirent) {
-
-		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-			 sys_dirent->d_name);
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
 		evt_dir = opendir(dir_path);
 		if (!evt_dir)
-			continue;
+			goto next;
 
-		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+		for_each_event(dir_path, evt_dir, evt_dirent) {
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent->d_name, evt_dirent->d_name);
 			if (!strcmp(evt_path, event_string)) {
@@ -2209,6 +2230,8 @@ int is_valid_tracepoint(const char *event_string)
 			}
 		}
 		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
 	}
 	closedir(sys_dir);
 	return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 4473dac27aee..5ed035cbcbb7 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
 	PARSE_EVENTS__TERM_TYPE_INHERIT,
 	PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+	PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
 	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index a1a01b1ac8b8..7805c71aaae2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,7 +53,21 @@ static int str(yyscan_t scanner, int token)
 	YYSTYPE *yylval = parse_events_get_lval(scanner);
 	char *text = parse_events_get_text(scanner);
 
-	yylval->str = strdup(text);
+	if (text[0] != '\'') {
+		yylval->str = strdup(text);
+	} else {
+		/*
+		 * If a text tag specified on the command line
+		 * contains opening single quite ' then it is
+		 * expected that the tag ends with single quote
+		 * as well, like this:
+		 *     name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+		 * quotes need to be escaped to bypass shell
+		 * processing.
+		 */
+		yylval->str = strndup(&text[1], strlen(text) - 2);
+	}
+
 	return token;
 }
 
@@ -176,6 +190,7 @@ num_dec		[0-9]+
 num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
+name_tag	[\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 drv_cfg_term	[a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
 /* If you add a modifier you need to update check_modifier() */
@@ -254,6 +269,7 @@ time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
 call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
 stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 max-stack		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
 inherit			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
@@ -344,6 +360,7 @@ r{num_raw_hex}		{ return raw(yyscanner); }
 {bpf_object}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
 {bpf_source}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
 {name}			{ return pmu_str_check(yyscanner); }
+{name_tag}		{ return str(yyscanner, PE_NAME); }
 "/"			{ BEGIN(config); return '/'; }
 -			{ return '-'; }
 ,			{ BEGIN(event); return ','; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index e37608a87dba..da8fe57691b8 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -73,6 +73,7 @@ static void inc_group_count(struct list_head *list,
 %type <num> value_sym
 %type <head> event_config
 %type <head> opt_event_config
+%type <head> opt_pmu_config
 %type <term> event_term
 %type <head> event_pmu
 %type <head> event_legacy_symbol
@@ -224,13 +225,18 @@ event_def: event_pmu |
 	   event_bpf_file
 
 event_pmu:
-PE_NAME opt_event_config
+PE_NAME opt_pmu_config
 {
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
 	struct list_head *list, *orig_terms, *terms;
 
 	if (parse_events_copy_term_list($2, &orig_terms))
 		YYABORT;
 
+	if (error)
+		error->idx = @1.first_column;
+
 	ALLOC_LIST(list);
 	if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
 		struct perf_pmu *pmu = NULL;
@@ -496,6 +502,17 @@ opt_event_config:
 	$$ = NULL;
 }
 
+opt_pmu_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+
 start_terms: event_config
 {
 	struct parse_events_state *parse_state = _parse_state;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d2fb597c9a8c..7e49baad304d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -234,6 +234,74 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
 	return 0;
 }
 
+static void perf_pmu_assign_str(char *name, const char *field, char **old_str,
+				char **new_str)
+{
+	if (!*old_str)
+		goto set_new;
+
+	if (*new_str) {	/* Have new string, check with old */
+		if (strcasecmp(*old_str, *new_str))
+			pr_debug("alias %s differs in field '%s'\n",
+				 name, field);
+		zfree(old_str);
+	} else		/* Nothing new --> keep old string */
+		return;
+set_new:
+	*old_str = *new_str;
+	*new_str = NULL;
+}
+
+static void perf_pmu_update_alias(struct perf_pmu_alias *old,
+				  struct perf_pmu_alias *newalias)
+{
+	perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc);
+	perf_pmu_assign_str(old->name, "long_desc", &old->long_desc,
+			    &newalias->long_desc);
+	perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic);
+	perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr,
+			    &newalias->metric_expr);
+	perf_pmu_assign_str(old->name, "metric_name", &old->metric_name,
+			    &newalias->metric_name);
+	perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str);
+	old->scale = newalias->scale;
+	old->per_pkg = newalias->per_pkg;
+	old->snapshot = newalias->snapshot;
+	memcpy(old->unit, newalias->unit, sizeof(old->unit));
+}
+
+/* Delete an alias entry. */
+static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+{
+	zfree(&newalias->name);
+	zfree(&newalias->desc);
+	zfree(&newalias->long_desc);
+	zfree(&newalias->topic);
+	zfree(&newalias->str);
+	zfree(&newalias->metric_expr);
+	zfree(&newalias->metric_name);
+	parse_events_terms__purge(&newalias->terms);
+	free(newalias);
+}
+
+/* Merge an alias, search in alias list. If this name is already
+ * present merge both of them to combine all information.
+ */
+static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias,
+				 struct list_head *alist)
+{
+	struct perf_pmu_alias *a;
+
+	list_for_each_entry(a, alist, list) {
+		if (!strcasecmp(newalias->name, a->name)) {
+			perf_pmu_update_alias(a, newalias);
+			perf_pmu_free_alias(newalias);
+			return true;
+		}
+	}
+	return false;
+}
+
 static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 				 char *desc, char *val,
 				 char *long_desc, char *topic,
@@ -241,9 +309,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 				 char *metric_expr,
 				 char *metric_name)
 {
+	struct parse_events_term *term;
 	struct perf_pmu_alias *alias;
 	int ret;
 	int num;
+	char newval[256];
 
 	alias = malloc(sizeof(*alias));
 	if (!alias)
@@ -262,6 +332,27 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 		return ret;
 	}
 
+	/* Scan event and remove leading zeroes, spaces, newlines, some
+	 * platforms have terms specified as
+	 * event=0x0091 (read from files ../<PMU>/events/<FILE>
+	 * and terms specified as event=0x91 (read from JSON files).
+	 *
+	 * Rebuild string to make alias->str member comparable.
+	 */
+	memset(newval, 0, sizeof(newval));
+	ret = 0;
+	list_for_each_entry(term, &alias->terms, list) {
+		if (ret)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 ",");
+		if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 "%s=%#x", term->config, term->val.num);
+		else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 "%s=%s", term->config, term->val.str);
+	}
+
 	alias->name = strdup(name);
 	if (dir) {
 		/*
@@ -285,9 +376,10 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 		snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
 	}
 	alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1;
-	alias->str = strdup(val);
+	alias->str = strdup(newval);
 
-	list_add_tail(&alias->list, list);
+	if (!perf_pmu_merge_alias(alias, list))
+		list_add_tail(&alias->list, list);
 
 	return 0;
 }
@@ -303,6 +395,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI
 
 	buf[ret] = 0;
 
+	/* Remove trailing newline from sysfs file */
+	rtrim(buf);
+
 	return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL,
 				     NULL, NULL, NULL);
 }
@@ -557,12 +652,6 @@ static int is_arm_pmu_core(const char *name)
 	if (stat(path, &st) == 0)
 		return 1;
 
-	/* Look for cpu sysfs (specific to s390) */
-	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s",
-		  sysfs, name);
-	if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5))
-		return 1;
-
 	return 0;
 }
 
@@ -684,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
 
 		if (!is_arm_pmu_core(name)) {
 			pname = pe->pmu ? pe->pmu : "cpu";
-			if (strncmp(pname, name, strlen(pname)))
+			if (strcmp(pname, name))
 				continue;
 		}
 
@@ -841,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
 
 static __u64 pmu_format_max_value(const unsigned long *format)
 {
-	__u64 w = 0;
-	int fbit;
+	int w;
 
-	for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS)
-		w |= (1ULL << fbit);
-
-	return w;
+	w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+	if (!w)
+		return 0;
+	if (w < 64)
+		return (1ULL << w) - 1;
+	return -1;
 }
 
 /*
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e1dbc9821617..e86f8be89157 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -111,17 +111,6 @@ void exit_probe_symbol_maps(void)
 	symbol__exit();
 }
 
-static struct symbol *__find_kernel_function_by_name(const char *name,
-						     struct map **mapp)
-{
-	return machine__find_kernel_function_by_name(host_machine, name, mapp);
-}
-
-static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
-{
-	return machine__find_kernel_function(host_machine, addr, mapp);
-}
-
 static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
 {
 	/* kmap->ref_reloc_sym should be set if host_machine is initialized */
@@ -149,7 +138,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
 	if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
 		*addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
 	else {
-		sym = __find_kernel_function_by_name(name, &map);
+		sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
 		if (!sym)
 			return -ENOENT;
 		*addr = map->unmap_ip(map, sym->start) -
@@ -161,8 +150,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
 
 static struct map *kernel_get_module_map(const char *module)
 {
-	struct map_groups *grp = &host_machine->kmaps;
-	struct maps *maps = &grp->maps[MAP__FUNCTION];
+	struct maps *maps = machine__kernel_maps(host_machine);
 	struct map *pos;
 
 	/* A file path -- this is an offline module */
@@ -177,8 +165,7 @@ static struct map *kernel_get_module_map(const char *module)
 		if (strncmp(pos->dso->short_name + 1, module,
 			    pos->dso->short_name_len - 2) == 0 &&
 		    module[pos->dso->short_name_len - 2] == '\0') {
-			map__get(pos);
-			return pos;
+			return map__get(pos);
 		}
 	}
 	return NULL;
@@ -341,7 +328,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
 		char module_name[128];
 
 		snprintf(module_name, sizeof(module_name), "[%s]", module);
-		map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name);
+		map = map_groups__find_by_name(&host_machine->kmaps, module_name);
 		if (map) {
 			dso = map->dso;
 			goto found;
@@ -1832,6 +1819,12 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
 			tp->offset = strtoul(fmt2_str, NULL, 10);
 	}
 
+	if (tev->uprobes) {
+		fmt2_str = strchr(p, '(');
+		if (fmt2_str)
+			tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0);
+	}
+
 	tev->nargs = argc - 2;
 	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
 	if (tev->args == NULL) {
@@ -2025,6 +2018,22 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
 	return err;
 }
 
+static int
+synthesize_uprobe_trace_def(struct probe_trace_event *tev, struct strbuf *buf)
+{
+	struct probe_trace_point *tp = &tev->point;
+	int err;
+
+	err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address);
+
+	if (err >= 0 && tp->ref_ctr_offset) {
+		if (!uprobe_ref_ctr_is_supported())
+			return -1;
+		err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset);
+	}
+	return err >= 0 ? 0 : -1;
+}
+
 char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 {
 	struct probe_trace_point *tp = &tev->point;
@@ -2054,15 +2063,17 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 	}
 
 	/* Use the tp->address for uprobes */
-	if (tev->uprobes)
-		err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
-	else if (!strncmp(tp->symbol, "0x", 2))
+	if (tev->uprobes) {
+		err = synthesize_uprobe_trace_def(tev, &buf);
+	} else if (!strncmp(tp->symbol, "0x", 2)) {
 		/* Absolute address. See try_to_find_absolute_address() */
 		err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
 				  tp->module ? ":" : "", tp->address);
-	else
+	} else {
 		err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
 				tp->module ? ":" : "", tp->symbol, tp->offset);
+	}
+
 	if (err)
 		goto error;
 
@@ -2098,7 +2109,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
 		}
 		if (addr) {
 			addr += tp->offset;
-			sym = __find_kernel_function(addr, &map);
+			sym = machine__find_kernel_symbol(host_machine, addr, &map);
 		}
 	}
 
@@ -2646,6 +2657,13 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev)
 {
 	int i;
 	char *buf = synthesize_probe_trace_command(tev);
+	struct probe_trace_point *tp = &tev->point;
+
+	if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) {
+		pr_warning("A semaphore is associated with %s:%s and "
+			   "seems your kernel doesn't support it.\n",
+			   tev->group, tev->event);
+	}
 
 	/* Old uprobe event doesn't support memory dereference */
 	if (!tev->uprobes || tev->nargs == 0 || !buf)
@@ -3504,19 +3522,18 @@ int show_available_funcs(const char *target, struct nsinfo *nsi,
 			       (target) ? : "kernel");
 		goto end;
 	}
-	if (!dso__sorted_by_name(map->dso, map->type))
-		dso__sort_by_name(map->dso, map->type);
+	if (!dso__sorted_by_name(map->dso))
+		dso__sort_by_name(map->dso);
 
 	/* Show all (filtered) symbols */
 	setup_pager();
 
-        for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) {
 		struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
 
 		if (strfilter__compare(_filter, pos->sym.name))
 			printf("%s\n", pos->sym.name);
-        }
-
+	}
 end:
 	map__put(map);
 	exit_probe_symbol_maps();
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 45b14f020558..15a98c3a2a2f 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -27,6 +27,7 @@ struct probe_trace_point {
 	char		*symbol;	/* Base symbol */
 	char		*module;	/* Module name */
 	unsigned long	offset;		/* Offset from symbol */
+	unsigned long	ref_ctr_offset;	/* SDT reference counter offset */
 	unsigned long	address;	/* Actual address of the trace point */
 	bool		retprobe;	/* Return probe flag */
 };
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 4ae1123c6794..aac7817d9e14 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -84,8 +84,7 @@ int open_trace_file(const char *trace_file, bool readwrite)
 	char buf[PATH_MAX];
 	int ret;
 
-	ret = e_snprintf(buf, PATH_MAX, "%s/%s",
-			 tracing_path, trace_file);
+	ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
 	if (ret >= 0) {
 		pr_debug("Opening %s write=%d\n", buf, readwrite);
 		if (readwrite && !probe_event_dry_run)
@@ -697,8 +696,16 @@ out_err:
 #ifdef HAVE_GELF_GETNOTE_SUPPORT
 static unsigned long long sdt_note__get_addr(struct sdt_note *note)
 {
-	return note->bit32 ? (unsigned long long)note->addr.a32[0]
-		 : (unsigned long long)note->addr.a64[0];
+	return note->bit32 ?
+		(unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] :
+		(unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC];
+}
+
+static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note)
+{
+	return note->bit32 ?
+		(unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] :
+		(unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR];
 }
 
 static const char * const type_to_suffix[] = {
@@ -776,14 +783,21 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
 {
 	struct strbuf buf;
 	char *ret = NULL, **args;
-	int i, args_count;
+	int i, args_count, err;
+	unsigned long long ref_ctr_offset;
 
 	if (strbuf_init(&buf, 32) < 0)
 		return NULL;
 
-	if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
-				sdtgrp, note->name, pathname,
-				sdt_note__get_addr(note)) < 0)
+	err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+			sdtgrp, note->name, pathname,
+			sdt_note__get_addr(note));
+
+	ref_ctr_offset = sdt_note__get_ref_ctr_offset(note);
+	if (ref_ctr_offset && err >= 0)
+		err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset);
+
+	if (err < 0)
 		goto error;
 
 	if (!note->args)
@@ -999,6 +1013,7 @@ int probe_cache__show_all_caches(struct strfilter *filter)
 enum ftrace_readme {
 	FTRACE_README_PROBE_TYPE_X = 0,
 	FTRACE_README_KRETPROBE_OFFSET,
+	FTRACE_README_UPROBE_REF_CTR,
 	FTRACE_README_END,
 };
 
@@ -1010,6 +1025,7 @@ static struct {
 	[idx] = {.pattern = pat, .avail = false}
 	DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
 	DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+	DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
 };
 
 static bool scan_ftrace_readme(enum ftrace_readme type)
@@ -1065,3 +1081,8 @@ bool kretprobe_offset_is_supported(void)
 {
 	return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
 }
+
+bool uprobe_ref_ctr_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR);
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 63f29b1d22c1..2a249182f2a6 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -69,6 +69,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
 int probe_cache__show_all_caches(struct strfilter *filter);
 bool probe_type_is_available(enum probe_type type);
 bool kretprobe_offset_is_supported(void);
+bool uprobe_ref_ctr_is_supported(void);
 #else	/* ! HAVE_LIBELF_SUPPORT */
 static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
 {
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 863b61478edd..50150dfc0cdf 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -11,6 +11,7 @@
 #include "cpumap.h"
 #include "print_binary.h"
 #include "thread_map.h"
+#include "mmap.h"
 
 #if PY_MAJOR_VERSION < 3
 #define _PyUnicode_FromString(arg) \
@@ -339,36 +340,36 @@ static bool is_tracepoint(struct pyrf_event *pevent)
 }
 
 static PyObject*
-tracepoint_field(struct pyrf_event *pe, struct format_field *field)
+tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
 {
-	struct pevent *pevent = field->event->pevent;
+	struct tep_handle *pevent = field->event->pevent;
 	void *data = pe->sample.raw_data;
 	PyObject *ret = NULL;
 	unsigned long long val;
 	unsigned int offset, len;
 
-	if (field->flags & FIELD_IS_ARRAY) {
+	if (field->flags & TEP_FIELD_IS_ARRAY) {
 		offset = field->offset;
 		len    = field->size;
-		if (field->flags & FIELD_IS_DYNAMIC) {
-			val     = pevent_read_number(pevent, data + offset, len);
+		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+			val     = tep_read_number(pevent, data + offset, len);
 			offset  = val;
 			len     = offset >> 16;
 			offset &= 0xffff;
 		}
-		if (field->flags & FIELD_IS_STRING &&
+		if (field->flags & TEP_FIELD_IS_STRING &&
 		    is_printable_array(data + offset, len)) {
 			ret = _PyUnicode_FromString((char *)data + offset);
 		} else {
 			ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
-			field->flags &= ~FIELD_IS_STRING;
+			field->flags &= ~TEP_FIELD_IS_STRING;
 		}
 	} else {
-		val = pevent_read_number(pevent, data + field->offset,
-					 field->size);
-		if (field->flags & FIELD_IS_POINTER)
+		val = tep_read_number(pevent, data + field->offset,
+				      field->size);
+		if (field->flags & TEP_FIELD_IS_POINTER)
 			ret = PyLong_FromUnsignedLong((unsigned long) val);
-		else if (field->flags & FIELD_IS_SIGNED)
+		else if (field->flags & TEP_FIELD_IS_SIGNED)
 			ret = PyLong_FromLong((long) val);
 		else
 			ret = PyLong_FromUnsignedLong((unsigned long) val);
@@ -382,10 +383,10 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
 {
 	const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
 	struct perf_evsel *evsel = pevent->evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!evsel->tp_format) {
-		struct event_format *tp_format;
+		struct tep_event_format *tp_format;
 
 		tp_format = trace_event__tp_format_id(evsel->attr.config);
 		if (!tp_format)
@@ -394,7 +395,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
 		evsel->tp_format = tp_format;
 	}
 
-	field = pevent_find_any_field(evsel->tp_format, str);
+	field = tep_find_any_field(evsel->tp_format, str);
 	if (!field)
 		return NULL;
 
@@ -976,6 +977,20 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
 	return Py_BuildValue("i", evlist->nr_entries);
 }
 
+static struct perf_mmap *get_md(struct perf_evlist *evlist, int cpu)
+{
+	int i;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct perf_mmap *md = &evlist->mmap[i];
+
+		if (md->cpu == cpu)
+			return md;
+	}
+
+	return NULL;
+}
+
 static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 					  PyObject *args, PyObject *kwargs)
 {
@@ -990,7 +1005,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 					 &cpu, &sample_id_all))
 		return NULL;
 
-	md = &evlist->mmap[cpu];
+	md = get_md(evlist, cpu);
+	if (!md)
+		return NULL;
+
 	if (perf_mmap__read_init(md) < 0)
 		goto end;
 
@@ -1222,7 +1240,7 @@ static struct {
 static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
 				  PyObject *args, PyObject *kwargs)
 {
-	struct event_format *tp_format;
+	struct tep_event_format *tp_format;
 	static char *kwlist[] = { "sys", "name", NULL };
 	char *sys  = NULL;
 	char *name = NULL;
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
deleted file mode 100644
index 22eaa201aa27..000000000000
--- a/tools/perf/util/quote.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <stdlib.h>
-#include "strbuf.h"
-#include "quote.h"
-#include "util.h"
-
-/* Help to copy the thing properly quoted for the shell safety.
- * any single quote is replaced with '\'', any exclamation point
- * is replaced with '\!', and the whole thing is enclosed in a
- *
- * E.g.
- *  original     sq_quote     result
- *  name     ==> name      ==> 'name'
- *  a b      ==> a b       ==> 'a b'
- *  a'b      ==> a'\''b    ==> 'a'\''b'
- *  a!b      ==> a'\!'b    ==> 'a'\!'b'
- */
-static inline int need_bs_quote(char c)
-{
-	return (c == '\'' || c == '!');
-}
-
-static int sq_quote_buf(struct strbuf *dst, const char *src)
-{
-	char *to_free = NULL;
-	int ret;
-
-	if (dst->buf == src)
-		to_free = strbuf_detach(dst, NULL);
-
-	ret = strbuf_addch(dst, '\'');
-	while (!ret && *src) {
-		size_t len = strcspn(src, "'!");
-		ret = strbuf_add(dst, src, len);
-		src += len;
-		while (!ret && need_bs_quote(*src))
-			ret = strbuf_addf(dst, "'\\%c\'", *src++);
-	}
-	if (!ret)
-		ret = strbuf_addch(dst, '\'');
-	free(to_free);
-
-	return ret;
-}
-
-int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
-{
-	int i, ret;
-
-	/* Copy into destination buffer. */
-	ret = strbuf_grow(dst, 255);
-	for (i = 0; !ret && argv[i]; ++i) {
-		ret = strbuf_addch(dst, ' ');
-		if (ret)
-			break;
-		ret = sq_quote_buf(dst, argv[i]);
-		if (maxlen && dst->len > maxlen)
-			return -ENOSPC;
-	}
-	return ret;
-}
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
deleted file mode 100644
index 274bf26d3511..000000000000
--- a/tools/perf/util/quote.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERF_QUOTE_H
-#define __PERF_QUOTE_H
-
-#include <stddef.h>
-
-/* Help to copy the thing properly quoted for the shell safety.
- * any single quote is replaced with '\'', any exclamation point
- * is replaced with '\!', and the whole thing is enclosed in a
- * single quote pair.
- *
- * For example, if you are passing the result to system() as an
- * argument:
- *
- * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
- *
- * would be appropriate.  If the system() is going to call ssh to
- * run the command on the other side:
- *
- * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
- * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd));
- *
- * Note that the above examples leak memory!  Remember to free result from
- * sq_quote() in a real application.
- */
-
-struct strbuf;
-
-int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
-
-#endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/s390-cpumsf-kernel.h b/tools/perf/util/s390-cpumsf-kernel.h
new file mode 100644
index 000000000000..de8c7ad0eca8
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf-kernel.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Auxtrace support for s390 CPU measurement sampling facility
+ *
+ *  Copyright IBM Corp. 2018
+ *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ *	       Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMSF_KERNEL_H
+#define S390_CPUMSF_KERNEL_H
+
+#define	S390_CPUMSF_PAGESZ	4096	/* Size of sample block units */
+#define	S390_CPUMSF_DIAG_DEF_FIRST	0x8001	/* Diagnostic entry lowest id */
+
+struct hws_basic_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int CL:2;	    /* 32-33 Configuration Level	 */
+	unsigned int:14;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+};
+
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:15;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+};
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+};
+
+struct hws_trailer_entry {
+	union {
+		struct {
+			unsigned int f:1;	/* 0 - Block Full Indicator   */
+			unsigned int a:1;	/* 1 - Alert request control  */
+			unsigned int t:1;	/* 2 - Timestamp format	      */
+			unsigned int:29;	/* 3 - 31: Reserved	      */
+			unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+			unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
+		};
+		unsigned long long flags;	/* 0 - 64: All indicators     */
+	};
+	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	union {				 /* 48 - reserved for programming use */
+		struct {
+			unsigned long long clock_base:1; /* in progusage2 */
+			unsigned long long progusage1:63;
+			unsigned long long progusage2;
+		};
+		unsigned long long progusage[2];
+	};
+};
+
+#endif
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
new file mode 100644
index 000000000000..a2eeebbfb25f
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.c
@@ -0,0 +1,1033 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Auxiliary traces are collected during 'perf record' using rbd000 event.
+ * Several PERF_RECORD_XXX are generated during recording:
+ *
+ * PERF_RECORD_AUX:
+ *	Records that new data landed in the AUX buffer part.
+ * PERF_RECORD_AUXTRACE:
+ *	Defines auxtrace data. Followed by the actual data. The contents of
+ *	the auxtrace data is dependent on the event and the CPU.
+ *	This record is generated by perf record command. For details
+ *	see Documentation/perf.data-file-format.txt.
+ * PERF_RECORD_AUXTRACE_INFO:
+ *	Defines a table of contains for PERF_RECORD_AUXTRACE records. This
+ *	record is generated during 'perf record' command. Each record contains up
+ *	to 256 entries describing offset and size of the AUXTRACE data in the
+ *	perf.data file.
+ * PERF_RECORD_AUXTRACE_ERROR:
+ *	Indicates an error during AUXTRACE collection such as buffer overflow.
+ * PERF_RECORD_FINISHED_ROUND:
+ *	Perf events are not necessarily in time stamp order, as they can be
+ *	collected in parallel on different CPUs. If the events should be
+ *	processed in time order they need to be sorted first.
+ *	Perf report guarantees that there is no reordering over a
+ *	PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
+ *	time stamp lower than this record are processed (and displayed) before
+ *	the succeeding perf record are processed.
+ *
+ * These records are evaluated during perf report command.
+ *
+ * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
+ * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
+ * below.
+ * Auxiliary trace data is collected per CPU. To merge the data into the report
+ * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
+ * data is in ascending order.
+ *
+ * Each queue has a double linked list of auxtrace_buffers. This list contains
+ * the offset and size of a CPU's auxtrace data. During auxtrace processing
+ * the data portion is mmap()'ed.
+ *
+ * To sort the queues in chronological order, all queue access is controlled
+ * by the auxtrace_heap. This is basicly a stack, each stack element has two
+ * entries, the queue number and a time stamp. However the stack is sorted by
+ * the time stamps. The highest time stamp is at the bottom the lowest
+ * (nearest) time stamp is at the top. That sort order is maintained at all
+ * times!
+ *
+ * After the auxtrace infrastructure has been setup, the auxtrace queues are
+ * filled with data (offset/size pairs) and the auxtrace_heap is populated.
+ *
+ * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
+ * Each record is handled by s390_cpumsf_process_event(). The time stamp of
+ * the perf record is compared with the time stamp located on the auxtrace_heap
+ * top element. If that time stamp is lower than the time stamp from the
+ * record sample, the auxtrace queues will be processed. As auxtrace queues
+ * control many auxtrace_buffers and each buffer can be quite large, the
+ * auxtrace buffer might be processed only partially. In this case the
+ * position in the auxtrace_buffer of that queue is remembered and the time
+ * stamp of the last processed entry of the auxtrace_buffer replaces the
+ * current auxtrace_heap top.
+ *
+ * 3. Auxtrace_queues might run of out data and are feeded by the
+ * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
+ *
+ * Event Generation
+ * Each sampling-data entry in the auxilary trace data generates a perf sample.
+ * This sample is filled
+ * with data from the auxtrace such as PID/TID, instruction address, CPU state,
+ * etc. This sample is processed with perf_session__deliver_synth_event() to
+ * be included into the GUI.
+ *
+ * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
+ * auxiliary traces entries until the time stamp of this record is reached
+ * auxtrace_heap top. This is triggered by ordered_event->deliver().
+ *
+ *
+ * Perf event processing.
+ * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
+ * This is the function call sequence:
+ *
+ * __cmd_report()
+ * |
+ * perf_session__process_events()
+ * |
+ * __perf_session__process_events()
+ * |
+ * perf_session__process_event()
+ * |  This functions splits the PERF_RECORD_XXX records.
+ * |  - Those generated by perf record command (type number equal or higher
+ * |    than PERF_RECORD_USER_TYPE_START) are handled by
+ * |    perf_session__process_user_event(see below)
+ * |  - Those generated by the kernel are handled by
+ * |    perf_evlist__parse_sample_timestamp()
+ * |
+ * perf_evlist__parse_sample_timestamp()
+ * |  Extract time stamp from sample data.
+ * |
+ * perf_session__queue_event()
+ * |  If timestamp is positive the sample is entered into an ordered_event
+ * |  list, sort order is the timestamp. The event processing is deferred until
+ * |  later (see perf_session__process_user_event()).
+ * |  Other timestamps (0 or -1) are handled immediately by
+ * |  perf_session__deliver_event(). These are events generated at start up
+ * |  of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
+ * |  records. They are needed to create a list of running processes and its
+ * |  memory mappings and layout. They are needed at the beginning to enable
+ * |  command perf report to create process trees and memory mappings.
+ * |
+ * perf_session__deliver_event()
+ * |  Delivers a PERF_RECORD_XXX entry for handling.
+ * |
+ * auxtrace__process_event()
+ * |  The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
+ * |  time stamps from the auxiliary trace buffers. This enables
+ * |  synchronization between auxiliary trace data and the events on the
+ * |  perf.data file.
+ * |
+ * machine__deliver_event()
+ * |  Handles the PERF_RECORD_XXX event. This depends on the record type.
+ *    It might update the process tree, update a process memory map or enter
+ *    a sample with IP and call back chain data into GUI data pool.
+ *
+ *
+ * Deferred processing determined by perf_session__process_user_event() is
+ * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
+ * are generated during command perf record.
+ * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
+ * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
+ * built up while reading the perf.data file.
+ * Each event is now processed by calling perf_session__deliver_event().
+ * This enables time synchronization between the data in the perf.data file and
+ * the data in the auxiliary trace buffers.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "s390-cpumsf.h"
+#include "s390-cpumsf-kernel.h"
+#include "config.h"
+
+struct s390_cpumsf {
+	struct auxtrace		auxtrace;
+	struct auxtrace_queues	queues;
+	struct auxtrace_heap	heap;
+	struct perf_session	*session;
+	struct machine		*machine;
+	u32			auxtrace_type;
+	u32			pmu_type;
+	u16			machine_type;
+	bool			data_queued;
+	bool			use_logfile;
+	char			*logdir;
+};
+
+struct s390_cpumsf_queue {
+	struct s390_cpumsf	*sf;
+	unsigned int		queue_nr;
+	struct auxtrace_buffer	*buffer;
+	int			cpu;
+	FILE			*logfile;
+};
+
+/* Display s390 CPU measurement facility basic-sampling data entry */
+static bool s390_cpumsf_basic_show(const char *color, size_t pos,
+				   struct hws_basic_entry *basic)
+{
+	if (basic->def != 1) {
+		pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
+		return false;
+	}
+	color_fprintf(stdout, color, "    [%#08zx] Basic   Def:%04x Inst:%#04x"
+		      " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
+		      "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
+		      pos, basic->def, basic->U,
+		      basic->T ? 'T' : ' ',
+		      basic->W ? 'W' : ' ',
+		      basic->P ? 'P' : ' ',
+		      basic->I ? 'I' : ' ',
+		      basic->AS, basic->prim_asn, basic->ia, basic->CL,
+		      basic->hpp, basic->gpp);
+	return true;
+}
+
+/* Display s390 CPU measurement facility diagnostic-sampling data entry */
+static bool s390_cpumsf_diag_show(const char *color, size_t pos,
+				  struct hws_diag_entry *diag)
+{
+	if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
+		pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
+		return false;
+	}
+	color_fprintf(stdout, color, "    [%#08zx] Diag    Def:%04x %c\n",
+		      pos, diag->def, diag->I ? 'I' : ' ');
+	return true;
+}
+
+/* Return TOD timestamp contained in an trailer entry */
+static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+	/* te->t set: TOD in STCKE format, bytes 8-15
+	 * to->t not set: TOD in STCK format, bytes 0-7
+	 */
+	unsigned long long ts;
+
+	memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
+	return ts;
+}
+
+/* Display s390 CPU measurement facility trailer entry */
+static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
+				     struct hws_trailer_entry *te)
+{
+	if (te->bsdes != sizeof(struct hws_basic_entry)) {
+		pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
+		return false;
+	}
+	color_fprintf(stdout, color, "    [%#08zx] Trailer %c%c%c bsdes:%d"
+		      " dsdes:%d Overflow:%lld Time:%#llx\n"
+		      "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
+		      pos,
+		      te->f ? 'F' : ' ',
+		      te->a ? 'A' : ' ',
+		      te->t ? 'T' : ' ',
+		      te->bsdes, te->dsdes, te->overflow,
+		      trailer_timestamp(te), te->clock_base, te->progusage2,
+		      te->progusage[0], te->progusage[1]);
+	return true;
+}
+
+/* Test a sample data block. It must be 4KB or a multiple thereof in size and
+ * 4KB page aligned. Each sample data page has a trailer entry at the
+ * end which contains the sample entry data sizes.
+ *
+ * Return true if the sample data block passes the checks and set the
+ * basic set entry size and diagnostic set entry size.
+ *
+ * Return false on failure.
+ *
+ * Note: Old hardware does not set the basic or diagnostic entry sizes
+ * in the trailer entry. Use the type number instead.
+ */
+static bool s390_cpumsf_validate(int machine_type,
+				 unsigned char *buf, size_t len,
+				 unsigned short *bsdes,
+				 unsigned short *dsdes)
+{
+	struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
+	struct hws_trailer_entry *te;
+
+	*dsdes = *bsdes = 0;
+	if (len & (S390_CPUMSF_PAGESZ - 1))	/* Illegal size */
+		return false;
+	if (basic->def != 1)		/* No basic set entry, must be first */
+		return false;
+	/* Check for trailer entry at end of SDB */
+	te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+					      - sizeof(*te));
+	*bsdes = te->bsdes;
+	*dsdes = te->dsdes;
+	if (!te->bsdes && !te->dsdes) {
+		/* Very old hardware, use CPUID */
+		switch (machine_type) {
+		case 2097:
+		case 2098:
+			*dsdes = 64;
+			*bsdes = 32;
+			break;
+		case 2817:
+		case 2818:
+			*dsdes = 74;
+			*bsdes = 32;
+			break;
+		case 2827:
+		case 2828:
+			*dsdes = 85;
+			*bsdes = 32;
+			break;
+		default:
+			/* Illegal trailer entry */
+			return false;
+		}
+	}
+	return true;
+}
+
+/* Return true if there is room for another entry */
+static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
+{
+	size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
+
+	if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
+		return false;
+	return true;
+}
+
+/* Dump an auxiliary buffer. These buffers are multiple of
+ * 4KB SDB pages.
+ */
+static void s390_cpumsf_dump(struct s390_cpumsf *sf,
+			     unsigned char *buf, size_t len)
+{
+	const char *color = PERF_COLOR_BLUE;
+	struct hws_basic_entry *basic;
+	struct hws_diag_entry *diag;
+	unsigned short bsdes, dsdes;
+	size_t pos = 0;
+
+	color_fprintf(stdout, color,
+		      ". ... s390 AUX data: size %zu bytes\n",
+		      len);
+
+	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+				  &dsdes)) {
+		pr_err("Invalid AUX trace data block size:%zu"
+		       " (type:%d bsdes:%hd dsdes:%hd)\n",
+		       len, sf->machine_type, bsdes, dsdes);
+		return;
+	}
+
+	/* s390 kernel always returns 4KB blocks fully occupied,
+	 * no partially filled SDBs.
+	 */
+	while (pos < len) {
+		/* Handle Basic entry */
+		basic = (struct hws_basic_entry *)(buf + pos);
+		if (s390_cpumsf_basic_show(color, pos, basic))
+			pos += bsdes;
+		else
+			return;
+
+		/* Handle Diagnostic entry */
+		diag = (struct hws_diag_entry *)(buf + pos);
+		if (s390_cpumsf_diag_show(color, pos, diag))
+			pos += dsdes;
+		else
+			return;
+
+		/* Check for trailer entry */
+		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+			/* Show trailer entry */
+			struct hws_trailer_entry te;
+
+			pos = (pos + S390_CPUMSF_PAGESZ)
+			       & ~(S390_CPUMSF_PAGESZ - 1);
+			pos -= sizeof(te);
+			memcpy(&te, buf + pos, sizeof(te));
+			/* Set descriptor sizes in case of old hardware
+			 * where these values are not set.
+			 */
+			te.bsdes = bsdes;
+			te.dsdes = dsdes;
+			if (s390_cpumsf_trailer_show(color, pos, &te))
+				pos += sizeof(te);
+			else
+				return;
+		}
+	}
+}
+
+static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
+				   size_t len)
+{
+	printf(".\n");
+	s390_cpumsf_dump(sf, buf, len);
+}
+
+#define	S390_LPP_PID_MASK	0xffffffff
+
+static bool s390_cpumsf_make_event(size_t pos,
+				   struct hws_basic_entry *basic,
+				   struct s390_cpumsf_queue *sfq)
+{
+	struct perf_sample sample = {
+				.ip = basic->ia,
+				.pid = basic->hpp & S390_LPP_PID_MASK,
+				.tid = basic->hpp & S390_LPP_PID_MASK,
+				.cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
+				.cpu = sfq->cpu,
+				.period = 1
+			    };
+	union perf_event event;
+
+	memset(&event, 0, sizeof(event));
+	if (basic->CL == 1)	/* Native LPAR mode */
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+					  : PERF_RECORD_MISC_KERNEL;
+	else if (basic->CL == 2)	/* Guest kernel/user space */
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+					  : PERF_RECORD_MISC_GUEST_KERNEL;
+	else if (basic->gpp || basic->prim_asn != 0xffff)
+		/* Use heuristics on old hardware */
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+					  : PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+					  : PERF_RECORD_MISC_KERNEL;
+
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
+	pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
+		 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
+		 sample.tid, sample.cpumode, sample.cpu);
+	if (perf_session__deliver_synth_event(sfq->sf->session, &event,
+					      &sample)) {
+		pr_err("s390 Auxiliary Trace: failed to deliver event\n");
+		return false;
+	}
+	return true;
+}
+
+static unsigned long long get_trailer_time(const unsigned char *buf)
+{
+	struct hws_trailer_entry *te;
+	unsigned long long aux_time;
+
+	te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+					      - sizeof(*te));
+
+	if (!te->clock_base)	/* TOD_CLOCK_BASE value missing */
+		return 0;
+
+	/* Correct calculation to convert time stamp in trailer entry to
+	 * nano seconds (taken from arch/s390 function tod_to_ns()).
+	 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
+	 */
+	aux_time = trailer_timestamp(te) - te->progusage2;
+	aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
+	return aux_time;
+}
+
+/* Process the data samples of a single queue. The first parameter is a
+ * pointer to the queue, the second parameter is the time stamp. This
+ * is the time stamp:
+ * - of the event that triggered this processing.
+ * - or the time stamp when the last proccesing of this queue stopped.
+ *   In this case it stopped at a 4KB page boundary and record the
+ *   position on where to continue processing on the next invocation
+ *   (see buffer->use_data and buffer->use_size).
+ *
+ * When this function returns the second parameter is updated to
+ * reflect the time stamp of the last processed auxiliary data entry
+ * (taken from the trailer entry of that page). The caller uses this
+ * returned time stamp to record the last processed entry in this
+ * queue.
+ *
+ * The function returns:
+ * 0:  Processing successful. The second parameter returns the
+ *     time stamp from the trailer entry until which position
+ *     processing took place. Subsequent calls resume from this
+ *     position.
+ * <0: An error occurred during processing. The second parameter
+ *     returns the maximum time stamp.
+ * >0: Done on this queue. The second parameter returns the
+ *     maximum time stamp.
+ */
+static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
+{
+	struct s390_cpumsf *sf = sfq->sf;
+	unsigned char *buf = sfq->buffer->use_data;
+	size_t len = sfq->buffer->use_size;
+	struct hws_basic_entry *basic;
+	unsigned short bsdes, dsdes;
+	size_t pos = 0;
+	int err = 1;
+	u64 aux_ts;
+
+	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+				  &dsdes)) {
+		*ts = ~0ULL;
+		return -1;
+	}
+
+	/* Get trailer entry time stamp and check if entries in
+	 * this auxiliary page are ready for processing. If the
+	 * time stamp of the first entry is too high, whole buffer
+	 * can be skipped. In this case return time stamp.
+	 */
+	aux_ts = get_trailer_time(buf);
+	if (!aux_ts) {
+		pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
+		       sfq->buffer->data_offset);
+		aux_ts = ~0ULL;
+		goto out;
+	}
+	if (aux_ts > *ts) {
+		*ts = aux_ts;
+		return 0;
+	}
+
+	while (pos < len) {
+		/* Handle Basic entry */
+		basic = (struct hws_basic_entry *)(buf + pos);
+		if (s390_cpumsf_make_event(pos, basic, sfq))
+			pos += bsdes;
+		else {
+			err = -EBADF;
+			goto out;
+		}
+
+		pos += dsdes;	/* Skip diagnositic entry */
+
+		/* Check for trailer entry */
+		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+			pos = (pos + S390_CPUMSF_PAGESZ)
+			       & ~(S390_CPUMSF_PAGESZ - 1);
+			/* Check existence of next page */
+			if (pos >= len)
+				break;
+			aux_ts = get_trailer_time(buf + pos);
+			if (!aux_ts) {
+				aux_ts = ~0ULL;
+				goto out;
+			}
+			if (aux_ts > *ts) {
+				*ts = aux_ts;
+				sfq->buffer->use_data += pos;
+				sfq->buffer->use_size -= pos;
+				return 0;
+			}
+		}
+	}
+out:
+	*ts = aux_ts;
+	sfq->buffer->use_size = 0;
+	sfq->buffer->use_data = NULL;
+	return err;	/* Buffer completely scanned or error */
+}
+
+/* Run the s390 auxiliary trace decoder.
+ * Select the queue buffer to operate on, the caller already selected
+ * the proper queue, depending on second parameter 'ts'.
+ * This is the time stamp until which the auxiliary entries should
+ * be processed. This value is updated by called functions and
+ * returned to the caller.
+ *
+ * Resume processing in the current buffer. If there is no buffer
+ * get a new buffer from the queue and setup start position for
+ * processing.
+ * When a buffer is completely processed remove it from the queue
+ * before returning.
+ *
+ * This function returns
+ * 1: When the queue is empty. Second parameter will be set to
+ *    maximum time stamp.
+ * 0: Normal processing done.
+ * <0: Error during queue buffer setup. This causes the caller
+ *     to stop processing completely.
+ */
+static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
+				   u64 *ts)
+{
+
+	struct auxtrace_buffer *buffer;
+	struct auxtrace_queue *queue;
+	int err;
+
+	queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
+
+	/* Get buffer and last position in buffer to resume
+	 * decoding the auxiliary entries. One buffer might be large
+	 * and decoding might stop in between. This depends on the time
+	 * stamp of the trailer entry in each page of the auxiliary
+	 * data and the time stamp of the event triggering the decoding.
+	 */
+	if (sfq->buffer == NULL) {
+		sfq->buffer = buffer = auxtrace_buffer__next(queue,
+							     sfq->buffer);
+		if (!buffer) {
+			*ts = ~0ULL;
+			return 1;	/* Processing done on this queue */
+		}
+		/* Start with a new buffer on this queue */
+		if (buffer->data) {
+			buffer->use_size = buffer->size;
+			buffer->use_data = buffer->data;
+		}
+		if (sfq->logfile) {	/* Write into log file */
+			size_t rc = fwrite(buffer->data, buffer->size, 1,
+					   sfq->logfile);
+			if (rc != 1)
+				pr_err("Failed to write auxiliary data\n");
+		}
+	} else
+		buffer = sfq->buffer;
+
+	if (!buffer->data) {
+		int fd = perf_data__fd(sfq->sf->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data)
+			return -ENOMEM;
+		buffer->use_size = buffer->size;
+		buffer->use_data = buffer->data;
+
+		if (sfq->logfile) {	/* Write into log file */
+			size_t rc = fwrite(buffer->data, buffer->size, 1,
+					   sfq->logfile);
+			if (rc != 1)
+				pr_err("Failed to write auxiliary data\n");
+		}
+	}
+	pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
+		  __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
+		  buffer->size, buffer->use_size);
+	err = s390_cpumsf_samples(sfq, ts);
+
+	/* If non-zero, there is either an error (err < 0) or the buffer is
+	 * completely done (err > 0). The error is unrecoverable, usually
+	 * some descriptors could not be read successfully, so continue with
+	 * the next buffer.
+	 * In both cases the parameter 'ts' has been updated.
+	 */
+	if (err) {
+		sfq->buffer = NULL;
+		list_del(&buffer->list);
+		auxtrace_buffer__free(buffer);
+		if (err > 0)		/* Buffer done, no error */
+			err = 0;
+	}
+	return err;
+}
+
+static struct s390_cpumsf_queue *
+s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
+{
+	struct s390_cpumsf_queue *sfq;
+
+	sfq = zalloc(sizeof(struct s390_cpumsf_queue));
+	if (sfq == NULL)
+		return NULL;
+
+	sfq->sf = sf;
+	sfq->queue_nr = queue_nr;
+	sfq->cpu = -1;
+	if (sf->use_logfile) {
+		char *name;
+		int rc;
+
+		rc = (sf->logdir)
+			? asprintf(&name, "%s/aux.smp.%02x",
+				 sf->logdir, queue_nr)
+			: asprintf(&name, "aux.smp.%02x", queue_nr);
+		if (rc > 0)
+			sfq->logfile = fopen(name, "w");
+		if (sfq->logfile == NULL) {
+			pr_err("Failed to open auxiliary log file %s,"
+			       "continue...\n", name);
+			sf->use_logfile = false;
+		}
+		free(name);
+	}
+	return sfq;
+}
+
+static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
+				   struct auxtrace_queue *queue,
+				   unsigned int queue_nr, u64 ts)
+{
+	struct s390_cpumsf_queue *sfq = queue->priv;
+
+	if (list_empty(&queue->head))
+		return 0;
+
+	if (sfq == NULL) {
+		sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
+		if (!sfq)
+			return -ENOMEM;
+		queue->priv = sfq;
+
+		if (queue->cpu != -1)
+			sfq->cpu = queue->cpu;
+	}
+	return auxtrace_heap__add(&sf->heap, queue_nr, ts);
+}
+
+static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
+{
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < sf->queues.nr_queues; i++) {
+		ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
+					      i, ts);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
+{
+	if (!sf->queues.new_data)
+		return 0;
+
+	sf->queues.new_data = false;
+	return s390_cpumsf_setup_queues(sf, ts);
+}
+
+static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct s390_cpumsf_queue *sfq;
+
+		if (!sf->heap.heap_cnt)
+			return 0;
+
+		if (sf->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = sf->heap.heap_array[0].queue_nr;
+		queue = &sf->queues.queue_array[queue_nr];
+		sfq = queue->priv;
+
+		auxtrace_heap__pop(&sf->heap);
+		if (sf->heap.heap_cnt) {
+			ts = sf->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		ret = s390_cpumsf_run_decoder(sfq, &ts);
+		if (ret < 0) {
+			auxtrace_heap__add(&sf->heap, queue_nr, ts);
+			return ret;
+		}
+		if (!ret) {
+			ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
+				   pid_t pid, pid_t tid, u64 ip)
+{
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+	union perf_event event;
+	int err;
+
+	strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     code, cpu, pid, tid, ip, msg);
+
+	err = perf_session__deliver_synth_event(sf->session, &event, NULL);
+	if (err)
+		pr_err("s390 Auxiliary Trace: failed to deliver error event,"
+			"error %d\n", err);
+	return err;
+}
+
+static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
+{
+	return s390_cpumsf_synth_error(sf, 1, sample->cpu,
+				       sample->pid, sample->tid, 0);
+}
+
+static int
+s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
+			  union perf_event *event,
+			  struct perf_sample *sample,
+			  struct perf_tool *tool)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+	u64 timestamp = sample->time;
+	int err = 0;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("s390 Auxiliary Trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (event->header.type == PERF_RECORD_AUX &&
+	    event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+		return s390_cpumsf_lost(sf, sample);
+
+	if (timestamp) {
+		err = s390_cpumsf_update_queues(sf, timestamp);
+		if (!err)
+			err = s390_cpumsf_process_queues(sf, timestamp);
+	}
+	return err;
+}
+
+struct s390_cpumsf_synth {
+	struct perf_tool cpumsf_tool;
+	struct perf_session *session;
+};
+
+static int
+s390_cpumsf_process_auxtrace_event(struct perf_session *session,
+				   union perf_event *event __maybe_unused,
+				   struct perf_tool *tool __maybe_unused)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+
+	int fd = perf_data__fd(session->data);
+	struct auxtrace_buffer *buffer;
+	off_t data_offset;
+	int err;
+
+	if (sf->data_queued)
+		return 0;
+
+	if (perf_data__is_pipe(session->data)) {
+		data_offset = 0;
+	} else {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	err = auxtrace_queues__add_event(&sf->queues, session, event,
+					 data_offset, &buffer);
+	if (err)
+		return err;
+
+	/* Dump here after copying piped trace out of the pipe */
+	if (dump_trace) {
+		if (auxtrace_buffer__get_data(buffer, fd)) {
+			s390_cpumsf_dump_event(sf, buffer->data,
+					       buffer->size);
+			auxtrace_buffer__put_data(buffer);
+		}
+	}
+	return 0;
+}
+
+static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
+			     struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static void s390_cpumsf_free_queues(struct perf_session *session)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+	struct auxtrace_queues *queues = &sf->queues;
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
+						queues->queue_array[i].priv;
+
+		if (sfq != NULL && sfq->logfile) {
+			fclose(sfq->logfile);
+			sfq->logfile = NULL;
+		}
+		zfree(&queues->queue_array[i].priv);
+	}
+	auxtrace_queues__free(queues);
+}
+
+static void s390_cpumsf_free(struct perf_session *session)
+{
+	struct s390_cpumsf *sf = container_of(session->auxtrace,
+					      struct s390_cpumsf,
+					      auxtrace);
+
+	auxtrace_heap__free(&sf->heap);
+	s390_cpumsf_free_queues(session);
+	session->auxtrace = NULL;
+	free(sf->logdir);
+	free(sf);
+}
+
+static int s390_cpumsf_get_type(const char *cpuid)
+{
+	int ret, family = 0;
+
+	ret = sscanf(cpuid, "%*[^,],%u", &family);
+	return (ret == 1) ? family : 0;
+}
+
+/* Check itrace options set on perf report command.
+ * Return true, if none are set or all options specified can be
+ * handled on s390 (currently only option 'd' for logging.
+ * Return false otherwise.
+ */
+static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
+{
+	bool ison = false;
+
+	if (!itops || !itops->set)
+		return true;
+	ison = itops->inject || itops->instructions || itops->branches ||
+		itops->transactions || itops->ptwrites ||
+		itops->pwr_events || itops->errors ||
+		itops->dont_decode || itops->calls || itops->returns ||
+		itops->callchain || itops->thread_stack ||
+		itops->last_branch;
+	if (!ison)
+		return true;
+	pr_err("Unsupported --itrace options specified\n");
+	return false;
+}
+
+/* Check for AUXTRACE dump directory if it is needed.
+ * On failure print an error message but continue.
+ * Return 0 on wrong keyword in config file and 1 otherwise.
+ */
+static int s390_cpumsf__config(const char *var, const char *value, void *cb)
+{
+	struct s390_cpumsf *sf = cb;
+	struct stat stbuf;
+	int rc;
+
+	if (strcmp(var, "auxtrace.dumpdir"))
+		return 0;
+	sf->logdir = strdup(value);
+	if (sf->logdir == NULL) {
+		pr_err("Failed to find auxtrace log directory %s,"
+		       " continue with current directory...\n", value);
+		return 1;
+	}
+	rc = stat(sf->logdir, &stbuf);
+	if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
+		pr_err("Missing auxtrace log directory %s,"
+		       " continue with current directory...\n", value);
+		free(sf->logdir);
+		sf->logdir = NULL;
+	}
+	return 1;
+}
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+				      struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	struct s390_cpumsf *sf;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
+		return -EINVAL;
+
+	sf = zalloc(sizeof(struct s390_cpumsf));
+	if (sf == NULL)
+		return -ENOMEM;
+
+	if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
+		err = -EINVAL;
+		goto err_free;
+	}
+	sf->use_logfile = session->itrace_synth_opts->log;
+	if (sf->use_logfile)
+		perf_config(s390_cpumsf__config, sf);
+
+	err = auxtrace_queues__init(&sf->queues);
+	if (err)
+		goto err_free;
+
+	sf->session = session;
+	sf->machine = &session->machines.host; /* No kvm support */
+	sf->auxtrace_type = auxtrace_info->type;
+	sf->pmu_type = PERF_TYPE_RAW;
+	sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
+
+	sf->auxtrace.process_event = s390_cpumsf_process_event;
+	sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
+	sf->auxtrace.flush_events = s390_cpumsf_flush;
+	sf->auxtrace.free_events = s390_cpumsf_free_events;
+	sf->auxtrace.free = s390_cpumsf_free;
+	session->auxtrace = &sf->auxtrace;
+
+	if (dump_trace)
+		return 0;
+
+	err = auxtrace_queues__process_index(&sf->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	if (sf->queues.populated)
+		sf->data_queued = true;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&sf->queues);
+	session->auxtrace = NULL;
+err_free:
+	free(sf->logdir);
+	free(sf);
+	return err;
+}
diff --git a/tools/perf/util/s390-cpumsf.h b/tools/perf/util/s390-cpumsf.h
new file mode 100644
index 000000000000..fb64d100555c
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
+ */
+
+#ifndef INCLUDE__PERF_S390_CPUMSF_H
+#define INCLUDE__PERF_S390_CPUMSF_H
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *
+s390_cpumsf_recording_init(int *err, struct perf_pmu *s390_cpumsf_pmu);
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+				      struct perf_session *session);
+#endif
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 7b79c413486b..89cb887648f9 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -99,7 +99,7 @@ static void define_symbolic_value(const char *ev_name,
 	LEAVE;
 }
 
-static void define_symbolic_values(struct print_flag_sym *field,
+static void define_symbolic_values(struct tep_print_flag_sym *field,
 				   const char *ev_name,
 				   const char *field_name)
 {
@@ -157,7 +157,7 @@ static void define_flag_value(const char *ev_name,
 	LEAVE;
 }
 
-static void define_flag_values(struct print_flag_sym *field,
+static void define_flag_values(struct tep_print_flag_sym *field,
 			       const char *ev_name,
 			       const char *field_name)
 {
@@ -189,62 +189,62 @@ static void define_flag_field(const char *ev_name,
 	LEAVE;
 }
 
-static void define_event_symbols(struct event_format *event,
+static void define_event_symbols(struct tep_event_format *event,
 				 const char *ev_name,
-				 struct print_arg *args)
+				 struct tep_print_arg *args)
 {
 	if (args == NULL)
 		return;
 
 	switch (args->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		break;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		define_flag_value(ev_name, cur_field_name, "0",
 				  args->atom.atom);
 		zero_flag_atom = 0;
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		free(cur_field_name);
 		cur_field_name = strdup(args->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		define_event_symbols(event, ev_name, args->flags.field);
 		define_flag_field(ev_name, cur_field_name, args->flags.delim);
 		define_flag_values(args->flags.flags, ev_name, cur_field_name);
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		define_event_symbols(event, ev_name, args->symbol.field);
 		define_symbolic_field(ev_name, cur_field_name);
 		define_symbolic_values(args->symbol.symbols, ev_name,
 				       cur_field_name);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		define_event_symbols(event, ev_name, args->hex.field);
 		define_event_symbols(event, ev_name, args->hex.size);
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		define_event_symbols(event, ev_name, args->int_array.field);
 		define_event_symbols(event, ev_name, args->int_array.count);
 		define_event_symbols(event, ev_name, args->int_array.el_size);
 		break;
-	case PRINT_BSTRING:
-	case PRINT_DYNAMIC_ARRAY:
-	case PRINT_DYNAMIC_ARRAY_LEN:
-	case PRINT_STRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BITMASK:
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		define_event_symbols(event, ev_name, args->typecast.item);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(args->op.op, ":") == 0)
 			zero_flag_atom = 1;
 		define_event_symbols(event, ev_name, args->op.left);
 		define_event_symbols(event, ev_name, args->op.right);
 		break;
-	case PRINT_FUNC:
+	case TEP_PRINT_FUNC:
 	default:
 		pr_err("Unsupported print arg type\n");
 		/* we should warn... */
@@ -338,8 +338,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 				    struct addr_location *al)
 {
 	struct thread *thread = al->thread;
-	struct event_format *event = evsel->tp_format;
-	struct format_field *field;
+	struct tep_event_format *event = evsel->tp_format;
+	struct tep_format_field *field;
 	static char handler[256];
 	unsigned long long val;
 	unsigned long s, ns;
@@ -388,9 +388,9 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 	/* common fields other than pid can be accessed via xsub fns */
 
 	for (field = event->format.fields; field; field = field->next) {
-		if (field->flags & FIELD_IS_STRING) {
+		if (field->flags & TEP_FIELD_IS_STRING) {
 			int offset;
-			if (field->flags & FIELD_IS_DYNAMIC) {
+			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 				offset = *(int *)(data + field->offset);
 				offset &= 0xffff;
 			} else
@@ -399,7 +399,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 		} else { /* FIELD_IS_NUMERIC */
 			val = read_size(event, data + field->offset,
 					field->size);
-			if (field->flags & FIELD_IS_SIGNED) {
+			if (field->flags & TEP_FIELD_IS_SIGNED) {
 				XPUSHs(sv_2mortal(newSViv(val)));
 			} else {
 				XPUSHs(sv_2mortal(newSVuv(val)));
@@ -535,10 +535,10 @@ static int perl_stop_script(void)
 	return 0;
 }
 
-static int perl_generate_script(struct pevent *pevent, const char *outfile)
+static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
 {
-	struct event_format *event = NULL;
-	struct format_field *f;
+	struct tep_event_format *event = NULL;
+	struct tep_format_field *f;
 	char fname[PATH_MAX];
 	int not_first, count;
 	FILE *ofp;
@@ -646,11 +646,11 @@ sub print_backtrace\n\
 			count++;
 
 			fprintf(ofp, "%s=", f->name);
-			if (f->flags & FIELD_IS_STRING ||
-			    f->flags & FIELD_IS_FLAG ||
-			    f->flags & FIELD_IS_SYMBOLIC)
+			if (f->flags & TEP_FIELD_IS_STRING ||
+			    f->flags & TEP_FIELD_IS_FLAG ||
+			    f->flags & TEP_FIELD_IS_SYMBOLIC)
 				fprintf(ofp, "%%s");
-			else if (f->flags & FIELD_IS_SIGNED)
+			else if (f->flags & TEP_FIELD_IS_SIGNED)
 				fprintf(ofp, "%%d");
 			else
 				fprintf(ofp, "%%u");
@@ -668,7 +668,7 @@ sub print_backtrace\n\
 			if (++count % 5 == 0)
 				fprintf(ofp, "\n\t       ");
 
-			if (f->flags & FIELD_IS_FLAG) {
+			if (f->flags & TEP_FIELD_IS_FLAG) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t       ");
 					count = 4;
@@ -678,7 +678,7 @@ sub print_backtrace\n\
 					event->name);
 				fprintf(ofp, "\"%s\", $%s)", f->name,
 					f->name);
-			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+			} else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t       ");
 					count = 4;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 7f8afacd08ee..69aa93d4ee99 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -48,6 +48,7 @@
 #include "cpumap.h"
 #include "print_binary.h"
 #include "stat.h"
+#include "mem-events.h"
 
 #if PY_MAJOR_VERSION < 3
 #define _PyUnicode_FromString(arg) \
@@ -192,7 +193,7 @@ static void try_call_object(const char *handler_name, PyObject *args)
 		call_object(handler, args, handler_name);
 }
 
-static void define_value(enum print_arg_type field_type,
+static void define_value(enum tep_print_arg_type field_type,
 			 const char *ev_name,
 			 const char *field_name,
 			 const char *field_value,
@@ -203,7 +204,7 @@ static void define_value(enum print_arg_type field_type,
 	unsigned long long value;
 	unsigned n = 0;
 
-	if (field_type == PRINT_SYMBOL)
+	if (field_type == TEP_PRINT_SYMBOL)
 		handler_name = "define_symbolic_value";
 
 	t = PyTuple_New(4);
@@ -222,8 +223,8 @@ static void define_value(enum print_arg_type field_type,
 	Py_DECREF(t);
 }
 
-static void define_values(enum print_arg_type field_type,
-			  struct print_flag_sym *field,
+static void define_values(enum tep_print_arg_type field_type,
+			  struct tep_print_flag_sym *field,
 			  const char *ev_name,
 			  const char *field_name)
 {
@@ -234,7 +235,7 @@ static void define_values(enum print_arg_type field_type,
 		define_values(field_type, field->next, ev_name, field_name);
 }
 
-static void define_field(enum print_arg_type field_type,
+static void define_field(enum tep_print_arg_type field_type,
 			 const char *ev_name,
 			 const char *field_name,
 			 const char *delim)
@@ -243,10 +244,10 @@ static void define_field(enum print_arg_type field_type,
 	PyObject *t;
 	unsigned n = 0;
 
-	if (field_type == PRINT_SYMBOL)
+	if (field_type == TEP_PRINT_SYMBOL)
 		handler_name = "define_symbolic_field";
 
-	if (field_type == PRINT_FLAGS)
+	if (field_type == TEP_PRINT_FLAGS)
 		t = PyTuple_New(3);
 	else
 		t = PyTuple_New(2);
@@ -255,7 +256,7 @@ static void define_field(enum print_arg_type field_type,
 
 	PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
 	PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
-	if (field_type == PRINT_FLAGS)
+	if (field_type == TEP_PRINT_FLAGS)
 		PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim));
 
 	try_call_object(handler_name, t);
@@ -263,54 +264,54 @@ static void define_field(enum print_arg_type field_type,
 	Py_DECREF(t);
 }
 
-static void define_event_symbols(struct event_format *event,
+static void define_event_symbols(struct tep_event_format *event,
 				 const char *ev_name,
-				 struct print_arg *args)
+				 struct tep_print_arg *args)
 {
 	if (args == NULL)
 		return;
 
 	switch (args->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		break;
-	case PRINT_ATOM:
-		define_value(PRINT_FLAGS, ev_name, cur_field_name, "0",
+	case TEP_PRINT_ATOM:
+		define_value(TEP_PRINT_FLAGS, ev_name, cur_field_name, "0",
 			     args->atom.atom);
 		zero_flag_atom = 0;
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		free(cur_field_name);
 		cur_field_name = strdup(args->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		define_event_symbols(event, ev_name, args->flags.field);
-		define_field(PRINT_FLAGS, ev_name, cur_field_name,
+		define_field(TEP_PRINT_FLAGS, ev_name, cur_field_name,
 			     args->flags.delim);
-		define_values(PRINT_FLAGS, args->flags.flags, ev_name,
+		define_values(TEP_PRINT_FLAGS, args->flags.flags, ev_name,
 			      cur_field_name);
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		define_event_symbols(event, ev_name, args->symbol.field);
-		define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL);
-		define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
+		define_field(TEP_PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+		define_values(TEP_PRINT_SYMBOL, args->symbol.symbols, ev_name,
 			      cur_field_name);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		define_event_symbols(event, ev_name, args->hex.field);
 		define_event_symbols(event, ev_name, args->hex.size);
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		define_event_symbols(event, ev_name, args->int_array.field);
 		define_event_symbols(event, ev_name, args->int_array.count);
 		define_event_symbols(event, ev_name, args->int_array.el_size);
 		break;
-	case PRINT_STRING:
+	case TEP_PRINT_STRING:
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		define_event_symbols(event, ev_name, args->typecast.item);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(args->op.op, ":") == 0)
 			zero_flag_atom = 1;
 		define_event_symbols(event, ev_name, args->op.left);
@@ -318,11 +319,11 @@ static void define_event_symbols(struct event_format *event,
 		break;
 	default:
 		/* gcc warns for these? */
-	case PRINT_BSTRING:
-	case PRINT_DYNAMIC_ARRAY:
-	case PRINT_DYNAMIC_ARRAY_LEN:
-	case PRINT_FUNC:
-	case PRINT_BITMASK:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_FUNC:
+	case TEP_PRINT_BITMASK:
 		/* we should warn... */
 		return;
 	}
@@ -331,10 +332,10 @@ static void define_event_symbols(struct event_format *event,
 		define_event_symbols(event, ev_name, args->next);
 }
 
-static PyObject *get_field_numeric_entry(struct event_format *event,
-		struct format_field *field, void *data)
+static PyObject *get_field_numeric_entry(struct tep_event_format *event,
+		struct tep_format_field *field, void *data)
 {
-	bool is_array = field->flags & FIELD_IS_ARRAY;
+	bool is_array = field->flags & TEP_FIELD_IS_ARRAY;
 	PyObject *obj = NULL, *list = NULL;
 	unsigned long long val;
 	unsigned int item_size, n_items, i;
@@ -352,7 +353,7 @@ static PyObject *get_field_numeric_entry(struct event_format *event,
 
 		val = read_size(event, data + field->offset + i * item_size,
 				item_size);
-		if (field->flags & FIELD_IS_SIGNED) {
+		if (field->flags & TEP_FIELD_IS_SIGNED) {
 			if ((long long)val >= LONG_MIN &&
 					(long long)val <= LONG_MAX)
 				obj = _PyLong_FromLong(val);
@@ -372,6 +373,19 @@ static PyObject *get_field_numeric_entry(struct event_format *event,
 	return obj;
 }
 
+static const char *get_dsoname(struct map *map)
+{
+	const char *dsoname = "[unknown]";
+
+	if (map && map->dso) {
+		if (symbol_conf.show_kernel_path && map->dso->long_name)
+			dsoname = map->dso->long_name;
+		else
+			dsoname = map->dso->name;
+	}
+
+	return dsoname;
+}
 
 static PyObject *python_process_callchain(struct perf_sample *sample,
 					 struct perf_evsel *evsel,
@@ -427,14 +441,8 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
 		}
 
 		if (node->map) {
-			struct map *map = node->map;
-			const char *dsoname = "[unknown]";
-			if (map && map->dso) {
-				if (symbol_conf.show_kernel_path && map->dso->long_name)
-					dsoname = map->dso->long_name;
-				else
-					dsoname = map->dso->name;
-			}
+			const char *dsoname = get_dsoname(node->map);
+
 			pydict_set_item_string_decref(pyelem, "dso",
 					_PyUnicode_FromString(dsoname));
 		}
@@ -448,6 +456,166 @@ exit:
 	return pylist;
 }
 
+static PyObject *python_process_brstack(struct perf_sample *sample,
+					struct thread *thread)
+{
+	struct branch_stack *br = sample->branch_stack;
+	PyObject *pylist;
+	u64 i;
+
+	pylist = PyList_New(0);
+	if (!pylist)
+		Py_FatalError("couldn't create Python list");
+
+	if (!(br && br->nr))
+		goto exit;
+
+	for (i = 0; i < br->nr; i++) {
+		PyObject *pyelem;
+		struct addr_location al;
+		const char *dsoname;
+
+		pyelem = PyDict_New();
+		if (!pyelem)
+			Py_FatalError("couldn't create Python dictionary");
+
+		pydict_set_item_string_decref(pyelem, "from",
+		    PyLong_FromUnsignedLongLong(br->entries[i].from));
+		pydict_set_item_string_decref(pyelem, "to",
+		    PyLong_FromUnsignedLongLong(br->entries[i].to));
+		pydict_set_item_string_decref(pyelem, "mispred",
+		    PyBool_FromLong(br->entries[i].flags.mispred));
+		pydict_set_item_string_decref(pyelem, "predicted",
+		    PyBool_FromLong(br->entries[i].flags.predicted));
+		pydict_set_item_string_decref(pyelem, "in_tx",
+		    PyBool_FromLong(br->entries[i].flags.in_tx));
+		pydict_set_item_string_decref(pyelem, "abort",
+		    PyBool_FromLong(br->entries[i].flags.abort));
+		pydict_set_item_string_decref(pyelem, "cycles",
+		    PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+
+		thread__find_map(thread, sample->cpumode,
+				 br->entries[i].from, &al);
+		dsoname = get_dsoname(al.map);
+		pydict_set_item_string_decref(pyelem, "from_dsoname",
+					      _PyUnicode_FromString(dsoname));
+
+		thread__find_map(thread, sample->cpumode,
+				 br->entries[i].to, &al);
+		dsoname = get_dsoname(al.map);
+		pydict_set_item_string_decref(pyelem, "to_dsoname",
+					      _PyUnicode_FromString(dsoname));
+
+		PyList_Append(pylist, pyelem);
+		Py_DECREF(pyelem);
+	}
+
+exit:
+	return pylist;
+}
+
+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+	unsigned long offset;
+
+	if (al->addr < sym->end)
+		offset = al->addr - sym->start;
+	else
+		offset = al->addr - al->map->start - sym->start;
+
+	return offset;
+}
+
+static int get_symoff(struct symbol *sym, struct addr_location *al,
+		      bool print_off, char *bf, int size)
+{
+	unsigned long offset;
+
+	if (!sym || !sym->name[0])
+		return scnprintf(bf, size, "%s", "[unknown]");
+
+	if (!print_off)
+		return scnprintf(bf, size, "%s", sym->name);
+
+	offset = get_offset(sym, al);
+
+	return scnprintf(bf, size, "%s+0x%x", sym->name, offset);
+}
+
+static int get_br_mspred(struct branch_flags *flags, char *bf, int size)
+{
+	if (!flags->mispred  && !flags->predicted)
+		return scnprintf(bf, size, "%s", "-");
+
+	if (flags->mispred)
+		return scnprintf(bf, size, "%s", "M");
+
+	return scnprintf(bf, size, "%s", "P");
+}
+
+static PyObject *python_process_brstacksym(struct perf_sample *sample,
+					   struct thread *thread)
+{
+	struct branch_stack *br = sample->branch_stack;
+	PyObject *pylist;
+	u64 i;
+	char bf[512];
+	struct addr_location al;
+
+	pylist = PyList_New(0);
+	if (!pylist)
+		Py_FatalError("couldn't create Python list");
+
+	if (!(br && br->nr))
+		goto exit;
+
+	for (i = 0; i < br->nr; i++) {
+		PyObject *pyelem;
+
+		pyelem = PyDict_New();
+		if (!pyelem)
+			Py_FatalError("couldn't create Python dictionary");
+
+		thread__find_symbol(thread, sample->cpumode,
+				    br->entries[i].from, &al);
+		get_symoff(al.sym, &al, true, bf, sizeof(bf));
+		pydict_set_item_string_decref(pyelem, "from",
+					      _PyUnicode_FromString(bf));
+
+		thread__find_symbol(thread, sample->cpumode,
+				    br->entries[i].to, &al);
+		get_symoff(al.sym, &al, true, bf, sizeof(bf));
+		pydict_set_item_string_decref(pyelem, "to",
+					      _PyUnicode_FromString(bf));
+
+		get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+		pydict_set_item_string_decref(pyelem, "pred",
+					      _PyUnicode_FromString(bf));
+
+		if (br->entries[i].flags.in_tx) {
+			pydict_set_item_string_decref(pyelem, "in_tx",
+					      _PyUnicode_FromString("X"));
+		} else {
+			pydict_set_item_string_decref(pyelem, "in_tx",
+					      _PyUnicode_FromString("-"));
+		}
+
+		if (br->entries[i].flags.abort) {
+			pydict_set_item_string_decref(pyelem, "abort",
+					      _PyUnicode_FromString("A"));
+		} else {
+			pydict_set_item_string_decref(pyelem, "abort",
+					      _PyUnicode_FromString("-"));
+		}
+
+		PyList_Append(pylist, pyelem);
+		Py_DECREF(pyelem);
+	}
+
+exit:
+	return pylist;
+}
+
 static PyObject *get_sample_value_as_tuple(struct sample_read_value *value)
 {
 	PyObject *t;
@@ -498,12 +666,63 @@ static void set_sample_read_in_dict(PyObject *dict_sample,
 	pydict_set_item_string_decref(dict_sample, "values", values);
 }
 
+static void set_sample_datasrc_in_dict(PyObject *dict,
+				       struct perf_sample *sample)
+{
+	struct mem_info mi = { .data_src.val = sample->data_src };
+	char decode[100];
+
+	pydict_set_item_string_decref(dict, "datasrc",
+			PyLong_FromUnsignedLongLong(sample->data_src));
+
+	perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+	pydict_set_item_string_decref(dict, "datasrc_decode",
+			_PyUnicode_FromString(decode));
+}
+
+static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
+{
+	unsigned int i = 0, r;
+	int printed = 0;
+
+	bf[0] = 0;
+
+	for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+		u64 val = regs->regs[i++];
+
+		printed += scnprintf(bf + printed, size - printed,
+				     "%5s:0x%" PRIx64 " ",
+				     perf_reg_name(r), val);
+	}
+
+	return printed;
+}
+
+static void set_regs_in_dict(PyObject *dict,
+			     struct perf_sample *sample,
+			     struct perf_evsel *evsel)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	char bf[512];
+
+	regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
+
+	pydict_set_item_string_decref(dict, "iregs",
+			_PyUnicode_FromString(bf));
+
+	regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
+
+	pydict_set_item_string_decref(dict, "uregs",
+			_PyUnicode_FromString(bf));
+}
+
 static PyObject *get_perf_sample_dict(struct perf_sample *sample,
 					 struct perf_evsel *evsel,
 					 struct addr_location *al,
 					 PyObject *callchain)
 {
-	PyObject *dict, *dict_sample;
+	PyObject *dict, *dict_sample, *brstack, *brstacksym;
 
 	dict = PyDict_New();
 	if (!dict)
@@ -534,6 +753,11 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
 	pydict_set_item_string_decref(dict_sample, "addr",
 			PyLong_FromUnsignedLongLong(sample->addr));
 	set_sample_read_in_dict(dict_sample, sample, evsel);
+	pydict_set_item_string_decref(dict_sample, "weight",
+			PyLong_FromUnsignedLongLong(sample->weight));
+	pydict_set_item_string_decref(dict_sample, "transaction",
+			PyLong_FromUnsignedLongLong(sample->transaction));
+	set_sample_datasrc_in_dict(dict_sample, sample);
 	pydict_set_item_string_decref(dict, "sample", dict_sample);
 
 	pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize(
@@ -551,6 +775,14 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
 
 	pydict_set_item_string_decref(dict, "callchain", callchain);
 
+	brstack = python_process_brstack(sample, al->thread);
+	pydict_set_item_string_decref(dict, "brstack", brstack);
+
+	brstacksym = python_process_brstacksym(sample, al->thread);
+	pydict_set_item_string_decref(dict, "brstacksym", brstacksym);
+
+	set_regs_in_dict(dict, sample, evsel);
+
 	return dict;
 }
 
@@ -558,11 +790,11 @@ static void python_process_tracepoint(struct perf_sample *sample,
 				      struct perf_evsel *evsel,
 				      struct addr_location *al)
 {
-	struct event_format *event = evsel->tp_format;
+	struct tep_event_format *event = evsel->tp_format;
 	PyObject *handler, *context, *t, *obj = NULL, *callchain;
 	PyObject *dict = NULL, *all_entries_dict = NULL;
 	static char handler_name[256];
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long s, ns;
 	unsigned n = 0;
 	int pid;
@@ -635,22 +867,22 @@ static void python_process_tracepoint(struct perf_sample *sample,
 		unsigned int offset, len;
 		unsigned long long val;
 
-		if (field->flags & FIELD_IS_ARRAY) {
+		if (field->flags & TEP_FIELD_IS_ARRAY) {
 			offset = field->offset;
 			len    = field->size;
-			if (field->flags & FIELD_IS_DYNAMIC) {
-				val     = pevent_read_number(scripting_context->pevent,
-							     data + offset, len);
+			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
+				val     = tep_read_number(scripting_context->pevent,
+							  data + offset, len);
 				offset  = val;
 				len     = offset >> 16;
 				offset &= 0xffff;
 			}
-			if (field->flags & FIELD_IS_STRING &&
+			if (field->flags & TEP_FIELD_IS_STRING &&
 			    is_printable_array(data + offset, len)) {
 				obj = _PyUnicode_FromString((char *) data + offset);
 			} else {
 				obj = PyByteArray_FromStringAndSize((const char *) data + offset, len);
-				field->flags &= ~FIELD_IS_STRING;
+				field->flags &= ~TEP_FIELD_IS_STRING;
 			}
 		} else { /* FIELD_IS_NUMERIC */
 			obj = get_field_numeric_entry(event, field, data);
@@ -676,14 +908,11 @@ static void python_process_tracepoint(struct perf_sample *sample,
 	if (_PyTuple_Resize(&t, n) == -1)
 		Py_FatalError("error resizing Python tuple");
 
-	if (!dict) {
+	if (!dict)
 		call_object(handler, t, handler_name);
-	} else {
+	else
 		call_object(handler, t, default_handler_name);
-		Py_DECREF(dict);
-	}
 
-	Py_XDECREF(all_entries_dict);
 	Py_DECREF(t);
 }
 
@@ -1003,7 +1232,6 @@ static void python_process_general_event(struct perf_sample *sample,
 
 	call_object(handler, t, handler_name);
 
-	Py_DECREF(dict);
 	Py_DECREF(t);
 }
 
@@ -1360,10 +1588,10 @@ static int python_stop_script(void)
 	return 0;
 }
 
-static int python_generate_script(struct pevent *pevent, const char *outfile)
+static int python_generate_script(struct tep_handle *pevent, const char *outfile)
 {
-	struct event_format *event = NULL;
-	struct format_field *f;
+	struct tep_event_format *event = NULL;
+	struct tep_format_field *f;
 	char fname[PATH_MAX];
 	int not_first, count;
 	FILE *ofp;
@@ -1395,6 +1623,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "# See the perf-script-python Documentation for the list "
 		"of available functions.\n\n");
 
+	fprintf(ofp, "from __future__ import print_function\n\n");
 	fprintf(ofp, "import os\n");
 	fprintf(ofp, "import sys\n\n");
 
@@ -1404,10 +1633,10 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "from Core import *\n\n\n");
 
 	fprintf(ofp, "def trace_begin():\n");
-	fprintf(ofp, "\tprint \"in trace_begin\"\n\n");
+	fprintf(ofp, "\tprint(\"in trace_begin\")\n\n");
 
 	fprintf(ofp, "def trace_end():\n");
-	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
+	fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
 
 	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "def %s__%s(", event->system, event->name);
@@ -1443,7 +1672,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 			"common_secs, common_nsecs,\n\t\t\t"
 			"common_pid, common_comm)\n\n");
 
-		fprintf(ofp, "\t\tprint \"");
+		fprintf(ofp, "\t\tprint(\"");
 
 		not_first = 0;
 		count = 0;
@@ -1457,12 +1686,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 			count++;
 
 			fprintf(ofp, "%s=", f->name);
-			if (f->flags & FIELD_IS_STRING ||
-			    f->flags & FIELD_IS_FLAG ||
-			    f->flags & FIELD_IS_ARRAY ||
-			    f->flags & FIELD_IS_SYMBOLIC)
+			if (f->flags & TEP_FIELD_IS_STRING ||
+			    f->flags & TEP_FIELD_IS_FLAG ||
+			    f->flags & TEP_FIELD_IS_ARRAY ||
+			    f->flags & TEP_FIELD_IS_SYMBOLIC)
 				fprintf(ofp, "%%s");
-			else if (f->flags & FIELD_IS_SIGNED)
+			else if (f->flags & TEP_FIELD_IS_SIGNED)
 				fprintf(ofp, "%%d");
 			else
 				fprintf(ofp, "%%u");
@@ -1480,7 +1709,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 			if (++count % 5 == 0)
 				fprintf(ofp, "\n\t\t");
 
-			if (f->flags & FIELD_IS_FLAG) {
+			if (f->flags & TEP_FIELD_IS_FLAG) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t\t");
 					count = 4;
@@ -1490,7 +1719,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 					event->name);
 				fprintf(ofp, "\"%s\", %s)", f->name,
 					f->name);
-			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+			} else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t\t");
 					count = 4;
@@ -1504,31 +1733,31 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 				fprintf(ofp, "%s", f->name);
 		}
 
-		fprintf(ofp, ")\n\n");
+		fprintf(ofp, "))\n\n");
 
-		fprintf(ofp, "\t\tprint 'Sample: {'+"
-			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+		fprintf(ofp, "\t\tprint('Sample: {'+"
+			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
 
 		fprintf(ofp, "\t\tfor node in common_callchain:");
 		fprintf(ofp, "\n\t\t\tif 'sym' in node:");
-		fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
 		fprintf(ofp, "\n\t\t\telse:");
-		fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n");
-		fprintf(ofp, "\t\tprint \"\\n\"\n\n");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
+		fprintf(ofp, "\t\tprint()\n\n");
 
 	}
 
 	fprintf(ofp, "def trace_unhandled(event_name, context, "
 		"event_fields_dict, perf_sample_dict):\n");
 
-	fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n");
-	fprintf(ofp, "\t\tprint 'Sample: {'+"
-		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+	fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n");
+	fprintf(ofp, "\t\tprint('Sample: {'+"
+		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
 
 	fprintf(ofp, "def print_header("
 		"event_name, cpu, secs, nsecs, pid, comm):\n"
-		"\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
-		"(event_name, cpu, secs, nsecs, pid, comm),\n\n");
+		"\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+		"(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n");
 
 	fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
 		"\treturn delimiter.join"
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f4a7a437ee87..7d2c8ce6cfad 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -199,12 +199,10 @@ void perf_session__delete(struct perf_session *session)
 	free(session);
 }
 
-static int process_event_synth_tracing_data_stub(struct perf_tool *tool
+static int process_event_synth_tracing_data_stub(struct perf_session *session
 						 __maybe_unused,
 						 union perf_event *event
-						 __maybe_unused,
-						 struct perf_session *session
-						__maybe_unused)
+						 __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -277,10 +275,8 @@ static int skipn(int fd, off_t n)
 	return 0;
 }
 
-static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
-				       union perf_event *event,
-				       struct perf_session *session
-				       __maybe_unused)
+static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+				       union perf_event *event)
 {
 	dump_printf(": unhandled!\n");
 	if (perf_data__is_pipe(session->data))
@@ -288,9 +284,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
 	return event->auxtrace.size;
 }
 
-static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
-				  union perf_event *event __maybe_unused,
-				  struct perf_session *session __maybe_unused)
+static int process_event_op2_stub(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -298,9 +293,8 @@ static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
 
 
 static
-int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
-				  union perf_event *event __maybe_unused,
-				  struct perf_session *session __maybe_unused)
+int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_thread_map(event, stdout);
@@ -310,9 +304,8 @@ int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
 }
 
 static
-int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
-			       union perf_event *event __maybe_unused,
-			       struct perf_session *session __maybe_unused)
+int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+			       union perf_event *event __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_cpu_map(event, stdout);
@@ -322,9 +315,8 @@ int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
 }
 
 static
-int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event __maybe_unused,
-				   struct perf_session *session __maybe_unused)
+int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+				   union perf_event *event __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_stat_config(event, stdout);
@@ -333,10 +325,8 @@ int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_stat_stub(struct perf_tool *tool __maybe_unused,
-			     union perf_event *event __maybe_unused,
-			     struct perf_session *perf_session
-			     __maybe_unused)
+static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
+			     union perf_event *event)
 {
 	if (dump_trace)
 		perf_event__fprintf_stat(event, stdout);
@@ -345,10 +335,8 @@ static int process_stat_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event __maybe_unused,
-				   struct perf_session *perf_session
-				   __maybe_unused)
+static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+				   union perf_event *event)
 {
 	if (dump_trace)
 		perf_event__fprintf_stat_round(event, stdout);
@@ -1094,7 +1082,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 
 	sample_type = evsel->attr.sample_type;
 
-	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+	if (evsel__has_callchain(evsel))
 		callchain__printf(evsel, sample);
 
 	if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
@@ -1374,37 +1362,37 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 	case PERF_RECORD_HEADER_TRACING_DATA:
 		/* setup for reading amidst mmap */
 		lseek(fd, file_offset, SEEK_SET);
-		return tool->tracing_data(tool, event, session);
+		return tool->tracing_data(session, event);
 	case PERF_RECORD_HEADER_BUILD_ID:
-		return tool->build_id(tool, event, session);
+		return tool->build_id(session, event);
 	case PERF_RECORD_FINISHED_ROUND:
 		return tool->finished_round(tool, event, oe);
 	case PERF_RECORD_ID_INDEX:
-		return tool->id_index(tool, event, session);
+		return tool->id_index(session, event);
 	case PERF_RECORD_AUXTRACE_INFO:
-		return tool->auxtrace_info(tool, event, session);
+		return tool->auxtrace_info(session, event);
 	case PERF_RECORD_AUXTRACE:
 		/* setup for reading amidst mmap */
 		lseek(fd, file_offset + event->header.size, SEEK_SET);
-		return tool->auxtrace(tool, event, session);
+		return tool->auxtrace(session, event);
 	case PERF_RECORD_AUXTRACE_ERROR:
 		perf_session__auxtrace_error_inc(session, event);
-		return tool->auxtrace_error(tool, event, session);
+		return tool->auxtrace_error(session, event);
 	case PERF_RECORD_THREAD_MAP:
-		return tool->thread_map(tool, event, session);
+		return tool->thread_map(session, event);
 	case PERF_RECORD_CPU_MAP:
-		return tool->cpu_map(tool, event, session);
+		return tool->cpu_map(session, event);
 	case PERF_RECORD_STAT_CONFIG:
-		return tool->stat_config(tool, event, session);
+		return tool->stat_config(session, event);
 	case PERF_RECORD_STAT:
-		return tool->stat(tool, event, session);
+		return tool->stat(session, event);
 	case PERF_RECORD_STAT_ROUND:
-		return tool->stat_round(tool, event, session);
+		return tool->stat_round(session, event);
 	case PERF_RECORD_TIME_CONV:
 		session->time_conv = event->time_conv;
-		return tool->time_conv(tool, event, session);
+		return tool->time_conv(session, event);
 	case PERF_RECORD_HEADER_FEATURE:
-		return tool->feature(tool, event, session);
+		return tool->feature(session, event);
 	default:
 		return -EINVAL;
 	}
@@ -1973,12 +1961,11 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
 	return false;
 }
 
-int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
-				     const char *symbol_name, u64 addr)
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
 {
 	char *bracket;
-	int i;
 	struct ref_reloc_sym *ref;
+	struct kmap *kmap;
 
 	ref = zalloc(sizeof(struct ref_reloc_sym));
 	if (ref == NULL)
@@ -1996,13 +1983,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
 
 	ref->addr = addr;
 
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		struct kmap *kmap = map__kmap(maps[i]);
-
-		if (!kmap)
-			continue;
+	kmap = map__kmap(map);
+	if (kmap)
 		kmap->ref_reloc_sym = ref;
-	}
 
 	return 0;
 }
@@ -2138,9 +2121,8 @@ out:
 	return err;
 }
 
-int perf_event__process_id_index(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event,
-				 struct perf_session *session)
+int perf_event__process_id_index(struct perf_session *session,
+				 union perf_event *event)
 {
 	struct perf_evlist *evlist = session->evlist;
 	struct id_index_event *ie = &event->id_index;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index da40b4b380ca..d96eccd7d27f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -120,9 +120,8 @@ int perf_session__deliver_synth_event(struct perf_session *session,
 				      union perf_event *event,
 				      struct perf_sample *sample);
 
-int perf_event__process_id_index(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session);
+int perf_event__process_id_index(struct perf_session *session,
+				 union perf_event *event);
 
 int perf_event__synthesize_id_index(struct perf_tool *tool,
 				    perf_event__handler_t process,
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 001be4f9d3b9..63f758c655d5 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,12 +1,22 @@
 #!/usr/bin/python
 
 from os import getenv
+from subprocess import Popen, PIPE
+from re import sub
+
+def clang_has_option(option):
+    return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
 
 cc = getenv("CC")
 if cc == "clang":
-    from _sysconfigdata import build_time_vars
-    from re import sub
-    build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"])
+    from distutils.sysconfig import get_config_vars
+    vars = get_config_vars()
+    for var in ('CFLAGS', 'OPT'):
+        vars[var] = sub("-specs=[^ ]+", "", vars[var])
+        if not clang_has_option("-mcet"):
+            vars[var] = sub("-mcet", "", vars[var])
+        if not clang_has_option("-fcf-protection"):
+            vars[var] = sub("-fcf-protection", "", vars[var])
 
 from distutils.core import setup, Extension
 
@@ -27,7 +37,7 @@ class install_lib(_install_lib):
 
 cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
-cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
 if cc != "clang":
     cflags += ['-Wno-cast-function-type' ]
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 26a68dfd8a4f..f96c005b3c41 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2,7 +2,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <regex.h>
-#include <sys/mman.h>
+#include <linux/mman.h>
 #include "sort.h"
 #include "hist.h"
 #include "comm.h"
@@ -282,7 +282,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 
 	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
 	if (sym && map) {
-		if (map->type == MAP__VARIABLE) {
+		if (sym->type == STT_OBJECT) {
 			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
 			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
 					ip - map->unmap_ip(map, sym->start));
@@ -331,24 +331,18 @@ struct sort_entry sort_sym = {
 
 /* --sort srcline */
 
-char *hist_entry__get_srcline(struct hist_entry *he)
+char *hist_entry__srcline(struct hist_entry *he)
 {
-	struct map *map = he->ms.map;
-
-	if (!map)
-		return SRCLINE_UNKNOWN;
-
-	return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
-			   he->ms.sym, true, true, he->ip);
+	return map__srcline(he->ms.map, he->ip, he->ms.sym);
 }
 
 static int64_t
 sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	if (!left->srcline)
-		left->srcline = hist_entry__get_srcline(left);
+		left->srcline = hist_entry__srcline(left);
 	if (!right->srcline)
-		right->srcline = hist_entry__get_srcline(right);
+		right->srcline = hist_entry__srcline(right);
 
 	return strcmp(right->srcline, left->srcline);
 }
@@ -357,7 +351,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
 	if (!he->srcline)
-		he->srcline = hist_entry__get_srcline(he);
+		he->srcline = hist_entry__srcline(he);
 
 	return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
 }
@@ -371,33 +365,20 @@ struct sort_entry sort_srcline = {
 
 /* --sort srcline_from */
 
+static char *addr_map_symbol__srcline(struct addr_map_symbol *ams)
+{
+	return map__srcline(ams->map, ams->al_addr, ams->sym);
+}
+
 static int64_t
 sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	if (!left->branch_info->srcline_from) {
-		struct map *map = left->branch_info->from.map;
-		if (!map)
-			left->branch_info->srcline_from = SRCLINE_UNKNOWN;
-		else
-			left->branch_info->srcline_from = get_srcline(map->dso,
-					   map__rip_2objdump(map,
-							     left->branch_info->from.al_addr),
-							 left->branch_info->from.sym,
-							 true, true,
-							 left->branch_info->from.al_addr);
-	}
-	if (!right->branch_info->srcline_from) {
-		struct map *map = right->branch_info->from.map;
-		if (!map)
-			right->branch_info->srcline_from = SRCLINE_UNKNOWN;
-		else
-			right->branch_info->srcline_from = get_srcline(map->dso,
-					     map__rip_2objdump(map,
-							       right->branch_info->from.al_addr),
-						     right->branch_info->from.sym,
-						     true, true,
-						     right->branch_info->from.al_addr);
-	}
+	if (!left->branch_info->srcline_from)
+		left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from);
+
+	if (!right->branch_info->srcline_from)
+		right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from);
+
 	return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
 }
 
@@ -419,30 +400,12 @@ struct sort_entry sort_srcline_from = {
 static int64_t
 sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	if (!left->branch_info->srcline_to) {
-		struct map *map = left->branch_info->to.map;
-		if (!map)
-			left->branch_info->srcline_to = SRCLINE_UNKNOWN;
-		else
-			left->branch_info->srcline_to = get_srcline(map->dso,
-					   map__rip_2objdump(map,
-							     left->branch_info->to.al_addr),
-							 left->branch_info->from.sym,
-							 true, true,
-							 left->branch_info->to.al_addr);
-	}
-	if (!right->branch_info->srcline_to) {
-		struct map *map = right->branch_info->to.map;
-		if (!map)
-			right->branch_info->srcline_to = SRCLINE_UNKNOWN;
-		else
-			right->branch_info->srcline_to = get_srcline(map->dso,
-					     map__rip_2objdump(map,
-							       right->branch_info->to.al_addr),
-						     right->branch_info->to.sym,
-						     true, true,
-						     right->branch_info->to.al_addr);
-	}
+	if (!left->branch_info->srcline_to)
+		left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to);
+
+	if (!right->branch_info->srcline_to)
+		right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to);
+
 	return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
 }
 
@@ -638,7 +601,7 @@ static char *get_trace_output(struct hist_entry *he)
 {
 	struct trace_seq seq;
 	struct perf_evsel *evsel;
-	struct pevent_record rec = {
+	struct tep_record rec = {
 		.data = he->raw_data,
 		.size = he->raw_size,
 	};
@@ -647,10 +610,10 @@ static char *get_trace_output(struct hist_entry *he)
 
 	trace_seq_init(&seq);
 	if (symbol_conf.raw_trace) {
-		pevent_print_fields(&seq, he->raw_data, he->raw_size,
-				    evsel->tp_format);
+		tep_print_fields(&seq, he->raw_data, he->raw_size,
+				 evsel->tp_format);
 	} else {
-		pevent_event_info(&seq, evsel->tp_format, &rec);
+		tep_event_info(&seq, evsel->tp_format, &rec);
 	}
 	/*
 	 * Trim the buffer, it starts at 4KB and we're not going to
@@ -1211,7 +1174,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
 
 		/* print [s] for shared data mmaps */
 		if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
-		     map && (map->type == MAP__VARIABLE) &&
+		     map && !(map->prot & PROT_EXEC) &&
 		    (map->flags & MAP_SHARED) &&
 		    (map->maj || map->min || map->ino ||
 		     map->ino_generation))
@@ -1921,7 +1884,7 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
 struct hpp_dynamic_entry {
 	struct perf_hpp_fmt hpp;
 	struct perf_evsel *evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned dynamic_len;
 	bool raw_trace;
 };
@@ -1936,7 +1899,7 @@ static int hde_width(struct hpp_dynamic_entry *hde)
 		if (namelen > len)
 			len = namelen;
 
-		if (!(hde->field->flags & FIELD_IS_STRING)) {
+		if (!(hde->field->flags & TEP_FIELD_IS_STRING)) {
 			/* length for print hex numbers */
 			fieldlen = hde->field->size * 2 + 2;
 		}
@@ -1952,7 +1915,7 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde,
 			       struct hist_entry *he)
 {
 	char *str, *pos;
-	struct format_field *field = hde->field;
+	struct tep_format_field *field = hde->field;
 	size_t namelen;
 	bool last = false;
 
@@ -2037,7 +2000,7 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 	struct hpp_dynamic_entry *hde;
 	size_t len = fmt->user_len;
 	char *str, *pos;
-	struct format_field *field;
+	struct tep_format_field *field;
 	size_t namelen;
 	bool last = false;
 	int ret;
@@ -2084,7 +2047,7 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 		struct trace_seq seq;
 raw_field:
 		trace_seq_init(&seq);
-		pevent_print_field(&seq, he->raw_data, hde->field);
+		tep_print_field(&seq, he->raw_data, hde->field);
 		str = seq.buffer;
 	}
 
@@ -2097,7 +2060,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 			       struct hist_entry *a, struct hist_entry *b)
 {
 	struct hpp_dynamic_entry *hde;
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned offset, size;
 
 	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
@@ -2108,10 +2071,10 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 	}
 
 	field = hde->field;
-	if (field->flags & FIELD_IS_DYNAMIC) {
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 		unsigned long long dyn;
 
-		pevent_read_number_field(field, a->raw_data, &dyn);
+		tep_read_number_field(field, a->raw_data, &dyn);
 		offset = dyn & 0xffff;
 		size = (dyn >> 16) & 0xffff;
 
@@ -2154,7 +2117,7 @@ static void hde_free(struct perf_hpp_fmt *fmt)
 }
 
 static struct hpp_dynamic_entry *
-__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct tep_format_field *field,
 		      int level)
 {
 	struct hpp_dynamic_entry *hde;
@@ -2289,7 +2252,7 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam
 }
 
 static int __dynamic_dimension__add(struct perf_evsel *evsel,
-				    struct format_field *field,
+				    struct tep_format_field *field,
 				    bool raw_trace, int level)
 {
 	struct hpp_dynamic_entry *hde;
@@ -2307,7 +2270,7 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel,
 static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
 {
 	int ret;
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	field = evsel->tp_format->format.fields;
 	while (field) {
@@ -2342,13 +2305,13 @@ static int add_all_matching_fields(struct perf_evlist *evlist,
 {
 	int ret = -ESRCH;
 	struct perf_evsel *evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
 			continue;
 
-		field = pevent_find_any_field(evsel->tp_format, field_name);
+		field = tep_find_any_field(evsel->tp_format, field_name);
 		if (field == NULL)
 			continue;
 
@@ -2364,7 +2327,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
 {
 	char *str, *event_name, *field_name, *opt_name;
 	struct perf_evsel *evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 	bool raw_trace = symbol_conf.raw_trace;
 	int ret = 0;
 
@@ -2415,7 +2378,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
 	if (!strcmp(field_name, "*")) {
 		ret = add_evsel_fields(evsel, raw_trace, level);
 	} else {
-		field = pevent_find_any_field(evsel->tp_format, field_name);
+		field = tep_find_any_field(evsel->tp_format, field_name);
 		if (field == NULL) {
 			pr_debug("Cannot find event field for %s.%s\n",
 				 event_name, field_name);
@@ -2582,7 +2545,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 		if (sort__mode != SORT_MODE__MEMORY)
 			return -EINVAL;
 
-		if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0)
+		if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
 			return -EINVAL;
 
 		if (sd->entry == &sort_mem_daddr_sym)
@@ -2628,7 +2591,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
 		if (*tok) {
 			ret = sort_dimension__add(list, tok, evlist, level);
 			if (ret == -EINVAL) {
-				if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
+				if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
 					pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
 				else
 					pr_err("Invalid --sort key: `%s'", tok);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 035b62e2c60b..a97cf8e6be86 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -112,6 +112,8 @@ struct hist_entry {
 
 	char			level;
 	u8			filtered;
+
+	u16			callchain_size;
 	union {
 		/*
 		 * Since perf diff only supports the stdio output, TUI
@@ -151,6 +153,11 @@ struct hist_entry {
 	struct callchain_root	callchain[0]; /* must be last member */
 };
 
+static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
+{
+	return he->callchain_size != 0;
+}
+
 static inline bool hist_entry__has_pairs(struct hist_entry *he)
 {
 	return !list_empty(&he->pairs.node);
@@ -186,13 +193,13 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he)
 static inline u64 cl_address(u64 address)
 {
 	/* return the cacheline of the address */
-	return (address & ~(cacheline_size - 1));
+	return (address & ~(cacheline_size() - 1));
 }
 
 static inline u64 cl_offset(u64 address)
 {
 	/* return the cacheline of the address */
-	return (address & (cacheline_size - 1));
+	return (address & (cacheline_size() - 1));
 }
 
 enum sort_mode {
@@ -269,7 +276,7 @@ extern struct sort_entry sort_thread;
 extern struct list_head hist_entry__sort_list;
 
 struct perf_evlist;
-struct pevent;
+struct tep_handle;
 int setup_sorting(struct perf_evlist *evlist);
 int setup_output_field(void);
 void reset_output_field(void);
@@ -292,5 +299,5 @@ int64_t
 sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t
 sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
-char *hist_entry__get_srcline(struct hist_entry *he);
+char *hist_entry__srcline(struct hist_entry *he);
 #endif	/* __PERF_SORT_H */
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 3c21fd059b64..e767c4a9d4d2 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso,
 	struct symbol *inline_sym;
 	char *demangled = NULL;
 
+	if (!funcname)
+		funcname = "??";
+
 	if (dso) {
 		demangled = dso__demangle_sym(dso, 0, funcname);
 		if (demangled)
@@ -103,6 +106,7 @@ static struct symbol *new_inline_sym(struct dso *dso,
 		inline_sym = symbol__new(base_sym ? base_sym->start : 0,
 					 base_sym ? base_sym->end : 0,
 					 base_sym ? base_sym->binding : 0,
+					 base_sym ? base_sym->type : 0,
 					 funcname);
 		if (inline_sym)
 			inline_sym->inlined = 1;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
new file mode 100644
index 000000000000..e7b4c44ebb62
--- /dev/null
+++ b/tools/perf/util/stat-display.c
@@ -0,0 +1,1166 @@
+#include <stdio.h>
+#include <inttypes.h>
+#include <linux/time64.h>
+#include <math.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "stat.h"
+#include "top.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "string2.h"
+#include "sane_ctype.h"
+#include "cgroup.h"
+#include <math.h>
+#include <api/fs/fs.h>
+
+#define CNTR_NOT_SUPPORTED	"<not supported>"
+#define CNTR_NOT_COUNTED	"<not counted>"
+
+static bool is_duration_time(struct perf_evsel *evsel)
+{
+	return !strcmp(evsel->name, "duration_time");
+}
+
+static void print_running(struct perf_stat_config *config,
+			  u64 run, u64 ena)
+{
+	if (config->csv_output) {
+		fprintf(config->output, "%s%" PRIu64 "%s%.2f",
+					config->csv_sep,
+					run,
+					config->csv_sep,
+					ena ? 100.0 * run / ena : 100.0);
+	} else if (run != ena) {
+		fprintf(config->output, "  (%.2f%%)", 100.0 * run / ena);
+	}
+}
+
+static void print_noise_pct(struct perf_stat_config *config,
+			    double total, double avg)
+{
+	double pct = rel_stddev_stats(total, avg);
+
+	if (config->csv_output)
+		fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
+	else if (pct)
+		fprintf(config->output, "  ( +-%6.2f%% )", pct);
+}
+
+static void print_noise(struct perf_stat_config *config,
+			struct perf_evsel *evsel, double avg)
+{
+	struct perf_stat_evsel *ps;
+
+	if (config->run_count == 1)
+		return;
+
+	ps = evsel->stats;
+	print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg);
+}
+
+static void aggr_printout(struct perf_stat_config *config,
+			  struct perf_evsel *evsel, int id, int nr)
+{
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+		fprintf(config->output, "S%d-C%*d%s%*d%s",
+			cpu_map__id_to_socket(id),
+			config->csv_output ? 0 : -8,
+			cpu_map__id_to_cpu(id),
+			config->csv_sep,
+			config->csv_output ? 0 : 4,
+			nr,
+			config->csv_sep);
+		break;
+	case AGGR_SOCKET:
+		fprintf(config->output, "S%*d%s%*d%s",
+			config->csv_output ? 0 : -5,
+			id,
+			config->csv_sep,
+			config->csv_output ? 0 : 4,
+			nr,
+			config->csv_sep);
+			break;
+	case AGGR_NONE:
+		fprintf(config->output, "CPU%*d%s",
+			config->csv_output ? 0 : -4,
+			perf_evsel__cpus(evsel)->map[id], config->csv_sep);
+		break;
+	case AGGR_THREAD:
+		fprintf(config->output, "%*s-%*d%s",
+			config->csv_output ? 0 : 16,
+			thread_map__comm(evsel->threads, id),
+			config->csv_output ? 0 : -8,
+			thread_map__pid(evsel->threads, id),
+			config->csv_sep);
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+}
+
+struct outstate {
+	FILE *fh;
+	bool newline;
+	const char *prefix;
+	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(struct perf_stat_config *config __maybe_unused,
+			 void *ctx)
+{
+	struct outstate *os = ctx;
+
+	os->newline = true;
+}
+
+static void do_new_line_std(struct perf_stat_config *config,
+			    struct outstate *os)
+{
+	fputc('\n', os->fh);
+	fputs(os->prefix, os->fh);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+	if (config->aggr_mode == AGGR_NONE)
+		fprintf(os->fh, "        ");
+	fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(struct perf_stat_config *config,
+			     void *ctx, const char *color, const char *fmt,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	bool newline = os->newline;
+
+	os->newline = false;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%-*s", METRIC_LEN, "");
+		return;
+	}
+
+	if (newline)
+		do_new_line_std(config, os);
+
+	n = fprintf(out, " # ");
+	if (color)
+		n += color_fprintf(out, color, fmt, val);
+	else
+		n += fprintf(out, fmt, val);
+	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(struct perf_stat_config *config, void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, config->csv_sep);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+	for (i = 0; i < os->nfields; i++)
+		fputs(config->csv_sep, os->fh);
+}
+
+static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
+			     void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s", config->csv_sep, config->csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	ends = vals = ltrim(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	while (isspace(*unit))
+		unit++;
+	fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit);
+}
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "hz") ||
+	    strstr(unit, "Hz") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(struct perf_stat_config *config,
+			      void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[1024], str[1024];
+	unsigned mlen = config->metric_only_len;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+
+	if (color)
+		mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+
+	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+	fprintf(out, "%*s ", mlen, str);
+}
+
+static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
+				  void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	ends = vals = ltrim(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, config->csv_sep);
+}
+
+static void new_line_metric(struct perf_stat_config *config __maybe_unused,
+			    void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(struct perf_stat_config *config,
+				void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	if (config->csv_output)
+		fprintf(os->fh, "%s%s", unit, config->csv_sep);
+	else
+		fprintf(os->fh, "%*s ", config->metric_only_len, unit);
+}
+
+static int first_shadow_cpu(struct perf_stat_config *config,
+			    struct perf_evsel *evsel, int id)
+{
+	struct perf_evlist *evlist = evsel->evlist;
+	int i;
+
+	if (!config->aggr_get_id)
+		return 0;
+
+	if (config->aggr_mode == AGGR_NONE)
+		return id;
+
+	if (config->aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (config->aggr_get_id(config, evlist->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
+static void abs_printout(struct perf_stat_config *config,
+			 int id, int nr, struct perf_evsel *evsel, double avg)
+{
+	FILE *output = config->output;
+	double sc =  evsel->scale;
+	const char *fmt;
+
+	if (config->csv_output) {
+		fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
+	} else {
+		if (config->big_num)
+			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
+		else
+			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
+	}
+
+	aggr_printout(config, evsel, id, nr);
+
+	fprintf(output, fmt, avg, config->csv_sep);
+
+	if (evsel->unit)
+		fprintf(output, "%-*s%s",
+			config->csv_output ? 0 : config->unit_width,
+			evsel->unit, config->csv_sep);
+
+	fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel));
+
+	if (evsel->cgrp)
+		fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name);
+}
+
+static bool is_mixed_hw_group(struct perf_evsel *counter)
+{
+	struct perf_evlist *evlist = counter->evlist;
+	u32 pmu_type = counter->attr.type;
+	struct perf_evsel *pos;
+
+	if (counter->nr_members < 2)
+		return false;
+
+	evlist__for_each_entry(evlist, pos) {
+		/* software events can be part of any hardware group */
+		if (pos->attr.type == PERF_TYPE_SOFTWARE)
+			continue;
+		if (pmu_type == PERF_TYPE_SOFTWARE) {
+			pmu_type = pos->attr.type;
+			continue;
+		}
+		if (pmu_type != pos->attr.type)
+			return true;
+	}
+
+	return false;
+}
+
+static void printout(struct perf_stat_config *config, int id, int nr,
+		     struct perf_evsel *counter, double uval,
+		     char *prefix, u64 run, u64 ena, double noise,
+		     struct runtime_stat *st)
+{
+	struct perf_stat_output_ctx out;
+	struct outstate os = {
+		.fh = config->output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
+	};
+	print_metric_t pm = print_metric_std;
+	new_line_t nl;
+
+	if (config->metric_only) {
+		nl = new_line_metric;
+		if (config->csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
+
+	if (config->csv_output && !config->metric_only) {
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		os.nfields = 3;
+		os.nfields += aggr_fields[config->aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
+	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (config->metric_only) {
+			pm(config, &os, NULL, "", "", 0);
+			return;
+		}
+		aggr_printout(config, counter, id, nr);
+
+		fprintf(config->output, "%*s%s",
+			config->csv_output ? 0 : 18,
+			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+			config->csv_sep);
+
+		if (counter->supported) {
+			config->print_free_counters_hint = 1;
+			if (is_mixed_hw_group(counter))
+				config->print_mixed_hw_group_error = 1;
+		}
+
+		fprintf(config->output, "%-*s%s",
+			config->csv_output ? 0 : config->unit_width,
+			counter->unit, config->csv_sep);
+
+		fprintf(config->output, "%*s",
+			config->csv_output ? 0 : -25,
+			perf_evsel__name(counter));
+
+		if (counter->cgrp)
+			fprintf(config->output, "%s%s",
+				config->csv_sep, counter->cgrp->name);
+
+		if (!config->csv_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+		if (config->csv_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		return;
+	}
+
+	if (!config->metric_only)
+		abs_printout(config, id, nr, counter, uval);
+
+	out.print_metric = pm;
+	out.new_line = nl;
+	out.ctx = &os;
+	out.force_header = false;
+
+	if (config->csv_output && !config->metric_only) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	}
+
+	perf_stat__print_shadow_stats(config, counter, uval,
+				first_shadow_cpu(config, counter, id),
+				&out, &config->metric_events, st);
+	if (!config->csv_output && !config->metric_only) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	}
+}
+
+static void aggr_update_shadow(struct perf_stat_config *config,
+			       struct perf_evlist *evlist)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		id = config->aggr_map->map[s];
+		evlist__for_each_entry(evlist, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = config->aggr_get_id(config, evlist->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			perf_stat__update_shadow_stats(counter, val,
+					first_shadow_cpu(config, counter, id),
+					&rt_stat);
+		}
+	}
+}
+
+static void uniquify_event_name(struct perf_evsel *counter)
+{
+	char *new_name;
+	char *config;
+
+	if (counter->uniquified_name ||
+	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+					   strlen(counter->pmu_name)))
+		return;
+
+	config = strchr(counter->name, '/');
+	if (config) {
+		if (asprintf(&new_name,
+			     "%s%s", counter->pmu_name, config) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	} else {
+		if (asprintf(&new_name,
+			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	}
+
+	counter->uniquified_name = true;
+}
+
+static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter,
+			    void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	struct perf_evlist *evlist = counter->evlist;
+	struct perf_evsel *alias;
+
+	alias = list_prepare_entry(counter, &(evlist->entries), node);
+	list_for_each_entry_continue (alias, &evlist->entries, node) {
+		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+		    alias->scale != counter->scale ||
+		    alias->cgrp != counter->cgrp ||
+		    strcmp(alias->unit, counter->unit) ||
+		    perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter))
+			break;
+		alias->merged_stat = true;
+		cb(config, alias, data, false);
+	}
+}
+
+static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter,
+			    void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	if (counter->merged_stat)
+		return false;
+	cb(config, counter, data, true);
+	if (config->no_merge)
+		uniquify_event_name(counter);
+	else if (counter->auto_merge_stats)
+		collect_all_aliases(config, counter, cb, data);
+	return true;
+}
+
+struct aggr_data {
+	u64 ena, run, val;
+	int id;
+	int nr;
+	int cpu;
+};
+
+static void aggr_cb(struct perf_stat_config *config,
+		    struct perf_evsel *counter, void *data, bool first)
+{
+	struct aggr_data *ad = data;
+	int cpu, s2;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		struct perf_counts_values *counts;
+
+		s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu);
+		if (s2 != ad->id)
+			continue;
+		if (first)
+			ad->nr++;
+		counts = perf_counts(counter->counts, cpu, 0);
+		/*
+		 * When any result is bad, make them all to give
+		 * consistent output in interval mode.
+		 */
+		if (counts->ena == 0 || counts->run == 0 ||
+		    counter->counts->scaled == -1) {
+			ad->ena = 0;
+			ad->run = 0;
+			break;
+		}
+		ad->val += counts->val;
+		ad->ena += counts->ena;
+		ad->run += counts->run;
+	}
+}
+
+static void print_aggr(struct perf_stat_config *config,
+		       struct perf_evlist *evlist,
+		       char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	struct perf_evsel *counter;
+	int s, id, nr;
+	double uval;
+	u64 ena, run, val;
+	bool first;
+
+	if (!(config->aggr_map || config->aggr_get_id))
+		return;
+
+	aggr_update_shadow(config, evlist);
+
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		struct aggr_data ad;
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
+		ad.id = id = config->aggr_map->map[s];
+		first = true;
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+
+			ad.val = ad.ena = ad.run = 0;
+			ad.nr = 0;
+			if (!collect_data(config, counter, aggr_cb, &ad))
+				continue;
+			nr = ad.nr;
+			ena = ad.ena;
+			run = ad.run;
+			val = ad.val;
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(config, counter, id, nr);
+			}
+			if (prefix && !metric_only)
+				fprintf(output, "%s", prefix);
+
+			uval = val * counter->scale;
+			printout(config, id, nr, counter, uval, prefix,
+				 run, ena, 1.0, &rt_stat);
+			if (!metric_only)
+				fputc('\n', output);
+		}
+		if (metric_only)
+			fputc('\n', output);
+	}
+}
+
+static int cmp_val(const void *a, const void *b)
+{
+	return ((struct perf_aggr_thread_value *)b)->val -
+		((struct perf_aggr_thread_value *)a)->val;
+}
+
+static struct perf_aggr_thread_value *sort_aggr_thread(
+					struct perf_evsel *counter,
+					int nthreads, int ncpus,
+					int *ret,
+					struct target *_target)
+{
+	int cpu, thread, i = 0;
+	double uval;
+	struct perf_aggr_thread_value *buf;
+
+	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
+	if (!buf)
+		return NULL;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		u64 ena = 0, run = 0, val = 0;
+
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			val += perf_counts(counter->counts, cpu, thread)->val;
+			ena += perf_counts(counter->counts, cpu, thread)->ena;
+			run += perf_counts(counter->counts, cpu, thread)->run;
+		}
+
+		uval = val * counter->scale;
+
+		/*
+		 * Skip value 0 when enabling --per-thread globally,
+		 * otherwise too many 0 output.
+		 */
+		if (uval == 0.0 && target__has_per_thread(_target))
+			continue;
+
+		buf[i].counter = counter;
+		buf[i].id = thread;
+		buf[i].uval = uval;
+		buf[i].val = val;
+		buf[i].run = run;
+		buf[i].ena = ena;
+		i++;
+	}
+
+	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
+
+	if (ret)
+		*ret = i;
+
+	return buf;
+}
+
+static void print_aggr_thread(struct perf_stat_config *config,
+			      struct target *_target,
+			      struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = config->output;
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = cpu_map__nr(counter->cpus);
+	int thread, sorted_threads, id;
+	struct perf_aggr_thread_value *buf;
+
+	buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target);
+	if (!buf) {
+		perror("cannot sort aggr thread");
+		return;
+	}
+
+	for (thread = 0; thread < sorted_threads; thread++) {
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		id = buf[thread].id;
+		if (config->stats)
+			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+				 prefix, buf[thread].run, buf[thread].ena, 1.0,
+				 &config->stats[id]);
+		else
+			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+				 prefix, buf[thread].run, buf[thread].ena, 1.0,
+				 &rt_stat);
+		fputc('\n', output);
+	}
+
+	free(buf);
+}
+
+struct caggr_data {
+	double avg, avg_enabled, avg_running;
+};
+
+static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused,
+			    struct perf_evsel *counter, void *data,
+			    bool first __maybe_unused)
+{
+	struct caggr_data *cd = data;
+	struct perf_stat_evsel *ps = counter->stats;
+
+	cd->avg += avg_stats(&ps->res_stats[0]);
+	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
+	cd->avg_running += avg_stats(&ps->res_stats[2]);
+}
+
+/*
+ * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
+ */
+static void print_counter_aggr(struct perf_stat_config *config,
+			       struct perf_evsel *counter, char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	double uval;
+	struct caggr_data cd = { .avg = 0.0 };
+
+	if (!collect_data(config, counter, counter_aggr_cb, &cd))
+		return;
+
+	if (prefix && !metric_only)
+		fprintf(output, "%s", prefix);
+
+	uval = cd.avg * counter->scale;
+	printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
+		 cd.avg, &rt_stat);
+	if (!metric_only)
+		fprintf(output, "\n");
+}
+
+static void counter_cb(struct perf_stat_config *config __maybe_unused,
+		       struct perf_evsel *counter, void *data,
+		       bool first __maybe_unused)
+{
+	struct aggr_data *ad = data;
+
+	ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
+	ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
+	ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+}
+
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(struct perf_stat_config *config,
+			  struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = config->output;
+	u64 ena, run, val;
+	double uval;
+	int cpu;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		struct aggr_data ad = { .cpu = cpu };
+
+		if (!collect_data(config, counter, counter_cb, &ad))
+			return;
+		val = ad.val;
+		ena = ad.ena;
+		run = ad.run;
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		uval = val * counter->scale;
+		printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0,
+			 &rt_stat);
+
+		fputc('\n', output);
+	}
+}
+
+static void print_no_aggr_metric(struct perf_stat_config *config,
+				 struct perf_evlist *evlist,
+				 char *prefix)
+{
+	int cpu;
+	int nrcpus = 0;
+	struct perf_evsel *counter;
+	u64 ena, run, val;
+	double uval;
+
+	nrcpus = evlist->cpus->nr;
+	for (cpu = 0; cpu < nrcpus; cpu++) {
+		bool first = true;
+
+		if (prefix)
+			fputs(prefix, config->output);
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+			if (first) {
+				aggr_printout(config, counter, cpu, 0);
+				first = false;
+			}
+			val = perf_counts(counter->counts, cpu, 0)->val;
+			ena = perf_counts(counter->counts, cpu, 0)->ena;
+			run = perf_counts(counter->counts, cpu, 0)->run;
+
+			uval = val * counter->scale;
+			printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0,
+				 &rt_stat);
+		}
+		fputc('\n', config->output);
+	}
+}
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+static const char *aggr_header_csv[] = {
+	[AGGR_CORE] 	= 	"core,cpus,",
+	[AGGR_SOCKET] 	= 	"socket,cpus",
+	[AGGR_NONE] 	= 	"cpu,",
+	[AGGR_THREAD] 	= 	"comm-pid,",
+	[AGGR_GLOBAL] 	=	""
+};
+
+static void print_metric_headers(struct perf_stat_config *config,
+				 struct perf_evlist *evlist,
+				 const char *prefix, bool no_indent)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = config->output
+	};
+
+	if (prefix)
+		fprintf(config->output, "%s", prefix);
+
+	if (!config->csv_output && !no_indent)
+		fprintf(config->output, "%*s",
+			aggr_header_lens[config->aggr_mode], "");
+	if (config->csv_output) {
+		if (config->interval)
+			fputs("time,", config->output);
+		fputs(aggr_header_csv[config->aggr_mode], config->output);
+	}
+
+	/* Print metrics headers only */
+	evlist__for_each_entry(evlist, counter) {
+		if (is_duration_time(counter))
+			continue;
+		os.evsel = counter;
+		out.ctx = &os;
+		out.print_metric = print_metric_header;
+		out.new_line = new_line_metric;
+		out.force_header = true;
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(config, counter, 0,
+					      0,
+					      &out,
+					      &config->metric_events,
+					      &rt_stat);
+	}
+	fputc('\n', config->output);
+}
+
+static void print_interval(struct perf_stat_config *config,
+			   struct perf_evlist *evlist,
+			   char *prefix, struct timespec *ts)
+{
+	bool metric_only = config->metric_only;
+	unsigned int unit_width = config->unit_width;
+	FILE *output = config->output;
+	static int num_print_interval;
+
+	if (config->interval_clear)
+		puts(CONSOLE_CLEAR);
+
+	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+
+	if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
+		switch (config->aggr_mode) {
+		case AGGR_SOCKET:
+			fprintf(output, "#           time socket cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_CORE:
+			fprintf(output, "#           time core         cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_NONE:
+			fprintf(output, "#           time CPU    ");
+			if (!metric_only)
+				fprintf(output, "                counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_THREAD:
+			fprintf(output, "#           time             comm-pid");
+			if (!metric_only)
+				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_GLOBAL:
+		default:
+			fprintf(output, "#           time");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+		case AGGR_UNSET:
+			break;
+		}
+	}
+
+	if ((num_print_interval == 0 || config->interval_clear) && metric_only)
+		print_metric_headers(config, evlist, " ", true);
+	if (++num_print_interval == 25)
+		num_print_interval = 0;
+}
+
+static void print_header(struct perf_stat_config *config,
+			 struct target *_target,
+			 int argc, const char **argv)
+{
+	FILE *output = config->output;
+	int i;
+
+	fflush(stdout);
+
+	if (!config->csv_output) {
+		fprintf(output, "\n");
+		fprintf(output, " Performance counter stats for ");
+		if (_target->system_wide)
+			fprintf(output, "\'system wide");
+		else if (_target->cpu_list)
+			fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+		else if (!target__has_task(_target)) {
+			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+			for (i = 1; argv && (i < argc); i++)
+				fprintf(output, " %s", argv[i]);
+		} else if (_target->pid)
+			fprintf(output, "process id \'%s", _target->pid);
+		else
+			fprintf(output, "thread id \'%s", _target->tid);
+
+		fprintf(output, "\'");
+		if (config->run_count > 1)
+			fprintf(output, " (%d runs)", config->run_count);
+		fprintf(output, ":\n\n");
+	}
+}
+
+static int get_precision(double num)
+{
+	if (num > 1)
+		return 0;
+
+	return lround(ceil(-log10(num)));
+}
+
+static void print_table(struct perf_stat_config *config,
+			FILE *output, int precision, double avg)
+{
+	char tmp[64];
+	int idx, indent = 0;
+
+	scnprintf(tmp, 64, " %17.*f", precision, avg);
+	while (tmp[indent] == ' ')
+		indent++;
+
+	fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+	for (idx = 0; idx < config->run_count; idx++) {
+		double run = (double) config->walltime_run[idx] / NSEC_PER_SEC;
+		int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+		fprintf(output, " %17.*f (%+.*f) ",
+			precision, run, precision, run - avg);
+
+		for (h = 0; h < n; h++)
+			fprintf(output, "#");
+
+		fprintf(output, "\n");
+	}
+
+	fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
+static double timeval2double(struct timeval *t)
+{
+	return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
+}
+
+static void print_footer(struct perf_stat_config *config)
+{
+	double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+	FILE *output = config->output;
+	int n;
+
+	if (!config->null_run)
+		fprintf(output, "\n");
+
+	if (config->run_count == 1) {
+		fprintf(output, " %17.9f seconds time elapsed", avg);
+
+		if (config->ru_display) {
+			double ru_utime = timeval2double(&config->ru_data.ru_utime);
+			double ru_stime = timeval2double(&config->ru_data.ru_stime);
+
+			fprintf(output, "\n\n");
+			fprintf(output, " %17.9f seconds user\n", ru_utime);
+			fprintf(output, " %17.9f seconds sys\n", ru_stime);
+		}
+	} else {
+		double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+		/*
+		 * Display at most 2 more significant
+		 * digits than the stddev inaccuracy.
+		 */
+		int precision = get_precision(sd) + 2;
+
+		if (config->walltime_run_table)
+			print_table(config, output, precision, avg);
+
+		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+			precision, avg, precision, sd);
+
+		print_noise_pct(config, sd, avg);
+	}
+	fprintf(output, "\n\n");
+
+	if (config->print_free_counters_hint &&
+	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
+	    n > 0)
+		fprintf(output,
+"Some events weren't counted. Try disabling the NMI watchdog:\n"
+"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+"	perf stat ...\n"
+"	echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+	if (config->print_mixed_hw_group_error)
+		fprintf(output,
+			"The events in group usually have to be from "
+			"the same PMU. Try reorganizing the group.\n");
+}
+
+void
+perf_evlist__print_counters(struct perf_evlist *evlist,
+			    struct perf_stat_config *config,
+			    struct target *_target,
+			    struct timespec *ts,
+			    int argc, const char **argv)
+{
+	bool metric_only = config->metric_only;
+	int interval = config->interval;
+	struct perf_evsel *counter;
+	char buf[64], *prefix = NULL;
+
+	if (interval)
+		print_interval(config, evlist, prefix = buf, ts);
+	else
+		print_header(config, _target, argc, argv);
+
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0 && !interval)
+			print_metric_headers(config, evlist, prefix, false);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (config->aggr_mode == AGGR_GLOBAL && prefix)
+			fprintf(config->output, "%s", prefix);
+	}
+
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+	case AGGR_SOCKET:
+		print_aggr(config, evlist, prefix);
+		break;
+	case AGGR_THREAD:
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+			print_aggr_thread(config, _target, counter, prefix);
+		}
+		break;
+	case AGGR_GLOBAL:
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+			print_counter_aggr(config, counter, prefix);
+		}
+		if (metric_only)
+			fputc('\n', config->output);
+		break;
+	case AGGR_NONE:
+		if (metric_only)
+			print_no_aggr_metric(config, evlist, prefix);
+		else {
+			evlist__for_each_entry(evlist, counter) {
+				if (is_duration_time(counter))
+					continue;
+				print_counter(config, counter, prefix);
+			}
+		}
+		break;
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+
+	if (!interval && !config->csv_output)
+		print_footer(config);
+
+	fflush(config->output);
+}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 594d14a02b67..8ad32763cfff 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -410,7 +410,8 @@ static double runtime_stat_n(struct runtime_stat *st,
 	return v->stats.n;
 }
 
-static void print_stalled_cycles_frontend(int cpu,
+static void print_stalled_cycles_frontend(struct perf_stat_config *config,
+					  int cpu,
 					  struct perf_evsel *evsel, double avg,
 					  struct perf_stat_output_ctx *out,
 					  struct runtime_stat *st)
@@ -427,13 +428,14 @@ static void print_stalled_cycles_frontend(int cpu,
 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
 	if (ratio)
-		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
 				  ratio);
 	else
-		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
+		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
 }
 
-static void print_stalled_cycles_backend(int cpu,
+static void print_stalled_cycles_backend(struct perf_stat_config *config,
+					 int cpu,
 					 struct perf_evsel *evsel, double avg,
 					 struct perf_stat_output_ctx *out,
 					 struct runtime_stat *st)
@@ -449,10 +451,11 @@ static void print_stalled_cycles_backend(int cpu,
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
 }
 
-static void print_branch_misses(int cpu,
+static void print_branch_misses(struct perf_stat_config *config,
+				int cpu,
 				struct perf_evsel *evsel,
 				double avg,
 				struct perf_stat_output_ctx *out,
@@ -469,10 +472,11 @@ static void print_branch_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
 }
 
-static void print_l1_dcache_misses(int cpu,
+static void print_l1_dcache_misses(struct perf_stat_config *config,
+				   int cpu,
 				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out,
@@ -490,10 +494,11 @@ static void print_l1_dcache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
 }
 
-static void print_l1_icache_misses(int cpu,
+static void print_l1_icache_misses(struct perf_stat_config *config,
+				   int cpu,
 				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out,
@@ -510,10 +515,11 @@ static void print_l1_icache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
 }
 
-static void print_dtlb_cache_misses(int cpu,
+static void print_dtlb_cache_misses(struct perf_stat_config *config,
+				    int cpu,
 				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out,
@@ -529,10 +535,11 @@ static void print_dtlb_cache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
 }
 
-static void print_itlb_cache_misses(int cpu,
+static void print_itlb_cache_misses(struct perf_stat_config *config,
+				    int cpu,
 				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out,
@@ -548,10 +555,11 @@ static void print_itlb_cache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
 }
 
-static void print_ll_cache_misses(int cpu,
+static void print_ll_cache_misses(struct perf_stat_config *config,
+				  int cpu,
 				  struct perf_evsel *evsel,
 				  double avg,
 				  struct perf_stat_output_ctx *out,
@@ -567,7 +575,7 @@ static void print_ll_cache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
 /*
@@ -674,7 +682,8 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
 	return sanitize_val(1.0 - sum);
 }
 
-static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+static void print_smi_cost(struct perf_stat_config *config,
+			   int cpu, struct perf_evsel *evsel,
 			   struct perf_stat_output_ctx *out,
 			   struct runtime_stat *st)
 {
@@ -694,11 +703,12 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel,
 
 	if (cost > 10)
 		color = PERF_COLOR_RED;
-	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
-	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
 }
 
-static void generic_metric(const char *metric_expr,
+static void generic_metric(struct perf_stat_config *config,
+			   const char *metric_expr,
 			   struct perf_evsel **metric_events,
 			   char *name,
 			   const char *metric_name,
@@ -737,20 +747,21 @@ static void generic_metric(const char *metric_expr,
 		const char *p = metric_expr;
 
 		if (expr__parse(&ratio, &pctx, &p) == 0)
-			print_metric(ctxp, NULL, "%8.1f",
+			print_metric(config, ctxp, NULL, "%8.1f",
 				metric_name ?
 				metric_name :
 				out->force_header ?  name : "",
 				ratio);
 		else
-			print_metric(ctxp, NULL, NULL,
+			print_metric(config, ctxp, NULL, NULL,
 				     out->force_header ?
 				     (metric_name ? metric_name : name) : "", 0);
 	} else
-		print_metric(ctxp, NULL, NULL, "", 0);
+		print_metric(config, ctxp, NULL, NULL, "", 0);
 }
 
-void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+				   struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
 				   struct rblist *metric_events,
@@ -769,10 +780,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
 		if (total) {
 			ratio = avg / total;
-			print_metric(ctxp, NULL, "%7.2f ",
+			print_metric(config, ctxp, NULL, "%7.2f ",
 					"insn per cycle", ratio);
 		} else {
-			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
+			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
 		}
 
 		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
@@ -783,20 +794,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 						    ctx, cpu));
 
 		if (total && avg) {
-			out->new_line(ctxp);
+			out->new_line(config, ctxp);
 			ratio = total / avg;
-			print_metric(ctxp, NULL, "%7.2f ",
+			print_metric(config, ctxp, NULL, "%7.2f ",
 					"stalled cycles per insn",
 					ratio);
 		} else if (have_frontend_stalled) {
-			print_metric(ctxp, NULL, NULL,
+			print_metric(config, ctxp, NULL, NULL,
 				     "stalled cycles per insn", 0);
 		}
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
 		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
-			print_branch_misses(cpu, evsel, avg, out, st);
+			print_branch_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all branches", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
@@ -804,9 +815,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
-			print_l1_dcache_misses(cpu, evsel, avg, out, st);
+			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
@@ -814,9 +825,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
-			print_l1_icache_misses(cpu, evsel, avg, out, st);
+			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
@@ -824,9 +835,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
-			print_dtlb_cache_misses(cpu, evsel, avg, out, st);
+			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
@@ -834,9 +845,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
-			print_itlb_cache_misses(cpu, evsel, avg, out, st);
+			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
@@ -844,9 +855,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
-			print_ll_cache_misses(cpu, evsel, avg, out, st);
+			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
 		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
 
@@ -854,32 +865,32 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			ratio = avg * 100 / total;
 
 		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
-			print_metric(ctxp, NULL, "%8.3f %%",
+			print_metric(config, ctxp, NULL, "%8.3f %%",
 				     "of all cache refs", ratio);
 		else
-			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
+		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(cpu, evsel, avg, out, st);
+		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
 
 		if (total) {
 			ratio = avg / total;
-			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
+			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
 		} else {
-			print_metric(ctxp, NULL, NULL, "Ghz", 0);
+			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
 		if (total)
-			print_metric(ctxp, NULL,
+			print_metric(config, ctxp, NULL,
 					"%7.2f%%", "transactional cycles",
 					100.0 * (avg / total));
 		else
-			print_metric(ctxp, NULL, NULL, "transactional cycles",
+			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
 				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
 		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
@@ -888,10 +899,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
-			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
+			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
 				100.0 * ((total2-avg) / total));
 		else
-			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
 		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
 					 ctx, cpu);
@@ -900,10 +911,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			ratio = total / avg;
 
 		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
-			print_metric(ctxp, NULL, "%8.0f",
+			print_metric(config, ctxp, NULL, "%8.0f",
 				     "cycles / transaction", ratio);
 		else
-			print_metric(ctxp, NULL, NULL, "cycles / transaction",
+			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
 				      0);
 	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
 		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
@@ -912,34 +923,33 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		if (avg)
 			ratio = total / avg;
 
-		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
-	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
-		   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
+		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
+	} else if (perf_evsel__is_clock(evsel)) {
 		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
-			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
-				     avg / ratio);
+			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
+				     avg / (ratio * evsel->scale));
 		else
-			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
 		double fe_bound = td_fe_bound(ctx, cpu, st);
 
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
-		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
 		double retiring = td_retiring(ctx, cpu, st);
 
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
-		print_metric(ctxp, color, "%8.1f%%", "retiring",
+		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
 		double bad_spec = td_bad_spec(ctx, cpu, st);
 
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
-		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
 		double be_bound = td_be_bound(ctx, cpu, st);
@@ -956,12 +966,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
 		if (td_total_slots(ctx, cpu, st) > 0)
-			print_metric(ctxp, color, "%8.1f%%", name,
+			print_metric(config, ctxp, color, "%8.1f%%", name,
 					be_bound * 100.);
 		else
-			print_metric(ctxp, NULL, NULL, name, 0);
+			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (evsel->metric_expr) {
-		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
+		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
 				evsel->metric_name, avg, cpu, out, st);
 	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
 		char unit = 'M';
@@ -976,9 +986,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			unit = 'K';
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
-		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
-		print_smi_cost(cpu, evsel, out, st);
+		print_smi_cost(config, cpu, evsel, out, st);
 	} else {
 		num = 0;
 	}
@@ -988,12 +998,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
 		list_for_each_entry (mexp, &me->head, nd) {
 			if (num++ > 0)
-				out->new_line(ctxp);
-			generic_metric(mexp->metric_expr, mexp->metric_events,
+				out->new_line(config, ctxp);
+			generic_metric(config, mexp->metric_expr, mexp->metric_events,
 					evsel->name, mexp->metric_name,
 					avg, cpu, out, st);
 		}
 	}
 	if (num == 0)
-		print_metric(ctxp, NULL, NULL, NULL, 0);
+		print_metric(config, ctxp, NULL, NULL, NULL, 0);
 }
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index a0061e0b0fad..4d40515307b8 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -374,9 +374,8 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	return 0;
 }
 
-int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event,
-				   struct perf_session *session)
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event)
 {
 	struct perf_counts_values count;
 	struct stat_event *st = &event->stat;
@@ -435,3 +434,98 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
 
 	return ret;
 }
+
+int create_perf_stat_counter(struct perf_evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	struct perf_evsel *leader = evsel->leader;
+
+	if (config->scale) {
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
+	/*
+	 * The event is part of non trivial group, let's enable
+	 * the group read (for leader) and ID retrieval for all
+	 * members.
+	 */
+	if (leader->nr_members > 1)
+		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+	attr->inherit = !config->no_inherit;
+
+	/*
+	 * Some events get initialized with sample_(period/type) set,
+	 * like tracepoints. Clear it up for counting.
+	 */
+	attr->sample_period = 0;
+
+	if (config->identifier)
+		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+	/*
+	 * Disabling all counters initially, they will be enabled
+	 * either manually by us or by kernel via enable_on_exec
+	 * set later.
+	 */
+	if (perf_evsel__is_group_leader(evsel)) {
+		attr->disabled = 1;
+
+		/*
+		 * In case of initial_delay we enable tracee
+		 * events manually.
+		 */
+		if (target__none(target) && !config->initial_delay)
+			attr->enable_on_exec = 1;
+	}
+
+	if (target__has_cpu(target) && !target__has_per_thread(target))
+		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+
+	return perf_evsel__open_per_thread(evsel, evsel->threads);
+}
+
+int perf_stat_synthesize_config(struct perf_stat_config *config,
+				struct perf_tool *tool,
+				struct perf_evlist *evlist,
+				perf_event__handler_t process,
+				bool attrs)
+{
+	int err;
+
+	if (attrs) {
+		err = perf_event__synthesize_attrs(tool, evlist, process);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			return err;
+		}
+	}
+
+	err = perf_event__synthesize_extra_attr(tool, evlist, process,
+						attrs);
+
+	err = perf_event__synthesize_thread_map2(tool, evlist->threads,
+						 process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_cpu_map(tool, evlist->cpus,
+					     process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize config.\n");
+		return err;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 8f56ba4fd258..2f9c9159a364 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -4,11 +4,16 @@
 
 #include <linux/types.h>
 #include <stdio.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
 #include "xyarray.h"
 #include "rblist.h"
+#include "perf.h"
+#include "event.h"
 
-struct stats
-{
+struct stats {
 	double n, mean, M2;
 	u64 max, min;
 };
@@ -85,15 +90,42 @@ struct runtime_stat {
 	struct rblist value_list;
 };
 
+typedef int (*aggr_get_id_t)(struct perf_stat_config *config,
+			     struct cpu_map *m, int cpu);
+
 struct perf_stat_config {
-	enum aggr_mode	aggr_mode;
-	bool		scale;
-	FILE		*output;
-	unsigned int	interval;
-	unsigned int	timeout;
-	int		times;
-	struct runtime_stat *stats;
-	int		stats_num;
+	enum aggr_mode		 aggr_mode;
+	bool			 scale;
+	bool			 no_inherit;
+	bool			 identifier;
+	bool			 csv_output;
+	bool			 interval_clear;
+	bool			 metric_only;
+	bool			 null_run;
+	bool			 ru_display;
+	bool			 big_num;
+	bool			 no_merge;
+	bool			 walltime_run_table;
+	FILE			*output;
+	unsigned int		 interval;
+	unsigned int		 timeout;
+	unsigned int		 initial_delay;
+	unsigned int		 unit_width;
+	unsigned int		 metric_only_len;
+	int			 times;
+	int			 run_count;
+	int			 print_free_counters_hint;
+	int			 print_mixed_hw_group_error;
+	struct runtime_stat	*stats;
+	int			 stats_num;
+	const char		*csv_sep;
+	struct stats		*walltime_nsecs_stats;
+	struct rusage		 ru_data;
+	struct cpu_map		*aggr_map;
+	aggr_get_id_t		 aggr_get_id;
+	struct cpu_map		*cpus_aggr_map;
+	u64			*walltime_run;
+	struct rblist		 metric_events;
 };
 
 void update_stats(struct stats *stats, u64 val);
@@ -131,9 +163,10 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel,
 extern struct runtime_stat rt_stat;
 extern struct stats walltime_nsecs_stats;
 
-typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+typedef void (*print_metric_t)(struct perf_stat_config *config,
+			       void *ctx, const char *color, const char *unit,
 			       const char *fmt, double val);
-typedef void (*new_line_t )(void *ctx);
+typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
 
 void runtime_stat__init(struct runtime_stat *st);
 void runtime_stat__exit(struct runtime_stat *st);
@@ -149,7 +182,8 @@ struct perf_stat_output_ctx {
 	bool force_header;
 };
 
-void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+				   struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
 				   struct rblist *metric_events,
@@ -165,11 +199,25 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 struct perf_tool;
 union perf_event;
 struct perf_session;
-int perf_event__process_stat_event(struct perf_tool *tool,
-				   union perf_event *event,
-				   struct perf_session *session);
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event);
 
 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
+
+int create_perf_stat_counter(struct perf_evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target);
+int perf_stat_synthesize_config(struct perf_stat_config *config,
+				struct perf_tool *tool,
+				struct perf_evlist *evlist,
+				perf_event__handler_t process,
+				bool attrs);
+void
+perf_evlist__print_counters(struct perf_evlist *evlist,
+			    struct perf_stat_config *config,
+			    struct target *_target,
+			    struct timespec *ts,
+			    int argc, const char **argv);
 #endif
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 3d1cf5bf7f18..9005fbe0780e 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -98,19 +98,25 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
 
 	va_copy(ap_saved, ap);
 	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
-	if (len < 0)
+	if (len < 0) {
+		va_end(ap_saved);
 		return len;
+	}
 	if (len > strbuf_avail(sb)) {
 		ret = strbuf_grow(sb, len);
-		if (ret)
+		if (ret) {
+			va_end(ap_saved);
 			return ret;
+		}
 		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
 		va_end(ap_saved);
 		if (len > strbuf_avail(sb)) {
 			pr_debug("this should not happen, your vsnprintf is broken");
+			va_end(ap_saved);
 			return -EINVAL;
 		}
 	}
+	va_end(ap_saved);
 	return strbuf_setlen(sb, sb->len + len);
 }
 
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2de770511e70..66a84d5846c8 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -114,16 +114,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
 		sym->st_shndx != SHN_ABS;
 }
 
-static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type)
+static bool elf_sym__filter(GElf_Sym *sym)
 {
-	switch (type) {
-	case MAP__FUNCTION:
-		return elf_sym__is_function(sym);
-	case MAP__VARIABLE:
-		return elf_sym__is_object(sym);
-	default:
-		return false;
-	}
+	return elf_sym__is_function(sym) || elf_sym__is_object(sym);
 }
 
 static inline const char *elf_sym__name(const GElf_Sym *sym,
@@ -150,17 +143,10 @@ static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
 	return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
 }
 
-static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs,
-			  enum map_type type)
+static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
 {
-	switch (type) {
-	case MAP__FUNCTION:
-		return elf_sec__is_text(shdr, secstrs);
-	case MAP__VARIABLE:
-		return elf_sec__is_data(shdr, secstrs);
-	default:
-		return false;
-	}
+	return elf_sec__is_text(shdr, secstrs) || 
+	       elf_sec__is_data(shdr, secstrs);
 }
 
 static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
@@ -256,7 +242,7 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
  * And always look at the original dso, not at debuginfo packages, that
  * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
  */
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map)
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
 {
 	uint32_t nr_rel_entries, idx;
 	GElf_Sym sym;
@@ -338,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
 			plt_entry_size = 16;
 			break;
 
-		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+		case EM_SPARC:
+			plt_header_size = 48;
+			plt_entry_size = 12;
+			break;
+
+		case EM_SPARCV9:
+			plt_header_size = 128;
+			plt_entry_size = 32;
+			break;
+
+		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
 			plt_header_size = shdr_plt.sh_entsize;
 			plt_entry_size = shdr_plt.sh_entsize;
 			break;
@@ -364,12 +360,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
 			free(demangled);
 
 			f = symbol__new(plt_offset, plt_entry_size,
-					STB_GLOBAL, sympltname);
+					STB_GLOBAL, STT_FUNC, sympltname);
 			if (!f)
 				goto out_elf_end;
 
 			plt_offset += plt_entry_size;
-			symbols__insert(&dso->symbols[map->type], f);
+			symbols__insert(&dso->symbols, f);
 			++nr;
 		}
 	} else if (shdr_rel_plt.sh_type == SHT_REL) {
@@ -390,12 +386,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
 			free(demangled);
 
 			f = symbol__new(plt_offset, plt_entry_size,
-					STB_GLOBAL, sympltname);
+					STB_GLOBAL, STT_FUNC, sympltname);
 			if (!f)
 				goto out_elf_end;
 
 			plt_offset += plt_entry_size;
-			symbols__insert(&dso->symbols[map->type], f);
+			symbols__insert(&dso->symbols, f);
 			++nr;
 		}
 	}
@@ -811,6 +807,110 @@ static u64 ref_reloc(struct kmap *kmap)
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 		GElf_Sym *sym __maybe_unused) { }
 
+static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
+				      GElf_Sym *sym, GElf_Shdr *shdr,
+				      struct map_groups *kmaps, struct kmap *kmap,
+				      struct dso **curr_dsop, struct map **curr_mapp,
+				      const char *section_name,
+				      bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+{
+	struct dso *curr_dso = *curr_dsop;
+	struct map *curr_map;
+	char dso_name[PATH_MAX];
+
+	/* Adjust symbol to map to file offset */
+	if (adjust_kernel_syms)
+		sym->st_value -= shdr->sh_addr - shdr->sh_offset;
+
+	if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
+		return 0;
+
+	if (strcmp(section_name, ".text") == 0) {
+		/*
+		 * The initial kernel mapping is based on
+		 * kallsyms and identity maps.  Overwrite it to
+		 * map to the kernel dso.
+		 */
+		if (*remap_kernel && dso->kernel) {
+			*remap_kernel = false;
+			map->start = shdr->sh_addr + ref_reloc(kmap);
+			map->end = map->start + shdr->sh_size;
+			map->pgoff = shdr->sh_offset;
+			map->map_ip = map__map_ip;
+			map->unmap_ip = map__unmap_ip;
+			/* Ensure maps are correctly ordered */
+			if (kmaps) {
+				map__get(map);
+				map_groups__remove(kmaps, map);
+				map_groups__insert(kmaps, map);
+				map__put(map);
+			}
+		}
+
+		/*
+		 * The initial module mapping is based on
+		 * /proc/modules mapped to offset zero.
+		 * Overwrite it to map to the module dso.
+		 */
+		if (*remap_kernel && kmodule) {
+			*remap_kernel = false;
+			map->pgoff = shdr->sh_offset;
+		}
+
+		*curr_mapp = map;
+		*curr_dsop = dso;
+		return 0;
+	}
+
+	if (!kmap)
+		return 0;
+
+	snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
+
+	curr_map = map_groups__find_by_name(kmaps, dso_name);
+	if (curr_map == NULL) {
+		u64 start = sym->st_value;
+
+		if (kmodule)
+			start += map->start + shdr->sh_offset;
+
+		curr_dso = dso__new(dso_name);
+		if (curr_dso == NULL)
+			return -1;
+		curr_dso->kernel = dso->kernel;
+		curr_dso->long_name = dso->long_name;
+		curr_dso->long_name_len = dso->long_name_len;
+		curr_map = map__new2(start, curr_dso);
+		dso__put(curr_dso);
+		if (curr_map == NULL)
+			return -1;
+
+		if (adjust_kernel_syms) {
+			curr_map->start  = shdr->sh_addr + ref_reloc(kmap);
+			curr_map->end	 = curr_map->start + shdr->sh_size;
+			curr_map->pgoff	 = shdr->sh_offset;
+		} else {
+			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+		}
+		curr_dso->symtab_type = dso->symtab_type;
+		map_groups__insert(kmaps, curr_map);
+		/*
+		 * Add it before we drop the referece to curr_map, i.e. while
+		 * we still are sure to have a reference to this DSO via
+		 * *curr_map->dso.
+		 */
+		dsos__add(&map->groups->machine->dsos, curr_dso);
+		/* kmaps already got it */
+		map__put(curr_map);
+		dso__set_loaded(curr_dso);
+		*curr_mapp = curr_map;
+		*curr_dsop = curr_dso;
+	} else
+		*curr_dsop = curr_map->dso;
+
+	return 0;
+}
+
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule)
 {
@@ -844,7 +944,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 	 * have the wrong values for the dso maps, so remove them.
 	 */
 	if (kmodule && syms_ss->symtab)
-		symbols__delete(&dso->symbols[map->type]);
+		symbols__delete(&dso->symbols);
 
 	if (!syms_ss->symtab) {
 		/*
@@ -921,10 +1021,10 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
 	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
 	/*
-	 * Initial kernel and module mappings do not map to the dso.  For
-	 * function mappings, flag the fixups.
+	 * Initial kernel and module mappings do not map to the dso.
+	 * Flag the fixups.
 	 */
-	if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) {
+	if (dso->kernel || kmodule) {
 		remap_kernel = true;
 		adjust_kernel_syms = dso->adjust_symbols;
 	}
@@ -936,7 +1036,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		const char *section_name;
 		bool used_opd = false;
 
-		if (!is_label && !elf_sym__is_a(&sym, map->type))
+		if (!is_label && !elf_sym__filter(&sym))
 			continue;
 
 		/* Reject ARM ELF "mapping symbols": these aren't unique and
@@ -974,7 +1074,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
 		gelf_getshdr(sec, &shdr);
 
-		if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
+		if (is_label && !elf_sec__filter(&shdr, secstrs))
 			continue;
 
 		section_name = elf_sec__name(&shdr, secstrs);
@@ -982,134 +1082,37 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		/* On ARM, symbols for thumb functions have 1 added to
 		 * the symbol address as a flag - remove it */
 		if ((ehdr.e_machine == EM_ARM) &&
-		    (map->type == MAP__FUNCTION) &&
+		    (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
 		    (sym.st_value & 1))
 			--sym.st_value;
 
 		if (dso->kernel || kmodule) {
-			char dso_name[PATH_MAX];
-
-			/* Adjust symbol to map to file offset */
-			if (adjust_kernel_syms)
-				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
-
-			if (strcmp(section_name,
-				   (curr_dso->short_name +
-				    dso->short_name_len)) == 0)
-				goto new_symbol;
-
-			if (strcmp(section_name, ".text") == 0) {
-				/*
-				 * The initial kernel mapping is based on
-				 * kallsyms and identity maps.  Overwrite it to
-				 * map to the kernel dso.
-				 */
-				if (remap_kernel && dso->kernel) {
-					remap_kernel = false;
-					map->start = shdr.sh_addr +
-						     ref_reloc(kmap);
-					map->end = map->start + shdr.sh_size;
-					map->pgoff = shdr.sh_offset;
-					map->map_ip = map__map_ip;
-					map->unmap_ip = map__unmap_ip;
-					/* Ensure maps are correctly ordered */
-					if (kmaps) {
-						map__get(map);
-						map_groups__remove(kmaps, map);
-						map_groups__insert(kmaps, map);
-						map__put(map);
-					}
-				}
-
-				/*
-				 * The initial module mapping is based on
-				 * /proc/modules mapped to offset zero.
-				 * Overwrite it to map to the module dso.
-				 */
-				if (remap_kernel && kmodule) {
-					remap_kernel = false;
-					map->pgoff = shdr.sh_offset;
-				}
-
-				curr_map = map;
-				curr_dso = dso;
-				goto new_symbol;
-			}
-
-			if (!kmap)
-				goto new_symbol;
-
-			snprintf(dso_name, sizeof(dso_name),
-				 "%s%s", dso->short_name, section_name);
-
-			curr_map = map_groups__find_by_name(kmaps, map->type, dso_name);
-			if (curr_map == NULL) {
-				u64 start = sym.st_value;
-
-				if (kmodule)
-					start += map->start + shdr.sh_offset;
-
-				curr_dso = dso__new(dso_name);
-				if (curr_dso == NULL)
-					goto out_elf_end;
-				curr_dso->kernel = dso->kernel;
-				curr_dso->long_name = dso->long_name;
-				curr_dso->long_name_len = dso->long_name_len;
-				curr_map = map__new2(start, curr_dso,
-						     map->type);
-				dso__put(curr_dso);
-				if (curr_map == NULL) {
-					goto out_elf_end;
-				}
-				if (adjust_kernel_syms) {
-					curr_map->start = shdr.sh_addr +
-							  ref_reloc(kmap);
-					curr_map->end = curr_map->start +
-							shdr.sh_size;
-					curr_map->pgoff = shdr.sh_offset;
-				} else {
-					curr_map->map_ip = identity__map_ip;
-					curr_map->unmap_ip = identity__map_ip;
-				}
-				curr_dso->symtab_type = dso->symtab_type;
-				map_groups__insert(kmaps, curr_map);
-				/*
-				 * Add it before we drop the referece to curr_map,
-				 * i.e. while we still are sure to have a reference
-				 * to this DSO via curr_map->dso.
-				 */
-				dsos__add(&map->groups->machine->dsos, curr_dso);
-				/* kmaps already got it */
-				map__put(curr_map);
-				dso__set_loaded(curr_dso, map->type);
-			} else
-				curr_dso = curr_map->dso;
-
-			goto new_symbol;
-		}
-
-		if ((used_opd && runtime_ss->adjust_symbols)
-				|| (!used_opd && syms_ss->adjust_symbols)) {
+			if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
+						       section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+				goto out_elf_end;
+		} else if ((used_opd && runtime_ss->adjust_symbols) ||
+			   (!used_opd && syms_ss->adjust_symbols)) {
 			pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
 				  "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
 				  (u64)sym.st_value, (u64)shdr.sh_addr,
 				  (u64)shdr.sh_offset);
 			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 		}
-new_symbol:
+
 		demangled = demangle_sym(dso, kmodule, elf_name);
 		if (demangled != NULL)
 			elf_name = demangled;
 
 		f = symbol__new(sym.st_value, sym.st_size,
-				GELF_ST_BIND(sym.st_info), elf_name);
+				GELF_ST_BIND(sym.st_info),
+				GELF_ST_TYPE(sym.st_info), elf_name);
 		free(demangled);
 		if (!f)
 			goto out_elf_end;
 
 		arch__sym_update(f, &sym);
 
-		__symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel);
+		__symbols__insert(&curr_dso->symbols, f, dso->kernel);
 		nr++;
 	}
 
@@ -1117,14 +1120,14 @@ new_symbol:
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
 	if (nr > 0) {
-		symbols__fixup_end(&dso->symbols[map->type]);
-		symbols__fixup_duplicate(&dso->symbols[map->type]);
+		symbols__fixup_end(&dso->symbols);
+		symbols__fixup_duplicate(&dso->symbols);
 		if (kmap) {
 			/*
 			 * We need to fixup this here too because we create new
 			 * maps here, for things like vsyscall sections.
 			 */
-			__map_groups__fixup_end(kmaps, map->type);
+			map_groups__fixup_end(kmaps);
 		}
 	}
 	err = nr;
@@ -1393,8 +1396,16 @@ static off_t kcore__write(struct kcore *kcore)
 
 struct phdr_data {
 	off_t offset;
+	off_t rel;
 	u64 addr;
 	u64 len;
+	struct list_head node;
+	struct phdr_data *remaps;
+};
+
+struct sym_data {
+	u64 addr;
+	struct list_head node;
 };
 
 struct kcore_copy_info {
@@ -1404,16 +1415,78 @@ struct kcore_copy_info {
 	u64 last_symbol;
 	u64 first_module;
 	u64 last_module_symbol;
-	struct phdr_data kernel_map;
-	struct phdr_data modules_map;
+	size_t phnum;
+	struct list_head phdrs;
+	struct list_head syms;
 };
 
+#define kcore_copy__for_each_phdr(k, p) \
+	list_for_each_entry((p), &(k)->phdrs, node)
+
+static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
+{
+	struct phdr_data *p = zalloc(sizeof(*p));
+
+	if (p) {
+		p->addr   = addr;
+		p->len    = len;
+		p->offset = offset;
+	}
+
+	return p;
+}
+
+static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
+						 u64 addr, u64 len,
+						 off_t offset)
+{
+	struct phdr_data *p = phdr_data__new(addr, len, offset);
+
+	if (p)
+		list_add_tail(&p->node, &kci->phdrs);
+
+	return p;
+}
+
+static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
+		list_del(&p->node);
+		free(p);
+	}
+}
+
+static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
+					    u64 addr)
+{
+	struct sym_data *s = zalloc(sizeof(*s));
+
+	if (s) {
+		s->addr = addr;
+		list_add_tail(&s->node, &kci->syms);
+	}
+
+	return s;
+}
+
+static void kcore_copy__free_syms(struct kcore_copy_info *kci)
+{
+	struct sym_data *s, *tmp;
+
+	list_for_each_entry_safe(s, tmp, &kci->syms, node) {
+		list_del(&s->node);
+		free(s);
+	}
+}
+
 static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
 					u64 start)
 {
 	struct kcore_copy_info *kci = arg;
 
-	if (!symbol_type__is_a(type, MAP__FUNCTION))
+	if (!kallsyms__is_function(type))
 		return 0;
 
 	if (strchr(name, '[')) {
@@ -1438,6 +1511,9 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
 		return 0;
 	}
 
+	if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
+		return -1;
+
 	return 0;
 }
 
@@ -1487,27 +1563,39 @@ static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
 	return 0;
 }
 
-static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff,
-			    u64 s, u64 e)
+static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
+			   u64 pgoff, u64 s, u64 e)
 {
-	if (p->addr || s < start || s >= end)
-		return;
+	u64 len, offset;
 
-	p->addr = s;
-	p->offset = (s - start) + pgoff;
-	p->len = e < end ? e - s : end - s;
+	if (s < start || s >= end)
+		return 0;
+
+	offset = (s - start) + pgoff;
+	len = e < end ? e - s : end - s;
+
+	return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
 }
 
 static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
 {
 	struct kcore_copy_info *kci = data;
 	u64 end = start + len;
+	struct sym_data *sdat;
+
+	if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
+		return -1;
 
-	kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext,
-			kci->etext);
+	if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
+			    kci->last_module_symbol))
+		return -1;
 
-	kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module,
-			kci->last_module_symbol);
+	list_for_each_entry(sdat, &kci->syms, node) {
+		u64 s = round_down(sdat->addr, page_size);
+
+		if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
+			return -1;
+	}
 
 	return 0;
 }
@@ -1520,6 +1608,64 @@ static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
 	return 0;
 }
 
+static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p, *k = NULL;
+	u64 kend;
+
+	if (!kci->stext)
+		return;
+
+	/* Find phdr that corresponds to the kernel map (contains stext) */
+	kcore_copy__for_each_phdr(kci, p) {
+		u64 pend = p->addr + p->len - 1;
+
+		if (p->addr <= kci->stext && pend >= kci->stext) {
+			k = p;
+			break;
+		}
+	}
+
+	if (!k)
+		return;
+
+	kend = k->offset + k->len;
+
+	/* Find phdrs that remap the kernel */
+	kcore_copy__for_each_phdr(kci, p) {
+		u64 pend = p->offset + p->len;
+
+		if (p == k)
+			continue;
+
+		if (p->offset >= k->offset && pend <= kend)
+			p->remaps = k;
+	}
+}
+
+static void kcore_copy__layout(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p;
+	off_t rel = 0;
+
+	kcore_copy__find_remaps(kci);
+
+	kcore_copy__for_each_phdr(kci, p) {
+		if (!p->remaps) {
+			p->rel = rel;
+			rel += p->len;
+		}
+		kci->phnum += 1;
+	}
+
+	kcore_copy__for_each_phdr(kci, p) {
+		struct phdr_data *k = p->remaps;
+
+		if (k)
+			p->rel = p->offset - k->offset + k->rel;
+	}
+}
+
 static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
 				 Elf *elf)
 {
@@ -1555,7 +1701,12 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
 	if (kci->first_module && !kci->last_module_symbol)
 		return -1;
 
-	return kcore_copy__read_maps(kci, elf);
+	if (kcore_copy__read_maps(kci, elf))
+		return -1;
+
+	kcore_copy__layout(kci);
+
+	return 0;
 }
 
 static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
@@ -1678,12 +1829,15 @@ int kcore_copy(const char *from_dir, const char *to_dir)
 {
 	struct kcore kcore;
 	struct kcore extract;
-	size_t count = 2;
 	int idx = 0, err = -1;
-	off_t offset = page_size, sz, modules_offset = 0;
+	off_t offset, sz;
 	struct kcore_copy_info kci = { .stext = 0, };
 	char kcore_filename[PATH_MAX];
 	char extract_filename[PATH_MAX];
+	struct phdr_data *p;
+
+	INIT_LIST_HEAD(&kci.phdrs);
+	INIT_LIST_HEAD(&kci.syms);
 
 	if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
 		return -1;
@@ -1703,20 +1857,17 @@ int kcore_copy(const char *from_dir, const char *to_dir)
 	if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
 		goto out_kcore_close;
 
-	if (!kci.modules_map.addr)
-		count -= 1;
-
-	if (kcore__copy_hdr(&kcore, &extract, count))
+	if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
 		goto out_extract_close;
 
-	if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr,
-			    kci.kernel_map.len))
-		goto out_extract_close;
+	offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
+		 gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
+	offset = round_up(offset, page_size);
+
+	kcore_copy__for_each_phdr(&kci, p) {
+		off_t offs = p->rel + offset;
 
-	if (kci.modules_map.addr) {
-		modules_offset = offset + kci.kernel_map.len;
-		if (kcore__add_phdr(&extract, idx, modules_offset,
-				    kci.modules_map.addr, kci.modules_map.len))
+		if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
 			goto out_extract_close;
 	}
 
@@ -1724,14 +1875,14 @@ int kcore_copy(const char *from_dir, const char *to_dir)
 	if (sz < 0 || sz > offset)
 		goto out_extract_close;
 
-	if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset,
-		       kci.kernel_map.len))
-		goto out_extract_close;
+	kcore_copy__for_each_phdr(&kci, p) {
+		off_t offs = p->rel + offset;
 
-	if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset,
-					 extract.fd, modules_offset,
-					 kci.modules_map.len))
-		goto out_extract_close;
+		if (p->remaps)
+			continue;
+		if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
+			goto out_extract_close;
+	}
 
 	if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
 		goto out_extract_close;
@@ -1754,6 +1905,9 @@ out_unlink_kallsyms:
 	if (err)
 		kcore_copy__unlink(to_dir, "kallsyms");
 
+	kcore_copy__free_phdrs(&kci);
+	kcore_copy__free_syms(&kci);
+
 	return err;
 }
 
@@ -1803,6 +1957,34 @@ void kcore_extract__delete(struct kcore_extract *kce)
 }
 
 #ifdef HAVE_GELF_GETNOTE_SUPPORT
+
+static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	if (tmp->bit32)
+		tmp->addr.a32[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a32[SDT_NOTE_IDX_BASE];
+	else
+		tmp->addr.a64[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a64[SDT_NOTE_IDX_BASE];
+}
+
+static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr,
+			      GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	if (tmp->bit32 && tmp->addr.a32[SDT_NOTE_IDX_REFCTR])
+		tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+	else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR])
+		tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+}
+
 /**
  * populate_sdt_note : Parse raw data and identify SDT note
  * @elf: elf of the opened file
@@ -1820,7 +2002,6 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len,
 	const char *provider, *name, *args;
 	struct sdt_note *tmp = NULL;
 	GElf_Ehdr ehdr;
-	GElf_Addr base_off = 0;
 	GElf_Shdr shdr;
 	int ret = -EINVAL;
 
@@ -1916,17 +2097,12 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len,
 	 * base address in the description of the SDT note. If its different,
 	 * then accordingly, adjust the note location.
 	 */
-	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) {
-		base_off = shdr.sh_offset;
-		if (base_off) {
-			if (tmp->bit32)
-				tmp->addr.a32[0] = tmp->addr.a32[0] + base_off -
-					tmp->addr.a32[1];
-			else
-				tmp->addr.a64[0] = tmp->addr.a64[0] + base_off -
-					tmp->addr.a64[1];
-		}
-	}
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL))
+		sdt_adjust_loc(tmp, shdr.sh_offset);
+
+	/* Adjust reference counter offset */
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL))
+		sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset);
 
 	list_add_tail(&tmp->note_list, sdt_notes);
 	return 0;
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index ff48d0d49584..7119df77dc0b 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -288,8 +288,7 @@ void symsrc__destroy(struct symsrc *ss)
 }
 
 int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
-				struct symsrc *ss __maybe_unused,
-				struct map *map __maybe_unused)
+				struct symsrc *ss __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1466814ebada..d188b7588152 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <linux/kernel.h>
+#include <linux/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/param.h>
@@ -39,7 +40,6 @@ char **vmlinux_path;
 struct symbol_conf symbol_conf = {
 	.use_modules		= true,
 	.try_vmlinux_path	= true,
-	.annotate_src		= true,
 	.demangle		= true,
 	.demangle_kernel	= false,
 	.cumulate_callchain	= true,
@@ -70,18 +70,10 @@ static enum dso_binary_type binary_type_symtab[] = {
 
 #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
 
-bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+static bool symbol_type__filter(char symbol_type)
 {
 	symbol_type = toupper(symbol_type);
-
-	switch (map_type) {
-	case MAP__FUNCTION:
-		return symbol_type == 'T' || symbol_type == 'W';
-	case MAP__VARIABLE:
-		return symbol_type == 'D';
-	default:
-		return false;
-	}
+	return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B';
 }
 
 static int prefix_underscores_count(const char *str)
@@ -228,9 +220,9 @@ void symbols__fixup_end(struct rb_root *symbols)
 		curr->end = roundup(curr->start, 4096) + 4096;
 }
 
-void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
+void map_groups__fixup_end(struct map_groups *mg)
 {
-	struct maps *maps = &mg->maps[type];
+	struct maps *maps = &mg->maps;
 	struct map *next, *curr;
 
 	down_write(&maps->lock);
@@ -256,7 +248,7 @@ out_unlock:
 	up_write(&maps->lock);
 }
 
-struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
 {
 	size_t namelen = strlen(name) + 1;
 	struct symbol *sym = calloc(1, (symbol_conf.priv_size +
@@ -274,6 +266,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
 
 	sym->start   = start;
 	sym->end     = len ? start + len : start;
+	sym->type    = type;
 	sym->binding = binding;
 	sym->namelen = namelen - 1;
 
@@ -484,45 +477,40 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 
 void dso__reset_find_symbol_cache(struct dso *dso)
 {
-	enum map_type type;
-
-	for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
-		dso->last_find_result[type].addr   = 0;
-		dso->last_find_result[type].symbol = NULL;
-	}
+	dso->last_find_result.addr   = 0;
+	dso->last_find_result.symbol = NULL;
 }
 
-void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
+void dso__insert_symbol(struct dso *dso, struct symbol *sym)
 {
-	__symbols__insert(&dso->symbols[type], sym, dso->kernel);
+	__symbols__insert(&dso->symbols, sym, dso->kernel);
 
 	/* update the symbol cache if necessary */
-	if (dso->last_find_result[type].addr >= sym->start &&
-	    (dso->last_find_result[type].addr < sym->end ||
+	if (dso->last_find_result.addr >= sym->start &&
+	    (dso->last_find_result.addr < sym->end ||
 	    sym->start == sym->end)) {
-		dso->last_find_result[type].symbol = sym;
+		dso->last_find_result.symbol = sym;
 	}
 }
 
-struct symbol *dso__find_symbol(struct dso *dso,
-				enum map_type type, u64 addr)
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
 {
-	if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) {
-		dso->last_find_result[type].addr   = addr;
-		dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
+	if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
+		dso->last_find_result.addr   = addr;
+		dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
 	}
 
-	return dso->last_find_result[type].symbol;
+	return dso->last_find_result.symbol;
 }
 
-struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
+struct symbol *dso__first_symbol(struct dso *dso)
 {
-	return symbols__first(&dso->symbols[type]);
+	return symbols__first(&dso->symbols);
 }
 
-struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+struct symbol *dso__last_symbol(struct dso *dso)
 {
-	return symbols__last(&dso->symbols[type]);
+	return symbols__last(&dso->symbols);
 }
 
 struct symbol *dso__next_symbol(struct symbol *sym)
@@ -539,24 +527,22 @@ struct symbol *symbol__next_by_name(struct symbol *sym)
 }
 
  /*
-  * Teturns first symbol that matched with @name.
+  * Returns first symbol that matched with @name.
   */
-struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
-					const char *name)
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
 {
-	struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name,
+	struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
 						 SYMBOL_TAG_INCLUDE__NONE);
 	if (!s)
-		s = symbols__find_by_name(&dso->symbol_names[type], name,
+		s = symbols__find_by_name(&dso->symbol_names, name,
 					  SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
 	return s;
 }
 
-void dso__sort_by_name(struct dso *dso, enum map_type type)
+void dso__sort_by_name(struct dso *dso)
 {
-	dso__set_sorted_by_name(dso, type);
-	return symbols__sort_by_name(&dso->symbol_names[type],
-				     &dso->symbols[type]);
+	dso__set_sorted_by_name(dso);
+	return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
 }
 
 int modules__parse(const char *filename, void *arg,
@@ -621,11 +607,6 @@ out:
 	return err;
 }
 
-struct process_kallsyms_args {
-	struct map *map;
-	struct dso *dso;
-};
-
 /*
  * These are symbols in the kernel image, so make sure that
  * sym is from a kernel DSO.
@@ -661,10 +642,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
 				       char type, u64 start)
 {
 	struct symbol *sym;
-	struct process_kallsyms_args *a = arg;
-	struct rb_root *root = &a->dso->symbols[a->map->type];
+	struct dso *dso = arg;
+	struct rb_root *root = &dso->symbols;
 
-	if (!symbol_type__is_a(type, a->map->type))
+	if (!symbol_type__filter(type))
 		return 0;
 
 	/*
@@ -672,7 +653,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
 	 * symbols, setting length to 0, and rely on
 	 * symbols__fixup_end() to fix it up.
 	 */
-	sym = symbol__new(start, 0, kallsyms2elf_binding(type), name);
+	sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
 	if (sym == NULL)
 		return -ENOMEM;
 	/*
@@ -689,21 +670,18 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
  * so that we can in the next step set the symbol ->end address and then
  * call kernel_maps__split_kallsyms.
  */
-static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
-				  struct map *map)
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
 {
-	struct process_kallsyms_args args = { .map = map, .dso = dso, };
-	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
+	return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
 }
 
-static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
+static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso)
 {
-	struct map_groups *kmaps = map__kmaps(map);
 	struct map *curr_map;
 	struct symbol *pos;
 	int count = 0;
-	struct rb_root old_root = dso->symbols[map->type];
-	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_root old_root = dso->symbols;
+	struct rb_root *root = &dso->symbols;
 	struct rb_node *next = rb_first(root);
 
 	if (!kmaps)
@@ -723,7 +701,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
 		if (module)
 			*module = '\0';
 
-		curr_map = map_groups__find(kmaps, map->type, pos->start);
+		curr_map = map_groups__find(kmaps, pos->start);
 
 		if (!curr_map) {
 			symbol__delete(pos);
@@ -733,7 +711,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
 		pos->start -= curr_map->start - curr_map->pgoff;
 		if (pos->end)
 			pos->end -= curr_map->start - curr_map->pgoff;
-		symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+		symbols__insert(&curr_map->dso->symbols, pos);
 		++count;
 	}
 
@@ -748,22 +726,25 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
  * kernel range is broken in several maps, named [kernel].N, as we don't have
  * the original ELF section names vmlinux have.
  */
-static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
+static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta,
+				      struct map *initial_map)
 {
-	struct map_groups *kmaps = map__kmaps(map);
 	struct machine *machine;
-	struct map *curr_map = map;
+	struct map *curr_map = initial_map;
 	struct symbol *pos;
 	int count = 0, moved = 0;
-	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_root *root = &dso->symbols;
 	struct rb_node *next = rb_first(root);
 	int kernel_range = 0;
+	bool x86_64;
 
 	if (!kmaps)
 		return -1;
 
 	machine = kmaps->machine;
 
+	x86_64 = machine__is(machine, "x86_64");
+
 	while (next) {
 		char *module;
 
@@ -778,7 +759,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 			*module++ = '\0';
 
 			if (strcmp(curr_map->dso->short_name, module)) {
-				if (curr_map != map &&
+				if (curr_map != initial_map &&
 				    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
 				    machine__is_default_guest(machine)) {
 					/*
@@ -788,18 +769,16 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 					 * symbols are in its kmap. Mark it as
 					 * loaded.
 					 */
-					dso__set_loaded(curr_map->dso,
-							curr_map->type);
+					dso__set_loaded(curr_map->dso);
 				}
 
-				curr_map = map_groups__find_by_name(kmaps,
-							map->type, module);
+				curr_map = map_groups__find_by_name(kmaps, module);
 				if (curr_map == NULL) {
 					pr_debug("%s/proc/{kallsyms,modules} "
 					         "inconsistency while looking "
 						 "for \"%s\" module!\n",
 						 machine->root_dir, module);
-					curr_map = map;
+					curr_map = initial_map;
 					goto discard_symbol;
 				}
 
@@ -809,11 +788,21 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 			}
 			/*
 			 * So that we look just like we get from .ko files,
-			 * i.e. not prelinked, relative to map->start.
+			 * i.e. not prelinked, relative to initial_map->start.
 			 */
 			pos->start = curr_map->map_ip(curr_map, pos->start);
 			pos->end   = curr_map->map_ip(curr_map, pos->end);
-		} else if (curr_map != map) {
+		} else if (x86_64 && is_entry_trampoline(pos->name)) {
+			/*
+			 * These symbols are not needed anymore since the
+			 * trampoline maps refer to the text section and it's
+			 * symbols instead. Avoid having to deal with
+			 * relocations, and the assumption that the first symbol
+			 * is the start of kernel text, by simply removing the
+			 * symbols at this point.
+			 */
+			goto discard_symbol;
+		} else if (curr_map != initial_map) {
 			char dso_name[PATH_MAX];
 			struct dso *ndso;
 
@@ -824,7 +813,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 			}
 
 			if (count == 0) {
-				curr_map = map;
+				curr_map = initial_map;
 				goto add_symbol;
 			}
 
@@ -843,7 +832,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 
 			ndso->kernel = dso->kernel;
 
-			curr_map = map__new2(pos->start, ndso, map->type);
+			curr_map = map__new2(pos->start, ndso);
 			if (curr_map == NULL) {
 				dso__put(ndso);
 				return -1;
@@ -858,9 +847,9 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 			pos->end -= delta;
 		}
 add_symbol:
-		if (curr_map != map) {
+		if (curr_map != initial_map) {
 			rb_erase(&pos->rb_node, root);
-			symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+			symbols__insert(&curr_map->dso->symbols, pos);
 			++moved;
 		} else
 			++count;
@@ -871,10 +860,10 @@ discard_symbol:
 		symbol__delete(pos);
 	}
 
-	if (curr_map != map &&
+	if (curr_map != initial_map &&
 	    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
 	    machine__is_default_guest(kmaps->machine)) {
-		dso__set_loaded(curr_map->dso, curr_map->type);
+		dso__set_loaded(curr_map->dso);
 	}
 
 	return count + moved;
@@ -1035,7 +1024,12 @@ out_delete_from:
 	return ret;
 }
 
-static int do_validate_kcore_modules(const char *filename, struct map *map,
+struct map *map_groups__first(struct map_groups *mg)
+{
+	return maps__first(&mg->maps);
+}
+
+static int do_validate_kcore_modules(const char *filename,
 				  struct map_groups *kmaps)
 {
 	struct rb_root modules = RB_ROOT;
@@ -1046,13 +1040,12 @@ static int do_validate_kcore_modules(const char *filename, struct map *map,
 	if (err)
 		return err;
 
-	old_map = map_groups__first(kmaps, map->type);
+	old_map = map_groups__first(kmaps);
 	while (old_map) {
 		struct map *next = map_groups__next(old_map);
 		struct module_info *mi;
 
-		if (old_map == map || old_map->start == map->start) {
-			/* The kernel map */
+		if (!__map__is_kmodule(old_map)) {
 			old_map = next;
 			continue;
 		}
@@ -1109,7 +1102,7 @@ static int validate_kcore_modules(const char *kallsyms_filename,
 					     kallsyms_filename))
 		return -EINVAL;
 
-	if (do_validate_kcore_modules(modules_filename, map, kmaps))
+	if (do_validate_kcore_modules(modules_filename, kmaps))
 		return -EINVAL;
 
 	return 0;
@@ -1138,7 +1131,6 @@ static int validate_kcore_addresses(const char *kallsyms_filename,
 
 struct kcore_mapfn_data {
 	struct dso *dso;
-	enum map_type type;
 	struct list_head maps;
 };
 
@@ -1147,7 +1139,7 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
 	struct kcore_mapfn_data *md = data;
 	struct map *map;
 
-	map = map__new2(start, md->dso, md->type);
+	map = map__new2(start, md->dso);
 	if (map == NULL)
 		return -ENOMEM;
 
@@ -1163,13 +1155,13 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 			   const char *kallsyms_filename)
 {
 	struct map_groups *kmaps = map__kmaps(map);
-	struct machine *machine;
 	struct kcore_mapfn_data md;
 	struct map *old_map, *new_map, *replacement_map = NULL;
+	struct machine *machine;
 	bool is_64_bit;
 	int err, fd;
 	char kcore_filename[PATH_MAX];
-	struct symbol *sym;
+	u64 stext;
 
 	if (!kmaps)
 		return -EINVAL;
@@ -1177,7 +1169,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	machine = kmaps->machine;
 
 	/* This function requires that the map is the kernel map */
-	if (map != machine->vmlinux_maps[map->type])
+	if (!__map__is_kernel(map))
 		return -EINVAL;
 
 	if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
@@ -1189,7 +1181,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 		return -EINVAL;
 
 	md.dso = dso;
-	md.type = map->type;
 	INIT_LIST_HEAD(&md.maps);
 
 	fd = open(kcore_filename, O_RDONLY);
@@ -1200,7 +1191,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	}
 
 	/* Read new maps into temporary lists */
-	err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
+	err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md,
 			      &is_64_bit);
 	if (err)
 		goto out_err;
@@ -1212,7 +1203,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	}
 
 	/* Remove old maps */
-	old_map = map_groups__first(kmaps, map->type);
+	old_map = map_groups__first(kmaps);
 	while (old_map) {
 		struct map *next = map_groups__next(old_map);
 
@@ -1220,14 +1211,15 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 			map_groups__remove(kmaps, old_map);
 		old_map = next;
 	}
+	machine->trampolines_mapped = false;
 
-	/* Find the kernel map using the first symbol */
-	sym = dso__first_symbol(dso, map->type);
-	list_for_each_entry(new_map, &md.maps, node) {
-		if (sym && sym->start >= new_map->start &&
-		    sym->start < new_map->end) {
-			replacement_map = new_map;
-			break;
+	/* Find the kernel map using the '_stext' symbol */
+	if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+		list_for_each_entry(new_map, &md.maps, node) {
+			if (stext >= new_map->start && stext < new_map->end) {
+				replacement_map = new_map;
+				break;
+			}
 		}
 	}
 
@@ -1256,6 +1248,19 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 		map__put(new_map);
 	}
 
+	if (machine__is(machine, "x86_64")) {
+		u64 addr;
+
+		/*
+		 * If one of the corresponding symbols is there, assume the
+		 * entry trampoline maps are too.
+		 */
+		if (!kallsyms__get_function_start(kallsyms_filename,
+						  ENTRY_TRAMPOLINE_NAME,
+						  &addr))
+			machine->trampolines_mapped = true;
+	}
+
 	/*
 	 * Set the data type and long name so that kcore can be read via
 	 * dso__data_read_addr().
@@ -1268,7 +1273,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 
 	close(fd);
 
-	if (map->type == MAP__FUNCTION)
+	if (map->prot & PROT_EXEC)
 		pr_debug("Using %s for kernel object code\n", kcore_filename);
 	else
 		pr_debug("Using %s for kernel data\n", kcore_filename);
@@ -1289,14 +1294,10 @@ out_err:
  * If the kernel is relocated at boot time, kallsyms won't match.  Compute the
  * delta based on the relocation reference symbol.
  */
-static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
+static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
 {
-	struct kmap *kmap = map__kmap(map);
 	u64 addr;
 
-	if (!kmap)
-		return -1;
-
 	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
 		return 0;
 
@@ -1310,19 +1311,23 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
 int __dso__load_kallsyms(struct dso *dso, const char *filename,
 			 struct map *map, bool no_kcore)
 {
+	struct kmap *kmap = map__kmap(map);
 	u64 delta = 0;
 
 	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
 		return -1;
 
-	if (dso__load_all_kallsyms(dso, filename, map) < 0)
+	if (!kmap || !kmap->kmaps)
 		return -1;
 
-	if (kallsyms__delta(map, filename, &delta))
+	if (dso__load_all_kallsyms(dso, filename) < 0)
 		return -1;
 
-	symbols__fixup_end(&dso->symbols[map->type]);
-	symbols__fixup_duplicate(&dso->symbols[map->type]);
+	if (kallsyms__delta(kmap, filename, &delta))
+		return -1;
+
+	symbols__fixup_end(&dso->symbols);
+	symbols__fixup_duplicate(&dso->symbols);
 
 	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
 		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
@@ -1330,9 +1335,9 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
 	if (!no_kcore && !dso__load_kcore(dso, map, filename))
-		return dso__split_kallsyms_for_kcore(dso, map);
+		return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso);
 	else
-		return dso__split_kallsyms(dso, map, delta);
+		return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map);
 }
 
 int dso__load_kallsyms(struct dso *dso, const char *filename,
@@ -1341,8 +1346,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename,
 	return __dso__load_kallsyms(dso, filename, map, false);
 }
 
-static int dso__load_perf_map(const char *map_path, struct dso *dso,
-			      struct map *map)
+static int dso__load_perf_map(const char *map_path, struct dso *dso)
 {
 	char *line = NULL;
 	size_t n;
@@ -1379,12 +1383,12 @@ static int dso__load_perf_map(const char *map_path, struct dso *dso,
 		if (len + 2 >= line_len)
 			continue;
 
-		sym = symbol__new(start, size, STB_GLOBAL, line + len);
+		sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
 
 		if (sym == NULL)
 			goto out_delete_line;
 
-		symbols__insert(&dso->symbols[map->type], sym);
+		symbols__insert(&dso->symbols, sym);
 		nr_syms++;
 	}
 
@@ -1509,25 +1513,27 @@ int dso__load(struct dso *dso, struct map *map)
 	pthread_mutex_lock(&dso->lock);
 
 	/* check again under the dso->lock */
-	if (dso__loaded(dso, map->type)) {
+	if (dso__loaded(dso)) {
 		ret = 1;
 		goto out;
 	}
 
+	if (map->groups && map->groups->machine)
+		machine = map->groups->machine;
+	else
+		machine = NULL;
+
 	if (dso->kernel) {
 		if (dso->kernel == DSO_TYPE_KERNEL)
 			ret = dso__load_kernel_sym(dso, map);
 		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
 			ret = dso__load_guest_kernel_sym(dso, map);
 
+		if (machine__is(machine, "x86_64"))
+			machine__map_x86_64_entry_trampolines(machine, dso);
 		goto out;
 	}
 
-	if (map->groups && map->groups->machine)
-		machine = map->groups->machine;
-	else
-		machine = NULL;
-
 	dso->adjust_symbols = 0;
 
 	if (perfmap) {
@@ -1542,7 +1548,7 @@ int dso__load(struct dso *dso, struct map *map)
 			goto out;
 		}
 
-		ret = dso__load_perf_map(map_path, dso, map);
+		ret = dso__load_perf_map(map_path, dso);
 		dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
 					     DSO_BINARY_TYPE__NOT_FOUND;
 		goto out;
@@ -1651,7 +1657,7 @@ int dso__load(struct dso *dso, struct map *map)
 	if (ret > 0) {
 		int nr_plt;
 
-		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map);
+		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
 		if (nr_plt > 0)
 			ret += nr_plt;
 	}
@@ -1663,17 +1669,16 @@ out_free:
 	if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
 		ret = 0;
 out:
-	dso__set_loaded(dso, map->type);
+	dso__set_loaded(dso);
 	pthread_mutex_unlock(&dso->lock);
 	nsinfo__mountns_exit(&nsc);
 
 	return ret;
 }
 
-struct map *map_groups__find_by_name(struct map_groups *mg,
-				     enum map_type type, const char *name)
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
 {
-	struct maps *maps = &mg->maps[type];
+	struct maps *maps = &mg->maps;
 	struct map *map;
 
 	down_read(&maps->lock);
@@ -1720,7 +1725,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
 		else
 			dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
 		dso__set_long_name(dso, vmlinux, vmlinux_allocated);
-		dso__set_loaded(dso, map->type);
+		dso__set_loaded(dso);
 		pr_debug("Using %s for symbols\n", symfs_vmlinux);
 	}
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 70c16741f50a..d026d215bdc6 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -57,7 +57,8 @@ struct symbol {
 	u64		start;
 	u64		end;
 	u16		namelen;
-	u8		binding;
+	u8		type:4;
+	u8		binding:4;
 	u8		idle:1;
 	u8		ignore:1;
 	u8		inlined:1;
@@ -89,7 +90,6 @@ struct intlist;
 
 struct symbol_conf {
 	unsigned short	priv_size;
-	unsigned short	nr_events;
 	bool		try_vmlinux_path,
 			init_annotation,
 			force,
@@ -108,8 +108,6 @@ struct symbol_conf {
 			show_cpu_utilization,
 			initialized,
 			kptr_restrict,
-			annotate_asm_raw,
-			annotate_src,
 			event_group,
 			demangle,
 			demangle_kernel,
@@ -125,7 +123,8 @@ struct symbol_conf {
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
-			*field_sep;
+			*field_sep,
+			*graph_function;
 	const char	*default_guest_vmlinux_name,
 			*default_guest_kallsyms,
 			*default_guest_modules;
@@ -259,17 +258,16 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
 			 bool no_kcore);
 int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
 
-void dso__insert_symbol(struct dso *dso, enum map_type type,
+void dso__insert_symbol(struct dso *dso,
 			struct symbol *sym);
 
-struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
-				u64 addr);
-struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
-					const char *name);
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
+
 struct symbol *symbol__next_by_name(struct symbol *sym);
 
-struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
-struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__first_symbol(struct dso *dso);
+struct symbol *dso__last_symbol(struct dso *dso);
 struct symbol *dso__next_symbol(struct symbol *sym);
 
 enum dso_type dso__type_fd(int fd);
@@ -288,7 +286,7 @@ void symbol__exit(void);
 void symbol__elf_init(void);
 int symbol__annotation_init(void);
 
-struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
 size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
 				      const struct addr_location *al,
 				      bool unknown_as_addr,
@@ -300,7 +298,6 @@ size_t __symbol__fprintf_symname(const struct symbol *sym,
 				 bool unknown_as_addr, FILE *fp);
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 size_t symbol__fprintf(struct symbol *sym, FILE *fp);
-bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 bool symbol__restricted_filename(const char *filename,
 				 const char *restricted_filename);
 int symbol__config_symfs(const struct option *opt __maybe_unused,
@@ -308,8 +305,7 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
 
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule);
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss,
-				struct map *map);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
 
 char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
 
@@ -317,7 +313,7 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
 void symbols__insert(struct rb_root *symbols, struct symbol *sym);
 void symbols__fixup_duplicate(struct rb_root *symbols);
 void symbols__fixup_end(struct rb_root *symbols);
-void __map_groups__fixup_end(struct map_groups *mg, enum map_type type);
+void map_groups__fixup_end(struct map_groups *mg);
 
 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
 int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
@@ -384,12 +380,19 @@ int get_sdt_note_list(struct list_head *head, const char *target);
 int cleanup_sdt_note_list(struct list_head *sdt_notes);
 int sdt_notes__get_count(struct list_head *start);
 
+#define SDT_PROBES_SCN ".probes"
 #define SDT_BASE_SCN ".stapsdt.base"
 #define SDT_NOTE_SCN  ".note.stapsdt"
 #define SDT_NOTE_TYPE 3
 #define SDT_NOTE_NAME "stapsdt"
 #define NR_ADDR 3
 
+enum {
+	SDT_NOTE_IDX_LOC = 0,
+	SDT_NOTE_IDX_BASE,
+	SDT_NOTE_IDX_REFCTR,
+};
+
 struct mem_info *mem_info__new(void);
 struct mem_info *mem_info__get(struct mem_info *mi);
 void   mem_info__put(struct mem_info *mi);
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index 6dd2cb88ccbe..ed0205cc7942 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -58,13 +58,13 @@ size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
 }
 
 size_t dso__fprintf_symbols_by_name(struct dso *dso,
-				    enum map_type type, FILE *fp)
+				    FILE *fp)
 {
 	size_t ret = 0;
 	struct rb_node *nd;
 	struct symbol_name_rb_node *pos;
 
-	for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) {
 		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
 		fprintf(fp, "%s\n", pos->sym.name);
 	}
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 0ee7f568d60c..3393d7ee9401 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -38,6 +38,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_64;
 #include <asm/syscalls_32.c>
 const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
 static const char **syscalltbl_native = syscalltbl_powerpc_32;
+#elif defined(__aarch64__)
+#include <asm/syscalls.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_arm64;
 #endif
 
 struct syscall {
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index dd17d6a38d3a..61a4286a74dc 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -36,6 +36,7 @@
  * @branch_count: the branch count when the entry was created
  * @cp: call path
  * @no_call: a 'call' was not seen
+ * @trace_end: a 'call' but trace ended
  */
 struct thread_stack_entry {
 	u64 ret_addr;
@@ -44,6 +45,7 @@ struct thread_stack_entry {
 	u64 branch_count;
 	struct call_path *cp;
 	bool no_call;
+	bool trace_end;
 };
 
 /**
@@ -112,7 +114,8 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 	return ts;
 }
 
-static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
+static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
+			      bool trace_end)
 {
 	int err = 0;
 
@@ -124,6 +127,7 @@ static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
 		}
 	}
 
+	ts->stack[ts->cnt].trace_end = trace_end;
 	ts->stack[ts->cnt++].ret_addr = ret_addr;
 
 	return err;
@@ -150,6 +154,18 @@ static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
 	}
 }
 
+static void thread_stack__pop_trace_end(struct thread_stack *ts)
+{
+	size_t i;
+
+	for (i = ts->cnt; i; ) {
+		if (ts->stack[--i].trace_end)
+			ts->cnt = i;
+		else
+			return;
+	}
+}
+
 static bool thread_stack__in_kernel(struct thread_stack *ts)
 {
 	if (!ts->cnt)
@@ -254,10 +270,19 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 		ret_addr = from_ip + insn_len;
 		if (ret_addr == to_ip)
 			return 0; /* Zero-length calls are excluded */
-		return thread_stack__push(thread->ts, ret_addr);
-	} else if (flags & PERF_IP_FLAG_RETURN) {
-		if (!from_ip)
-			return 0;
+		return thread_stack__push(thread->ts, ret_addr,
+					  flags & PERF_IP_FLAG_TRACE_END);
+	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		/*
+		 * If the caller did not change the trace number (which would
+		 * have flushed the stack) then try to make sense of the stack.
+		 * Possibly, tracing began after returning to the current
+		 * address, so try to pop that. Also, do not expect a call made
+		 * when the trace ended, to return, so pop that.
+		 */
+		thread_stack__pop(thread->ts, to_ip);
+		thread_stack__pop_trace_end(thread->ts);
+	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
 		thread_stack__pop(thread->ts, to_ip);
 	}
 
@@ -285,20 +310,46 @@ void thread_stack__free(struct thread *thread)
 	}
 }
 
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip)
+			  size_t sz, u64 ip, u64 kernel_start)
 {
-	size_t i;
+	u64 context = callchain_context(ip, kernel_start);
+	u64 last_context;
+	size_t i, j;
 
-	if (!thread || !thread->ts)
-		chain->nr = 1;
-	else
-		chain->nr = min(sz, thread->ts->cnt + 1);
+	if (sz < 2) {
+		chain->nr = 0;
+		return;
+	}
+
+	chain->ips[0] = context;
+	chain->ips[1] = ip;
 
-	chain->ips[0] = ip;
+	if (!thread || !thread->ts) {
+		chain->nr = 2;
+		return;
+	}
+
+	last_context = context;
+
+	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context != last_context) {
+			if (i >= sz - 1)
+				break;
+			chain->ips[i++] = context;
+			last_context = context;
+		}
+		chain->ips[i] = ip;
+	}
 
-	for (i = 1; i < chain->nr; i++)
-		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+	chain->nr = i;
 }
 
 struct call_return_processor *
@@ -332,7 +383,7 @@ void call_return_processor__free(struct call_return_processor *crp)
 
 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
 				 u64 timestamp, u64 ref, struct call_path *cp,
-				 bool no_call)
+				 bool no_call, bool trace_end)
 {
 	struct thread_stack_entry *tse;
 	int err;
@@ -350,6 +401,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
 	tse->branch_count = ts->branch_count;
 	tse->cp = cp;
 	tse->no_call = no_call;
+	tse->trace_end = trace_end;
 
 	return 0;
 }
@@ -423,7 +475,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
 		return -ENOMEM;
 
 	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
-				     true);
+				     true, false);
 }
 
 static int thread_stack__no_call_return(struct thread *thread,
@@ -455,7 +507,7 @@ static int thread_stack__no_call_return(struct thread *thread,
 			if (!cp)
 				return -ENOMEM;
 			return thread_stack__push_cp(ts, 0, sample->time, ref,
-						     cp, true);
+						     cp, true, false);
 		}
 	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
 		/* Return to userspace, so pop all kernel addresses */
@@ -480,7 +532,7 @@ static int thread_stack__no_call_return(struct thread *thread,
 		return -ENOMEM;
 
 	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
-				    true);
+				    true, false);
 	if (err)
 		return err;
 
@@ -500,7 +552,7 @@ static int thread_stack__trace_begin(struct thread *thread,
 
 	/* Pop trace end */
 	tse = &ts->stack[ts->cnt - 1];
-	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
+	if (tse->trace_end) {
 		err = thread_stack__call_return(thread, ts, --ts->cnt,
 						timestamp, ref, false);
 		if (err)
@@ -529,7 +581,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
 	ret_addr = sample->ip + sample->insn_len;
 
 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
-				     false);
+				     false, true);
 }
 
 int thread_stack__process(struct thread *thread, struct comm *comm,
@@ -579,6 +631,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	ts->last_time = sample->time;
 
 	if (sample->flags & PERF_IP_FLAG_CALL) {
+		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
 		struct call_path_root *cpr = ts->crp->cpr;
 		struct call_path *cp;
 		u64 ret_addr;
@@ -596,7 +649,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 		if (!cp)
 			return -ENOMEM;
 		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
-					    cp, false);
+					    cp, false, trace_end);
 	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
 		if (!sample->ip || !sample->addr)
 			return 0;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b7e41c4ebfdd..f97c00a8c251 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip);
+			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 size_t thread_stack__depth(struct thread *thread);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 68b65b10579b..3d9ed7d0e281 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -302,23 +302,20 @@ int thread__insert_map(struct thread *thread, struct map *map)
 static int __thread__prepare_access(struct thread *thread)
 {
 	bool initialized = false;
-	int i, err = 0;
-
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		struct maps *maps = &thread->mg->maps[i];
-		struct map *map;
-
-		down_read(&maps->lock);
+	int err = 0;
+	struct maps *maps = &thread->mg->maps;
+	struct map *map;
 
-		for (map = maps__first(maps); map; map = map__next(map)) {
-			err = unwind__prepare_access(thread, map, &initialized);
-			if (err || initialized)
-				break;
-		}
+	down_read(&maps->lock);
 
-		up_read(&maps->lock);
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		err = unwind__prepare_access(thread, map, &initialized);
+		if (err || initialized)
+			break;
 	}
 
+	up_read(&maps->lock);
+
 	return err;
 }
 
@@ -333,10 +330,9 @@ static int thread__prepare_access(struct thread *thread)
 }
 
 static int thread__clone_map_groups(struct thread *thread,
-				    struct thread *parent)
+				    struct thread *parent,
+				    bool do_maps_clone)
 {
-	int i;
-
 	/* This is new thread, we share map groups for process. */
 	if (thread->pid_ == parent->pid_)
 		return thread__prepare_access(thread);
@@ -346,16 +342,11 @@ static int thread__clone_map_groups(struct thread *thread,
 			 thread->pid_, thread->tid, parent->pid_, parent->tid);
 		return 0;
 	}
-
 	/* But this one is new process, copy maps. */
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		if (map_groups__clone(thread, parent->mg, i) < 0)
-			return -ENOMEM;
-
-	return 0;
+	return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
 }
 
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
 {
 	if (parent->comm_set) {
 		const char *comm = thread__comm_str(parent);
@@ -368,11 +359,10 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
 	}
 
 	thread->ppid = parent->tid;
-	return thread__clone_map_groups(thread, parent);
+	return thread__clone_map_groups(thread, parent, do_maps_clone);
 }
 
-void thread__find_cpumode_addr_location(struct thread *thread,
-					enum map_type type, u64 addr,
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
 					struct addr_location *al)
 {
 	size_t i;
@@ -384,7 +374,7 @@ void thread__find_cpumode_addr_location(struct thread *thread,
 	};
 
 	for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
-		thread__find_addr_location(thread, cpumodes[i], type, addr, al);
+		thread__find_symbol(thread, cpumodes[i], addr, al);
 		if (al->map)
 			break;
 	}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 14d44c3235b8..30e2b4c165fe 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -42,6 +42,8 @@ struct thread {
 	void				*addr_space;
 	struct unwind_libunwind_ops	*unwind_libunwind_ops;
 #endif
+	bool			filter;
+	int			filter_entry_depth;
 };
 
 struct machine;
@@ -87,21 +89,18 @@ struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
 const char *thread__comm_str(const struct thread *thread);
 int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
 
-void thread__find_addr_map(struct thread *thread,
-			   u8 cpumode, enum map_type type, u64 addr,
-			   struct addr_location *al);
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al);
 
-void thread__find_addr_location(struct thread *thread,
-				u8 cpumode, enum map_type type, u64 addr,
-				struct addr_location *al);
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+				   u64 addr, struct addr_location *al);
 
-void thread__find_cpumode_addr_location(struct thread *thread,
-					enum map_type type, u64 addr,
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
 					struct addr_location *al);
 
 static inline void *thread__priv(struct thread *thread)
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 183c91453522..56e4ca54020a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -26,15 +26,12 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_evlist **pevlist);
 
-typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
-			 struct perf_session *session);
+typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
+typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
 
 typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
 			struct ordered_events *oe);
 
-typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
-			 struct perf_session *session);
-
 enum show_feature_header {
 	SHOW_FEAT_NO_HEADER = 0,
 	SHOW_FEAT_HEADER,
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 9892323cdd7c..9add1f72ce95 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -3,6 +3,7 @@
 #define __PERF_TOP_H 1
 
 #include "tool.h"
+#include "annotate.h"
 #include <linux/types.h>
 #include <stddef.h>
 #include <stdbool.h>
@@ -16,6 +17,7 @@ struct perf_top {
 	struct perf_tool   tool;
 	struct perf_evlist *evlist;
 	struct record_opts record_opts;
+	struct annotation_options annotation_opts;
 	/*
 	 * Symbols will be added here in perf_event__process_sample and will
 	 * get out after decayed.
@@ -35,7 +37,6 @@ struct perf_top {
 	struct perf_session *session;
 	struct winsize	   winsize;
 	int		   realtime_prio;
-	int		   sym_pcnt_filter;
 	const char	   *sym_filter;
 	float		   min_percent;
 	unsigned int	   nr_threads_synthesize;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index d7f2113462fb..8ad8e755127b 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -103,11 +103,10 @@ out:
 
 static int record_header_files(void)
 {
-	char *path;
+	char *path = get_events_file("header_page");
 	struct stat st;
 	int err = -EIO;
 
-	path = get_tracing_file("events/header_page");
 	if (!path) {
 		pr_debug("can't get tracing/events/header_page");
 		return -ENOMEM;
@@ -128,9 +127,9 @@ static int record_header_files(void)
 		goto out;
 	}
 
-	put_tracing_file(path);
+	put_events_file(path);
 
-	path = get_tracing_file("events/header_event");
+	path = get_events_file("header_event");
 	if (!path) {
 		pr_debug("can't get tracing/events/header_event");
 		err = -ENOMEM;
@@ -154,7 +153,7 @@ static int record_header_files(void)
 
 	err = 0;
 out:
-	put_tracing_file(path);
+	put_events_file(path);
 	return err;
 }
 
@@ -243,7 +242,7 @@ static int record_ftrace_files(struct tracepoint_path *tps)
 	char *path;
 	int ret;
 
-	path = get_tracing_file("events/ftrace");
+	path = get_events_file("ftrace");
 	if (!path) {
 		pr_debug("can't get tracing/events/ftrace");
 		return -ENOMEM;
@@ -378,7 +377,7 @@ out:
 
 static int record_saved_cmdline(void)
 {
-	unsigned int size;
+	unsigned long long size;
 	char *path;
 	struct stat st;
 	int ret, err = 0;
@@ -532,12 +531,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
 			 "/tmp/perf-XXXXXX");
 		if (!mkstemp(tdata->temp_file)) {
 			pr_debug("Can't make temp file");
+			free(tdata);
 			return NULL;
 		}
 
 		temp_fd = open(tdata->temp_file, O_RDWR);
 		if (temp_fd < 0) {
 			pr_debug("Can't read '%s'", tdata->temp_file);
+			free(tdata);
 			return NULL;
 		}
 
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index e0a6e9a6a053..32e558a65af3 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -32,23 +32,24 @@
 static int get_common_field(struct scripting_context *context,
 			    int *offset, int *size, const char *type)
 {
-	struct pevent *pevent = context->pevent;
-	struct event_format *event;
-	struct format_field *field;
+	struct tep_handle *pevent = context->pevent;
+	struct tep_event_format *event;
+	struct tep_format_field *field;
 
 	if (!*size) {
-		if (!pevent->events)
+
+		event = tep_get_first_event(pevent);
+		if (!event)
 			return 0;
 
-		event = pevent->events[0];
-		field = pevent_find_common_field(event, type);
+		field = tep_find_common_field(event, type);
 		if (!field)
 			return 0;
 		*offset = field->offset;
 		*size = field->size;
 	}
 
-	return pevent_read_number(pevent, context->event_data + *offset, *size);
+	return tep_read_number(pevent, context->event_data + *offset, *size);
 }
 
 int common_lock_depth(struct scripting_context *context)
@@ -94,29 +95,29 @@ int common_pc(struct scripting_context *context)
 }
 
 unsigned long long
-raw_field_value(struct event_format *event, const char *name, void *data)
+raw_field_value(struct tep_event_format *event, const char *name, void *data)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val;
 
-	field = pevent_find_any_field(event, name);
+	field = tep_find_any_field(event, name);
 	if (!field)
 		return 0ULL;
 
-	pevent_read_number_field(field, data, &val);
+	tep_read_number_field(field, data, &val);
 
 	return val;
 }
 
-unsigned long long read_size(struct event_format *event, void *ptr, int size)
+unsigned long long read_size(struct tep_event_format *event, void *ptr, int size)
 {
-	return pevent_read_number(event->pevent, ptr, size);
+	return tep_read_number(event->pevent, ptr, size);
 }
 
-void event_format__fprintf(struct event_format *event,
+void event_format__fprintf(struct tep_event_format *event,
 			   int cpu, void *data, int size, FILE *fp)
 {
-	struct pevent_record record;
+	struct tep_record record;
 	struct trace_seq s;
 
 	memset(&record, 0, sizeof(record));
@@ -125,18 +126,18 @@ void event_format__fprintf(struct event_format *event,
 	record.data = data;
 
 	trace_seq_init(&s);
-	pevent_event_info(&s, event, &record);
+	tep_event_info(&s, event, &record);
 	trace_seq_do_fprintf(&s, fp);
 	trace_seq_destroy(&s);
 }
 
-void event_format__print(struct event_format *event,
+void event_format__print(struct tep_event_format *event,
 			 int cpu, void *data, int size)
 {
 	return event_format__fprintf(event, cpu, data, size, stdout);
 }
 
-void parse_ftrace_printk(struct pevent *pevent,
+void parse_ftrace_printk(struct tep_handle *pevent,
 			 char *file, unsigned int size __maybe_unused)
 {
 	unsigned long long addr;
@@ -157,61 +158,65 @@ void parse_ftrace_printk(struct pevent *pevent,
 		/* fmt still has a space, skip it */
 		printk = strdup(fmt+1);
 		line = strtok_r(NULL, "\n", &next);
-		pevent_register_print_string(pevent, printk, addr);
+		tep_register_print_string(pevent, printk, addr);
+		free(printk);
 	}
 }
 
-void parse_saved_cmdline(struct pevent *pevent,
+void parse_saved_cmdline(struct tep_handle *pevent,
 			 char *file, unsigned int size __maybe_unused)
 {
-	char *comm;
+	char comm[17]; /* Max comm length in the kernel is 16. */
 	char *line;
 	char *next = NULL;
 	int pid;
 
 	line = strtok_r(file, "\n", &next);
 	while (line) {
-		sscanf(line, "%d %ms", &pid, &comm);
-		pevent_register_comm(pevent, comm, pid);
-		free(comm);
+		if (sscanf(line, "%d %16s", &pid, comm) == 2)
+			tep_register_comm(pevent, comm, pid);
 		line = strtok_r(NULL, "\n", &next);
 	}
 }
 
-int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size)
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size)
 {
-	return pevent_parse_event(pevent, buf, size, "ftrace");
+	return tep_parse_event(pevent, buf, size, "ftrace");
 }
 
-int parse_event_file(struct pevent *pevent,
+int parse_event_file(struct tep_handle *pevent,
 		     char *buf, unsigned long size, char *sys)
 {
-	return pevent_parse_event(pevent, buf, size, sys);
+	return tep_parse_event(pevent, buf, size, sys);
 }
 
-struct event_format *trace_find_next_event(struct pevent *pevent,
-					   struct event_format *event)
+struct tep_event_format *trace_find_next_event(struct tep_handle *pevent,
+					       struct tep_event_format *event)
 {
 	static int idx;
+	int events_count;
+	struct tep_event_format *all_events;
 
-	if (!pevent || !pevent->events)
+	all_events = tep_get_first_event(pevent);
+	events_count = tep_get_events_count(pevent);
+	if (!pevent || !all_events || events_count < 1)
 		return NULL;
 
 	if (!event) {
 		idx = 0;
-		return pevent->events[0];
+		return all_events;
 	}
 
-	if (idx < pevent->nr_events && event == pevent->events[idx]) {
+	if (idx < events_count && event == (all_events + idx)) {
 		idx++;
-		if (idx == pevent->nr_events)
+		if (idx == events_count)
 			return NULL;
-		return pevent->events[idx];
+		return (all_events + idx);
 	}
 
-	for (idx = 1; idx < pevent->nr_events; idx++) {
-		if (event == pevent->events[idx - 1])
-			return pevent->events[idx];
+	for (idx = 1; idx < events_count; idx++) {
+		if (event == (all_events + (idx - 1)))
+			return (all_events + idx);
 	}
 	return NULL;
 }
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 40b425949aa3..76f12c705ef9 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -96,22 +96,22 @@ static void skip(int size)
 	};
 }
 
-static unsigned int read4(struct pevent *pevent)
+static unsigned int read4(struct tep_handle *pevent)
 {
 	unsigned int data;
 
 	if (do_read(&data, 4) < 0)
 		return 0;
-	return __data2host4(pevent, data);
+	return __tep_data2host4(pevent, data);
 }
 
-static unsigned long long read8(struct pevent *pevent)
+static unsigned long long read8(struct tep_handle *pevent)
 {
 	unsigned long long data;
 
 	if (do_read(&data, 8) < 0)
 		return 0;
-	return __data2host8(pevent, data);
+	return __tep_data2host8(pevent, data);
 }
 
 static char *read_string(void)
@@ -158,7 +158,7 @@ out:
 	return str;
 }
 
-static int read_proc_kallsyms(struct pevent *pevent)
+static int read_proc_kallsyms(struct tep_handle *pevent)
 {
 	unsigned int size;
 
@@ -181,7 +181,7 @@ static int read_proc_kallsyms(struct pevent *pevent)
 	return 0;
 }
 
-static int read_ftrace_printk(struct pevent *pevent)
+static int read_ftrace_printk(struct tep_handle *pevent)
 {
 	unsigned int size;
 	char *buf;
@@ -208,7 +208,7 @@ static int read_ftrace_printk(struct pevent *pevent)
 	return 0;
 }
 
-static int read_header_files(struct pevent *pevent)
+static int read_header_files(struct tep_handle *pevent)
 {
 	unsigned long long size;
 	char *header_page;
@@ -235,13 +235,13 @@ static int read_header_files(struct pevent *pevent)
 		return -1;
 	}
 
-	if (!pevent_parse_header_page(pevent, header_page, size,
-				      pevent_get_long_size(pevent))) {
+	if (!tep_parse_header_page(pevent, header_page, size,
+				   tep_get_long_size(pevent))) {
 		/*
 		 * The commit field in the page is of type long,
 		 * use that instead, since it represents the kernel.
 		 */
-		pevent_set_long_size(pevent, pevent->header_page_size_size);
+		tep_set_long_size(pevent, tep_get_header_page_size(pevent));
 	}
 	free(header_page);
 
@@ -259,7 +259,7 @@ static int read_header_files(struct pevent *pevent)
 	return ret;
 }
 
-static int read_ftrace_file(struct pevent *pevent, unsigned long long size)
+static int read_ftrace_file(struct tep_handle *pevent, unsigned long long size)
 {
 	int ret;
 	char *buf;
@@ -284,8 +284,8 @@ out:
 	return ret;
 }
 
-static int read_event_file(struct pevent *pevent, char *sys,
-			    unsigned long long size)
+static int read_event_file(struct tep_handle *pevent, char *sys,
+			   unsigned long long size)
 {
 	int ret;
 	char *buf;
@@ -297,10 +297,8 @@ static int read_event_file(struct pevent *pevent, char *sys,
 	}
 
 	ret = do_read(buf, size);
-	if (ret < 0) {
-		free(buf);
+	if (ret < 0)
 		goto out;
-	}
 
 	ret = parse_event_file(pevent, buf, size, sys);
 	if (ret < 0)
@@ -310,7 +308,7 @@ out:
 	return ret;
 }
 
-static int read_ftrace_files(struct pevent *pevent)
+static int read_ftrace_files(struct tep_handle *pevent)
 {
 	unsigned long long size;
 	int count;
@@ -328,7 +326,7 @@ static int read_ftrace_files(struct pevent *pevent)
 	return 0;
 }
 
-static int read_event_files(struct pevent *pevent)
+static int read_event_files(struct tep_handle *pevent)
 {
 	unsigned long long size;
 	char *sys;
@@ -349,14 +347,17 @@ static int read_event_files(struct pevent *pevent)
 		for (x=0; x < count; x++) {
 			size = read8(pevent);
 			ret = read_event_file(pevent, sys, size);
-			if (ret)
+			if (ret) {
+				free(sys);
 				return ret;
+			}
 		}
+		free(sys);
 	}
 	return 0;
 }
 
-static int read_saved_cmdline(struct pevent *pevent)
+static int read_saved_cmdline(struct tep_handle *pevent)
 {
 	unsigned long long size;
 	char *buf;
@@ -399,7 +400,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
 	int host_bigendian;
 	int file_long_size;
 	int file_page_size;
-	struct pevent *pevent = NULL;
+	struct tep_handle *pevent = NULL;
 	int err;
 
 	repipe = __repipe;
@@ -439,9 +440,9 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
 
 	pevent = tevent->pevent;
 
-	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
-	pevent_set_file_bigendian(pevent, file_bigendian);
-	pevent_set_host_bigendian(pevent, host_bigendian);
+	tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+	tep_set_file_bigendian(pevent, file_bigendian);
+	tep_set_host_bigendian(pevent, host_bigendian);
 
 	if (do_read(buf, 1) < 0)
 		goto out;
@@ -451,8 +452,8 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
 	if (!file_page_size)
 		goto out;
 
-	pevent_set_long_size(pevent, file_long_size);
-	pevent_set_page_size(pevent, file_page_size);
+	tep_set_long_size(pevent, file_long_size);
+	tep_set_page_size(pevent, file_page_size);
 
 	err = read_header_files(pevent);
 	if (err)
@@ -479,9 +480,9 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
 	repipe = false;
 
 	if (show_funcs) {
-		pevent_print_funcs(pevent);
+		tep_print_funcs(pevent);
 	} else if (show_printk) {
-		pevent_print_printk(pevent);
+		tep_print_printk(pevent);
 	}
 
 	pevent = NULL;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index b1e5c3a2b8e3..b749f812ac70 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -66,7 +66,7 @@ static int python_start_script_unsupported(const char *script __maybe_unused,
 	return -1;
 }
 
-static int python_generate_script_unsupported(struct pevent *pevent
+static int python_generate_script_unsupported(struct tep_handle *pevent
 					      __maybe_unused,
 					      const char *outfile
 					      __maybe_unused)
@@ -130,7 +130,7 @@ static int perl_start_script_unsupported(const char *script __maybe_unused,
 	return -1;
 }
 
-static int perl_generate_script_unsupported(struct pevent *pevent
+static int perl_generate_script_unsupported(struct tep_handle *pevent
 					    __maybe_unused,
 					    const char *outfile __maybe_unused)
 {
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 16a776371d03..95664b2f771e 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -28,10 +28,10 @@ static bool tevent_initialized;
 
 int trace_event__init(struct trace_event *t)
 {
-	struct pevent *pevent = pevent_alloc();
+	struct tep_handle *pevent = tep_alloc();
 
 	if (pevent) {
-		t->plugin_list = traceevent_load_plugins(pevent);
+		t->plugin_list = tep_load_plugins(pevent);
 		t->pevent  = pevent;
 	}
 
@@ -40,56 +40,60 @@ int trace_event__init(struct trace_event *t)
 
 static int trace_event__init2(void)
 {
-	int be = traceevent_host_bigendian();
-	struct pevent *pevent;
+	int be = tep_host_bigendian();
+	struct tep_handle *pevent;
 
 	if (trace_event__init(&tevent))
 		return -1;
 
 	pevent = tevent.pevent;
-	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
-	pevent_set_file_bigendian(pevent, be);
-	pevent_set_host_bigendian(pevent, be);
+	tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+	tep_set_file_bigendian(pevent, be);
+	tep_set_host_bigendian(pevent, be);
 	tevent_initialized = true;
 	return 0;
 }
 
 int trace_event__register_resolver(struct machine *machine,
-				   pevent_func_resolver_t *func)
+				   tep_func_resolver_t *func)
 {
 	if (!tevent_initialized && trace_event__init2())
 		return -1;
 
-	return pevent_set_function_resolver(tevent.pevent, func, machine);
+	return tep_set_function_resolver(tevent.pevent, func, machine);
 }
 
 void trace_event__cleanup(struct trace_event *t)
 {
-	traceevent_unload_plugins(t->plugin_list, t->pevent);
-	pevent_free(t->pevent);
+	tep_unload_plugins(t->plugin_list, t->pevent);
+	tep_free(t->pevent);
 }
 
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-static struct event_format*
+static struct tep_event_format*
 tp_format(const char *sys, const char *name)
 {
-	struct pevent *pevent = tevent.pevent;
-	struct event_format *event = NULL;
+	char *tp_dir = get_events_file(sys);
+	struct tep_handle *pevent = tevent.pevent;
+	struct tep_event_format *event = NULL;
 	char path[PATH_MAX];
 	size_t size;
 	char *data;
 	int err;
 
-	scnprintf(path, PATH_MAX, "%s/%s/%s/format",
-		  tracing_events_path, sys, name);
+	if (!tp_dir)
+		return ERR_PTR(-errno);
+
+	scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
+	put_events_file(tp_dir);
 
 	err = filename__read_str(path, &data, &size);
 	if (err)
 		return ERR_PTR(err);
 
-	pevent_parse_format(pevent, &event, data, size, sys);
+	tep_parse_format(pevent, &event, data, size, sys);
 
 	free(data);
 	return event;
@@ -98,7 +102,7 @@ tp_format(const char *sys, const char *name)
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-struct event_format*
+struct tep_event_format*
 trace_event__tp_format(const char *sys, const char *name)
 {
 	if (!tevent_initialized && trace_event__init2())
@@ -107,10 +111,10 @@ trace_event__tp_format(const char *sys, const char *name)
 	return tp_format(sys, name);
 }
 
-struct event_format *trace_event__tp_format_id(int id)
+struct tep_event_format *trace_event__tp_format_id(int id)
 {
 	if (!tevent_initialized && trace_event__init2())
 		return ERR_PTR(-ENOMEM);
 
-	return pevent_find_event(tevent.pevent, id);
+	return tep_find_event(tevent.pevent, id);
 }
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index dcbdb53dc702..f024d73bfc40 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -3,6 +3,7 @@
 #define _PERF_UTIL_TRACE_EVENT_H
 
 #include <traceevent/event-parse.h>
+#include <traceevent/trace-seq.h>
 #include "parse-events.h"
 
 struct machine;
@@ -10,46 +11,46 @@ struct perf_sample;
 union perf_event;
 struct perf_tool;
 struct thread;
-struct plugin_list;
+struct tep_plugin_list;
 
 struct trace_event {
-	struct pevent		*pevent;
-	struct plugin_list	*plugin_list;
+	struct tep_handle	*pevent;
+	struct tep_plugin_list	*plugin_list;
 };
 
 int trace_event__init(struct trace_event *t);
 void trace_event__cleanup(struct trace_event *t);
 int trace_event__register_resolver(struct machine *machine,
-				   pevent_func_resolver_t *func);
-struct event_format*
+				   tep_func_resolver_t *func);
+struct tep_event_format*
 trace_event__tp_format(const char *sys, const char *name);
 
-struct event_format *trace_event__tp_format_id(int id);
+struct tep_event_format *trace_event__tp_format_id(int id);
 
 int bigendian(void);
 
-void event_format__fprintf(struct event_format *event,
+void event_format__fprintf(struct tep_event_format *event,
 			   int cpu, void *data, int size, FILE *fp);
 
-void event_format__print(struct event_format *event,
+void event_format__print(struct tep_event_format *event,
 			 int cpu, void *data, int size);
 
-int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size);
-int parse_event_file(struct pevent *pevent,
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
+int parse_event_file(struct tep_handle *pevent,
 		     char *buf, unsigned long size, char *sys);
 
 unsigned long long
-raw_field_value(struct event_format *event, const char *name, void *data);
+raw_field_value(struct tep_event_format *event, const char *name, void *data);
 
-void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size);
-void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size);
-void parse_saved_cmdline(struct pevent *pevent, char *file, unsigned int size);
+void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size);
 
 ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
 
-struct event_format *trace_find_next_event(struct pevent *pevent,
-					   struct event_format *event);
-unsigned long long read_size(struct event_format *event, void *ptr, int size);
+struct tep_event_format *trace_find_next_event(struct tep_handle *pevent,
+					       struct tep_event_format *event);
+unsigned long long read_size(struct tep_event_format *event, void *ptr, int size);
 unsigned long long eval_flag(const char *flag);
 
 int read_tracing_data(int fd, struct list_head *pattrs);
@@ -83,7 +84,7 @@ struct scripting_ops {
 	void (*process_stat)(struct perf_stat_config *config,
 			     struct perf_evsel *evsel, u64 tstamp);
 	void (*process_stat_interval)(u64 tstamp);
-	int (*generate_script) (struct pevent *pevent, const char *outfile);
+	int (*generate_script) (struct tep_handle *pevent, const char *outfile);
 };
 
 extern unsigned int scripting_max_stack;
@@ -94,7 +95,7 @@ void setup_perl_scripting(void);
 void setup_python_scripting(void);
 
 struct scripting_context {
-	struct pevent *pevent;
+	struct tep_handle *pevent;
 	void *event_data;
 };
 
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 7bdd239c795c..5eff9bfc5758 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -28,10 +28,11 @@ static int __report_module(struct addr_location *al, u64 ip,
 {
 	Dwfl_Module *mod;
 	struct dso *dso = NULL;
-
-	thread__find_addr_location(ui->thread,
-				   PERF_RECORD_MISC_USER,
-				   MAP__FUNCTION, ip, al);
+	/*
+	 * Some callers will use al->sym, so we can't just use the
+	 * cheaper thread__find_map() here.
+	 */
+	thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
 
 	if (al->map)
 		dso = al->map->dso;
@@ -44,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip,
 		Dwarf_Addr s;
 
 		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-		if (s != al->map->start)
+		if (s != al->map->start - al->map->pgoff)
 			mod = 0;
 	}
 
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
 				      false);
 
 	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
@@ -76,7 +77,7 @@ static int entry(u64 ip, struct unwind_info *ui)
 	if (__report_module(&al, ip, ui))
 		return -1;
 
-	e->ip  = al.addr;
+	e->ip  = ip;
 	e->map = al.map;
 	e->sym = al.sym;
 
@@ -103,19 +104,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
 	struct addr_location al;
 	ssize_t size;
 
-	thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, addr, &al);
-	if (!al.map) {
-		/*
-		 * We've seen cases (softice) where DWARF unwinder went
-		 * through non executable mmaps, which we need to lookup
-		 * in MAP__VARIABLE tree.
-		 */
-		thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-				      MAP__VARIABLE, addr, &al);
-	}
-
-	if (!al.map) {
+	if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
 		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
 		return -1;
 	}
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index af873044d33a..79f521a552cf 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -366,19 +366,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
 {
 	struct addr_location al;
-
-	thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, ip, &al);
-	if (!al.map) {
-		/*
-		 * We've seen cases (softice) where DWARF unwinder went
-		 * through non executable mmaps, which we need to lookup
-		 * in MAP__VARIABLE tree.
-		 */
-		thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-				      MAP__VARIABLE, ip, &al);
-	}
-	return al.map;
+	return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
 }
 
 static int
@@ -586,12 +574,9 @@ static int entry(u64 ip, struct thread *thread,
 	struct unwind_entry e;
 	struct addr_location al;
 
-	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
-				   MAP__FUNCTION, ip, &al);
-
-	e.ip = al.addr;
+	e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
+	e.ip  = ip;
 	e.map = al.map;
-	e.sym = al.sym;
 
 	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
 		 al.sym ? al.sym->name : "''",
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 1019bbc5dbd8..093352e93d50 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -38,11 +38,43 @@ void perf_set_multithreaded(void)
 }
 
 unsigned int page_size;
-int cacheline_size;
+
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+#else
+static void cache_line_size(int *cacheline_sizep)
+{
+	if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+		pr_debug("cannot determine cache line size");
+}
+#endif
+
+int cacheline_size(void)
+{
+	static int size;
+
+	if (!size)
+		cache_line_size(&size);
+
+	return size;
+}
 
 int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
 int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
 
+int sysctl__max_stack(void)
+{
+	int value;
+
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
+	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+		sysctl_perf_event_max_contexts_per_stack = value;
+
+	return sysctl_perf_event_max_stack;
+}
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
@@ -189,7 +221,7 @@ out:
 	return err;
 }
 
-static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
 {
 	void *ptr;
 	loff_t pgoff;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c9626c206208..ece040b799f6 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -6,6 +6,7 @@
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
 
+#include <fcntl.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
@@ -35,6 +36,7 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
 int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
 
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, const void *buf, size_t n);
@@ -43,7 +45,9 @@ size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
 extern unsigned int page_size;
-extern int cacheline_size;
+int __pure cacheline_size(void);
+
+int sysctl__max_stack(void);
 
 int fetch_kernel_version(unsigned int *puint,
 			 char *str, size_t str_sz);
@@ -55,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint,
 
 const char *perf_tip(const char *dirpath);
 
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif
+
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
 #endif
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 0acb1ec0e2f0..741af209b19d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -139,12 +139,10 @@ static enum dso_type machine__thread_dso_type(struct machine *machine,
 					      struct thread *thread)
 {
 	enum dso_type dso_type = DSO__TYPE_UNKNOWN;
-	struct map *map;
-	struct dso *dso;
+	struct map *map = map_groups__first(thread->mg);
 
-	map = map_groups__first(thread->mg, MAP__FUNCTION);
 	for (; map ; map = map_groups__next(map)) {
-		dso = map->dso;
+		struct dso *dso = map->dso;
 		if (!dso || dso->long_name[0] != '/')
 			continue;
 		dso_type = dso__type(dso, machine);
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index a725b958cf31..902ce6384f57 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -5,6 +5,8 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <zlib.h>
+#include <linux/compiler.h>
+#include <unistd.h>
 
 #include "util/compress.h"
 #include "util/util.h"
@@ -79,3 +81,19 @@ out_close:
 
 	return ret == Z_STREAM_END ? 0 : -1;
 }
+
+bool gzip_is_compressed(const char *input)
+{
+	int fd = open(input, O_RDONLY);
+	const uint8_t magic[2] = { 0x1f, 0x8b };
+	char buf[2] = { 0 };
+	ssize_t rc;
+
+	if (fd < 0)
+		return -1;
+
+	rc = read(fd, buf, sizeof(buf));
+	close(fd);
+	return rc == sizeof(buf) ?
+	       memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 1dd5f4fcffd5..db66a952c173 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -129,7 +129,7 @@ WARNINGS += $(call cc-supports,-Wno-pointer-sign)
 WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
 WARNINGS += -Wshadow
 
-CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
+override CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
 		-DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE
 
 UTIL_OBJS =  utils/helpers/amd.o utils/helpers/msr.o \
@@ -156,12 +156,12 @@ LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
 LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
 LIB_OBJS :=	$(addprefix $(OUTPUT),$(LIB_OBJS))
 
-CFLAGS +=	-pipe
+override CFLAGS +=	-pipe
 
 ifeq ($(strip $(NLS)),true)
 	INSTALL_NLS += install-gmo
 	COMPILE_NLS += create-gmo
-	CFLAGS += -DNLS
+	override CFLAGS += -DNLS
 endif
 
 ifeq ($(strip $(CPUFREQ_BENCH)),true)
@@ -175,7 +175,7 @@ ifeq ($(strip $(STATIC)),true)
         UTIL_SRC += $(LIB_SRC)
 endif
 
-CFLAGS += $(WARNINGS)
+override CFLAGS += $(WARNINGS)
 
 ifeq ($(strip $(V)),false)
 	QUIET=@
@@ -188,10 +188,10 @@ export QUIET ECHO
 
 # if DEBUG is enabled, then we do not strip or optimize
 ifeq ($(strip $(DEBUG)),true)
-	CFLAGS += -O1 -g -DDEBUG
+	override CFLAGS += -O1 -g -DDEBUG
 	STRIPCMD = /bin/true -Since_we_are_debugging
 else
-	CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
+	override CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
 	STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment
 endif
 
diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
index d79ab161cc75..f68b4bc55273 100644
--- a/tools/power/cpupower/bench/Makefile
+++ b/tools/power/cpupower/bench/Makefile
@@ -9,7 +9,7 @@ endif
 ifeq ($(strip $(STATIC)),true)
 LIBS = -L../ -L$(OUTPUT) -lm
 OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \
-       $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/sysfs.o
+       $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o
 else
 LIBS = -L../ -L$(OUTPUT) -lm -lcpupower
 OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 9b65f052081f..84caee38418f 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -104,7 +104,7 @@ FILE *prepare_output(const char *dirname)
 			dirname, time(NULL));
 	}
 
-	dprintf("logilename: %s\n", filename);
+	dprintf("logfilename: %s\n", filename);
 
 	output = fopen(filename, "w+");
 	if (output == NULL) {
@@ -145,7 +145,7 @@ struct config *prepare_default_config()
 	config->cpu = 0;
 	config->prio = SCHED_HIGH;
 	config->verbose = 0;
-	strncpy(config->governor, "ondemand", 8);
+	strncpy(config->governor, "ondemand", sizeof(config->governor));
 
 	config->output = stdout;
 
diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile
index 59af84b8ef45..b1b6c43644e7 100644
--- a/tools/power/cpupower/debug/x86_64/Makefile
+++ b/tools/power/cpupower/debug/x86_64/Makefile
@@ -13,10 +13,10 @@ INSTALL = /usr/bin/install
 default: all
 
 $(OUTPUT)centrino-decode: ../i386/centrino-decode.c
-	$(CC) $(CFLAGS) -o $@ $<
+	$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $<
 
 $(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c
-	$(CC) $(CFLAGS) -o $@ $<
+	$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $<
 
 all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode
 
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index 1b993fe1ce23..0c0f3e3f0d80 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -28,7 +28,7 @@ static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
 
 	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
 			 cpu, fname);
-	return sysfs_read_file(path, buf, buflen);
+	return cpupower_read_sysfs(path, buf, buflen);
 }
 
 /* helper function to write a new value to a /sys file */
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
index 9bd4c7655fdb..852d25462388 100644
--- a/tools/power/cpupower/lib/cpuidle.c
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -319,7 +319,7 @@ static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
 
 	snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
 
-	return sysfs_read_file(path, buf, buflen);
+	return cpupower_read_sysfs(path, buf, buflen);
 }
 
 
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 9c395ec924de..9711d628b0f4 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -15,7 +15,7 @@
 #include "cpupower.h"
 #include "cpupower_intern.h"
 
-unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 {
 	int fd;
 	ssize_t numread;
@@ -95,7 +95,7 @@ static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *re
 
 	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
 			 cpu, fname);
-	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
+	if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0)
 		return -1;
 	*result = strtol(linebuf, &endp, 0);
 	if (endp == linebuf || errno == ERANGE)
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
index 92affdfbe417..4887c76d23f8 100644
--- a/tools/power/cpupower/lib/cpupower_intern.h
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -3,4 +3,4 @@
 #define MAX_LINE_LEN 4096
 #define SYSFS_PATH_MAX 255
 
-unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
diff --git a/tools/power/cpupower/po/de.po b/tools/power/cpupower/po/de.po
index 78c09e51663a..840c17cc450a 100644
--- a/tools/power/cpupower/po/de.po
+++ b/tools/power/cpupower/po/de.po
@@ -323,12 +323,12 @@ msgstr "  Hardwarebedingte Grenzen der Taktfrequenz: "
 #: utils/cpufreq-info.c:256
 #, c-format
 msgid "  available frequency steps: "
-msgstr "  m�gliche Taktfrequenzen: "
+msgstr "  mögliche Taktfrequenzen: "
 
 #: utils/cpufreq-info.c:269
 #, c-format
 msgid "  available cpufreq governors: "
-msgstr "  m�gliche Regler: "
+msgstr "  mögliche Regler: "
 
 #: utils/cpufreq-info.c:280
 #, c-format
@@ -381,7 +381,7 @@ msgstr "Optionen:\n"
 msgid "  -e, --debug          Prints out debug information [default]\n"
 msgstr ""
 "  -e, --debug          Erzeugt detaillierte Informationen, hilfreich\n"
-"                       zum Aufsp�ren von Fehlern\n"
+"                       zum Aufspüren von Fehlern\n"
 
 #: utils/cpufreq-info.c:475
 #, c-format
@@ -424,7 +424,7 @@ msgstr "  -p, --policy         Findet die momentane Taktik heraus *\n"
 #: utils/cpufreq-info.c:482
 #, c-format
 msgid "  -g, --governors      Determines available cpufreq governors *\n"
-msgstr "  -g, --governors      Erzeugt eine Liste mit verf�gbaren Reglern *\n"
+msgstr "  -g, --governors      Erzeugt eine Liste mit verfügbaren Reglern *\n"
 
 #: utils/cpufreq-info.c:483
 #, c-format
@@ -450,7 +450,7 @@ msgstr ""
 #, c-format
 msgid "  -s, --stats          Shows cpufreq statistics if available\n"
 msgstr ""
-"  -s, --stats          Zeigt, sofern m�glich, Statistiken �ber cpufreq an.\n"
+"  -s, --stats          Zeigt, sofern möglich, Statistiken über cpufreq an.\n"
 
 #: utils/cpufreq-info.c:487
 #, c-format
@@ -473,9 +473,9 @@ msgid ""
 "cpufreq\n"
 "                       interface in 2.4. and early 2.6. kernels\n"
 msgstr ""
-"  -o, --proc           Erzeugt Informationen in einem �hnlichem Format zu "
+"  -o, --proc           Erzeugt Informationen in einem ähnlichem Format zu "
 "dem\n"
-"                       der /proc/cpufreq-Datei in 2.4. und fr�hen 2.6.\n"
+"                       der /proc/cpufreq-Datei in 2.4. und frühen 2.6.\n"
 "                       Kernel-Versionen\n"
 
 #: utils/cpufreq-info.c:491
@@ -491,7 +491,7 @@ msgstr ""
 #: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
 #, c-format
 msgid "  -h, --help           Prints out this screen\n"
-msgstr "  -h, --help           Gibt diese Kurz�bersicht aus\n"
+msgstr "  -h, --help           Gibt diese Kurzübersicht aus\n"
 
 #: utils/cpufreq-info.c:495
 #, c-format
@@ -501,7 +501,7 @@ msgid ""
 msgstr ""
 "Sofern kein anderer Parameter als '-c, --cpu' angegeben wird, liefert "
 "dieses\n"
-"Programm Informationen, die z.B. zum Berichten von Fehlern n�tzlich sind.\n"
+"Programm Informationen, die z.B. zum Berichten von Fehlern nützlich sind.\n"
 
 #: utils/cpufreq-info.c:497
 #, c-format
@@ -557,7 +557,7 @@ msgid ""
 "select\n"
 msgstr ""
 "  -d FREQ, --min FREQ      neue minimale Taktfrequenz, die der Regler\n"
-"                           ausw�hlen darf\n"
+"                           auswählen darf\n"
 
 #: utils/cpufreq-set.c:28
 #, c-format
@@ -566,7 +566,7 @@ msgid ""
 "select\n"
 msgstr ""
 "  -u FREQ, --max FREQ      neue maximale Taktfrequenz, die der Regler\n"
-"                           ausw�hlen darf\n"
+"                           auswählen darf\n"
 
 #: utils/cpufreq-set.c:29
 #, c-format
@@ -579,20 +579,20 @@ msgid ""
 "  -f FREQ, --freq FREQ     specific frequency to be set. Requires userspace\n"
 "                           governor to be available and loaded\n"
 msgstr ""
-"  -f FREQ, --freq FREQ     setze exakte Taktfrequenz. Ben�tigt den Regler\n"
+"  -f FREQ, --freq FREQ     setze exakte Taktfrequenz. Benötigt den Regler\n"
 "                           'userspace'.\n"
 
 #: utils/cpufreq-set.c:32
 #, c-format
 msgid "  -r, --related            Switches all hardware-related CPUs\n"
 msgstr ""
-"  -r, --related            Setze Werte f�r alle CPUs, deren Taktfrequenz\n"
+"  -r, --related            Setze Werte für alle CPUs, deren Taktfrequenz\n"
 "                           hardwarebedingt identisch ist.\n"
 
 #: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
 #, c-format
 msgid "  -h, --help               Prints out this screen\n"
-msgstr "  -h, --help               Gibt diese Kurz�bersicht aus\n"
+msgstr "  -h, --help               Gibt diese Kurzübersicht aus\n"
 
 #: utils/cpufreq-set.c:35
 #, fuzzy, c-format
@@ -618,8 +618,8 @@ msgstr ""
 "   angenommen\n"
 "2. Der Parameter -f bzw. --freq kann mit keinem anderen als dem Parameter\n"
 "   -c bzw. --cpu kombiniert werden\n"
-"3. FREQuenzen k�nnen in Hz, kHz (Standard), MHz, GHz oder THz eingegeben\n"
-"   werden, indem der Wert und unmittelbar anschlie�end (ohne Leerzeichen!)\n"
+"3. FREQuenzen können in Hz, kHz (Standard), MHz, GHz oder THz eingegeben\n"
+"   werden, indem der Wert und unmittelbar anschließend (ohne Leerzeichen!)\n"
 "   die Einheit angegeben werden. (Bsp: 1GHz )\n"
 "   (FREQuenz in kHz =^ MHz * 1000 =^ GHz * 1000000).\n"
 
@@ -638,7 +638,7 @@ msgid ""
 msgstr ""
 "Beim Einstellen ist ein Fehler aufgetreten. Typische Fehlerquellen sind:\n"
 "- nicht ausreichende Rechte (Administrator)\n"
-"- der Regler ist nicht verf�gbar bzw. nicht geladen\n"
+"- der Regler ist nicht verfügbar bzw. nicht geladen\n"
 "- die angegebene Taktik ist inkorrekt\n"
 "- eine spezifische Frequenz wurde angegeben, aber der Regler 'userspace'\n"
 "  kann entweder hardwarebedingt nicht genutzt werden oder ist nicht geladen\n"
@@ -821,7 +821,7 @@ msgstr ""
 #: utils/cpuidle-info.c:48
 #, fuzzy, c-format
 msgid "Available idle states:"
-msgstr "  m�gliche Taktfrequenzen: "
+msgstr "  mögliche Taktfrequenzen: "
 
 #: utils/cpuidle-info.c:71
 #, c-format
@@ -924,7 +924,7 @@ msgstr "Aufruf: cpufreq-info [Optionen]\n"
 msgid "  -s, --silent         Only show general C-state information\n"
 msgstr ""
 "  -e, --debug          Erzeugt detaillierte Informationen, hilfreich\n"
-"                       zum Aufsp�ren von Fehlern\n"
+"                       zum Aufspüren von Fehlern\n"
 
 #: utils/cpuidle-info.c:150
 #, fuzzy, c-format
@@ -933,9 +933,9 @@ msgid ""
 "acpi/processor/*/power\n"
 "                       interface in older kernels\n"
 msgstr ""
-"  -o, --proc           Erzeugt Informationen in einem �hnlichem Format zu "
+"  -o, --proc           Erzeugt Informationen in einem ähnlichem Format zu "
 "dem\n"
-"                       der /proc/cpufreq-Datei in 2.4. und fr�hen 2.6.\n"
+"                       der /proc/cpufreq-Datei in 2.4. und frühen 2.6.\n"
 "                       Kernel-Versionen\n"
 
 #: utils/cpuidle-info.c:209
@@ -949,7 +949,7 @@ msgstr ""
 #~ "  -c CPU, --cpu CPU    CPU number which information shall be determined "
 #~ "about\n"
 #~ msgstr ""
-#~ "  -c CPU, --cpu CPU    Nummer der CPU, �ber die Informationen "
+#~ "  -c CPU, --cpu CPU    Nummer der CPU, über die Informationen "
 #~ "herausgefunden werden sollen\n"
 
 #~ msgid ""
diff --git a/tools/power/cpupower/po/fr.po b/tools/power/cpupower/po/fr.po
index 245ad20a9bf9..b46ca2548f86 100644
--- a/tools/power/cpupower/po/fr.po
+++ b/tools/power/cpupower/po/fr.po
@@ -212,7 +212,7 @@ msgstr ""
 #: utils/cpupower.c:91
 #, c-format
 msgid "Report errors and bugs to %s, please.\n"
-msgstr "Veuillez rapportez les erreurs et les bogues � %s, s'il vous plait.\n"
+msgstr "Veuillez rapportez les erreurs et les bogues à %s, s'il vous plait.\n"
 
 #: utils/cpupower.c:114
 #, c-format
@@ -227,14 +227,14 @@ msgstr ""
 #: utils/cpufreq-info.c:31
 #, c-format
 msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n"
-msgstr "D�termination du nombre de CPUs (%s : %s) impossible.  Assume 1\n"
+msgstr "Détermination du nombre de CPUs (%s : %s) impossible.  Assume 1\n"
 
 #: utils/cpufreq-info.c:63
 #, c-format
 msgid ""
 "          minimum CPU frequency  -  maximum CPU frequency  -  governor\n"
 msgstr ""
-"         Fr�quence CPU minimale - Fr�quence CPU maximale  - r�gulateur\n"
+"         Fréquence CPU minimale - Fréquence CPU maximale  - régulateur\n"
 
 #: utils/cpufreq-info.c:151
 #, c-format
@@ -302,12 +302,12 @@ msgstr "  pilote : %s\n"
 #: utils/cpufreq-info.c:219
 #, fuzzy, c-format
 msgid "  CPUs which run at the same hardware frequency: "
-msgstr "  CPUs qui doivent changer de fr�quences en m�me temps : "
+msgstr "  CPUs qui doivent changer de fréquences en même temps : "
 
 #: utils/cpufreq-info.c:230
 #, fuzzy, c-format
 msgid "  CPUs which need to have their frequency coordinated by software: "
-msgstr "  CPUs qui doivent changer de fr�quences en m�me temps : "
+msgstr "  CPUs qui doivent changer de fréquences en même temps : "
 
 #: utils/cpufreq-info.c:241
 #, c-format
@@ -317,22 +317,22 @@ msgstr ""
 #: utils/cpufreq-info.c:247
 #, c-format
 msgid "  hardware limits: "
-msgstr "  limitation mat�rielle : "
+msgstr "  limitation matérielle : "
 
 #: utils/cpufreq-info.c:256
 #, c-format
 msgid "  available frequency steps: "
-msgstr "  plage de fr�quence : "
+msgstr "  plage de fréquence : "
 
 #: utils/cpufreq-info.c:269
 #, c-format
 msgid "  available cpufreq governors: "
-msgstr "  r�gulateurs disponibles : "
+msgstr "  régulateurs disponibles : "
 
 #: utils/cpufreq-info.c:280
 #, c-format
 msgid "  current policy: frequency should be within "
-msgstr "  tactique actuelle : la fr�quence doit �tre comprise entre "
+msgstr "  tactique actuelle : la fréquence doit être comprise entre "
 
 #: utils/cpufreq-info.c:282
 #, c-format
@@ -345,18 +345,18 @@ msgid ""
 "The governor \"%s\" may decide which speed to use\n"
 "                  within this range.\n"
 msgstr ""
-"Le r�gulateur \"%s\" est libre de choisir la vitesse\n"
-"                  dans cette plage de fr�quences.\n"
+"Le régulateur \"%s\" est libre de choisir la vitesse\n"
+"                  dans cette plage de fréquences.\n"
 
 #: utils/cpufreq-info.c:293
 #, c-format
 msgid "  current CPU frequency is "
-msgstr "  la fr�quence actuelle de ce CPU est "
+msgstr "  la fréquence actuelle de ce CPU est "
 
 #: utils/cpufreq-info.c:296
 #, c-format
 msgid " (asserted by call to hardware)"
-msgstr " (v�rifi� par un appel direct du mat�riel)"
+msgstr " (vérifié par un appel direct du matériel)"
 
 #: utils/cpufreq-info.c:304
 #, c-format
@@ -377,7 +377,7 @@ msgstr "Options :\n"
 #: utils/cpufreq-info.c:474
 #, fuzzy, c-format
 msgid "  -e, --debug          Prints out debug information [default]\n"
-msgstr "  -e, --debug          Afficher les informations de d�boguage\n"
+msgstr "  -e, --debug          Afficher les informations de déboguage\n"
 
 #: utils/cpufreq-info.c:475
 #, c-format
@@ -385,8 +385,8 @@ msgid ""
 "  -f, --freq           Get frequency the CPU currently runs at, according\n"
 "                       to the cpufreq core *\n"
 msgstr ""
-"  -f, --freq           Obtenir la fr�quence actuelle du CPU selon le point\n"
-"                       de vue du coeur du syst�me de cpufreq *\n"
+"  -f, --freq           Obtenir la fréquence actuelle du CPU selon le point\n"
+"                       de vue du coeur du système de cpufreq *\n"
 
 #: utils/cpufreq-info.c:477
 #, c-format
@@ -394,8 +394,8 @@ msgid ""
 "  -w, --hwfreq         Get frequency the CPU currently runs at, by reading\n"
 "                       it from hardware (only available to root) *\n"
 msgstr ""
-"  -w, --hwfreq         Obtenir la fr�quence actuelle du CPU directement par\n"
-"                       le mat�riel (doit �tre root) *\n"
+"  -w, --hwfreq         Obtenir la fréquence actuelle du CPU directement par\n"
+"                       le matériel (doit être root) *\n"
 
 #: utils/cpufreq-info.c:479
 #, c-format
@@ -403,13 +403,13 @@ msgid ""
 "  -l, --hwlimits       Determine the minimum and maximum CPU frequency "
 "allowed *\n"
 msgstr ""
-"  -l, --hwlimits       Affiche les fr�quences minimales et maximales du CPU "
+"  -l, --hwlimits       Affiche les fréquences minimales et maximales du CPU "
 "*\n"
 
 #: utils/cpufreq-info.c:480
 #, c-format
 msgid "  -d, --driver         Determines the used cpufreq kernel driver *\n"
-msgstr "  -d, --driver         Affiche le pilote cpufreq utilis� *\n"
+msgstr "  -d, --driver         Affiche le pilote cpufreq utilisé *\n"
 
 #: utils/cpufreq-info.c:481
 #, c-format
@@ -420,7 +420,7 @@ msgstr "  -p, --policy         Affiche la tactique actuelle de cpufreq *\n"
 #, c-format
 msgid "  -g, --governors      Determines available cpufreq governors *\n"
 msgstr ""
-"  -g, --governors      Affiche les r�gulateurs disponibles de cpufreq *\n"
+"  -g, --governors      Affiche les régulateurs disponibles de cpufreq *\n"
 
 #: utils/cpufreq-info.c:483
 #, fuzzy, c-format
@@ -429,7 +429,7 @@ msgid ""
 "frequency *\n"
 msgstr ""
 "  -a, --affected-cpus   Affiche quels sont les CPUs qui doivent changer de\n"
-"                        fr�quences en m�me temps *\n"
+"                        fréquences en même temps *\n"
 
 #: utils/cpufreq-info.c:484
 #, fuzzy, c-format
@@ -438,7 +438,7 @@ msgid ""
 "                       coordinated by software *\n"
 msgstr ""
 "  -a, --affected-cpus   Affiche quels sont les CPUs qui doivent changer de\n"
-"                        fr�quences en m�me temps *\n"
+"                        fréquences en même temps *\n"
 
 #: utils/cpufreq-info.c:486
 #, c-format
@@ -453,7 +453,7 @@ msgid ""
 "  -y, --latency        Determines the maximum latency on CPU frequency "
 "changes *\n"
 msgstr ""
-"  -l, --hwlimits       Affiche les fr�quences minimales et maximales du CPU "
+"  -l, --hwlimits       Affiche les fréquences minimales et maximales du CPU "
 "*\n"
 
 #: utils/cpufreq-info.c:488
@@ -469,7 +469,7 @@ msgid ""
 "                       interface in 2.4. and early 2.6. kernels\n"
 msgstr ""
 "  -o, --proc           Affiche les informations en utilisant l'interface\n"
-"                       fournie par /proc/cpufreq, pr�sente dans les "
+"                       fournie par /proc/cpufreq, présente dans les "
 "versions\n"
 "                       2.4 et les anciennes versions 2.6 du noyau\n"
 
@@ -485,7 +485,7 @@ msgstr ""
 #: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
 #, c-format
 msgid "  -h, --help           Prints out this screen\n"
-msgstr "  -h, --help           affiche l'aide-m�moire\n"
+msgstr "  -h, --help           affiche l'aide-mémoire\n"
 
 #: utils/cpufreq-info.c:495
 #, c-format
@@ -493,8 +493,8 @@ msgid ""
 "If no argument or only the -c, --cpu parameter is given, debug output about\n"
 "cpufreq is printed which is useful e.g. for reporting bugs.\n"
 msgstr ""
-"Par d�faut, les informations de d�boguage seront affich�es si aucun\n"
-"argument, ou bien si seulement l'argument -c (--cpu) est donn�, afin de\n"
+"Par défaut, les informations de déboguage seront affichées si aucun\n"
+"argument, ou bien si seulement l'argument -c (--cpu) est donné, afin de\n"
 "faciliter les rapports de bogues par exemple\n"
 
 #: utils/cpufreq-info.c:497
@@ -517,8 +517,8 @@ msgid ""
 "You can't specify more than one --cpu parameter and/or\n"
 "more than one output-specific argument\n"
 msgstr ""
-"On ne peut indiquer plus d'un param�tre --cpu, tout comme l'on ne peut\n"
-"sp�cifier plus d'un argument de formatage\n"
+"On ne peut indiquer plus d'un paramètre --cpu, tout comme l'on ne peut\n"
+"spécifier plus d'un argument de formatage\n"
 
 #: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42
 #: utils/cpupower-info.c:42 utils/cpuidle-info.c:213
@@ -529,7 +529,7 @@ msgstr "option invalide\n"
 #: utils/cpufreq-info.c:617
 #, c-format
 msgid "couldn't analyze CPU %d as it doesn't seem to be present\n"
-msgstr "analyse du CPU %d impossible puisqu'il ne semble pas �tre pr�sent\n"
+msgstr "analyse du CPU %d impossible puisqu'il ne semble pas être présent\n"
 
 #: utils/cpufreq-info.c:620 utils/cpupower-info.c:142
 #, c-format
@@ -547,8 +547,8 @@ msgid ""
 "  -d FREQ, --min FREQ      new minimum CPU frequency the governor may "
 "select\n"
 msgstr ""
-"  -d FREQ, --min FREQ       nouvelle fr�quence minimale du CPU � utiliser\n"
-"                            par le r�gulateur\n"
+"  -d FREQ, --min FREQ       nouvelle fréquence minimale du CPU à utiliser\n"
+"                            par le régulateur\n"
 
 #: utils/cpufreq-set.c:28
 #, c-format
@@ -556,13 +556,13 @@ msgid ""
 "  -u FREQ, --max FREQ      new maximum CPU frequency the governor may "
 "select\n"
 msgstr ""
-"  -u FREQ, --max FREQ       nouvelle fr�quence maximale du CPU � utiliser\n"
-"                            par le r�gulateur\n"
+"  -u FREQ, --max FREQ       nouvelle fréquence maximale du CPU à utiliser\n"
+"                            par le régulateur\n"
 
 #: utils/cpufreq-set.c:29
 #, c-format
 msgid "  -g GOV, --governor GOV   new cpufreq governor\n"
-msgstr "  -g GOV, --governor GOV   active le r�gulateur GOV\n"
+msgstr "  -g GOV, --governor GOV   active le régulateur GOV\n"
 
 #: utils/cpufreq-set.c:30
 #, c-format
@@ -570,9 +570,9 @@ msgid ""
 "  -f FREQ, --freq FREQ     specific frequency to be set. Requires userspace\n"
 "                           governor to be available and loaded\n"
 msgstr ""
-"  -f FREQ, --freq FREQ     fixe la fr�quence du processeur � FREQ. Il faut\n"
-"                           que le r�gulateur � userspace � soit disponible \n"
-"                           et activ�.\n"
+"  -f FREQ, --freq FREQ     fixe la fréquence du processeur à FREQ. Il faut\n"
+"                           que le régulateur « userspace » soit disponible \n"
+"                           et activé.\n"
 
 #: utils/cpufreq-set.c:32
 #, c-format
@@ -582,7 +582,7 @@ msgstr ""
 #: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
 #, fuzzy, c-format
 msgid "  -h, --help               Prints out this screen\n"
-msgstr "  -h, --help           affiche l'aide-m�moire\n"
+msgstr "  -h, --help           affiche l'aide-mémoire\n"
 
 #: utils/cpufreq-set.c:35
 #, fuzzy, c-format
@@ -602,11 +602,11 @@ msgid ""
 "   (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
 msgstr ""
 "Remarque :\n"
-"1. Le CPU num�ro 0 sera utilis� par d�faut si -c (ou --cpu) est omis ;\n"
-"2. l'argument -f FREQ (ou --freq FREQ) ne peut �tre utilis� qu'avec --cpu ;\n"
-"3. on pourra pr�ciser l'unit� des fr�quences en postfixant sans aucune "
+"1. Le CPU numéro 0 sera utilisé par défaut si -c (ou --cpu) est omis ;\n"
+"2. l'argument -f FREQ (ou --freq FREQ) ne peut être utilisé qu'avec --cpu ;\n"
+"3. on pourra préciser l'unité des fréquences en postfixant sans aucune "
 "espace\n"
-"   les valeurs par hz, kHz (par d�faut), MHz, GHz ou THz\n"
+"   les valeurs par hz, kHz (par défaut), MHz, GHz ou THz\n"
 "   (kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
 
 #: utils/cpufreq-set.c:57
@@ -622,21 +622,21 @@ msgid ""
 "frequency\n"
 "   or because the userspace governor isn't loaded?\n"
 msgstr ""
-"En ajustant les nouveaux param�tres, une erreur est apparue. Les sources\n"
+"En ajustant les nouveaux paramètres, une erreur est apparue. Les sources\n"
 "d'erreur typique sont :\n"
-"- droit d'administration insuffisant (�tes-vous root ?) ;\n"
-"- le r�gulateur choisi n'est pas disponible, ou bien n'est pas disponible "
+"- droit d'administration insuffisant (êtes-vous root ?) ;\n"
+"- le régulateur choisi n'est pas disponible, ou bien n'est pas disponible "
 "en\n"
 "  tant que module noyau ;\n"
 "- la tactique n'est pas disponible ;\n"
-"- vous voulez utiliser l'option -f/--freq, mais le r�gulateur � userspace �\n"
-"  n'est pas disponible, par exemple parce que le mat�riel ne le supporte\n"
-"  pas, ou bien n'est tout simplement pas charg�.\n"
+"- vous voulez utiliser l'option -f/--freq, mais le régulateur « userspace »\n"
+"  n'est pas disponible, par exemple parce que le matériel ne le supporte\n"
+"  pas, ou bien n'est tout simplement pas chargé.\n"
 
 #: utils/cpufreq-set.c:170
 #, c-format
 msgid "wrong, unknown or unhandled CPU?\n"
-msgstr "CPU inconnu ou non support� ?\n"
+msgstr "CPU inconnu ou non supporté ?\n"
 
 #: utils/cpufreq-set.c:302
 #, c-format
@@ -653,7 +653,7 @@ msgid ""
 "At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
 "-g/--governor must be passed\n"
 msgstr ""
-"L'un de ces param�tres est obligatoire : -f/--freq, -d/--min, -u/--max et\n"
+"L'un de ces paramètres est obligatoire : -f/--freq, -d/--min, -u/--max et\n"
 "-g/--governor\n"
 
 #: utils/cpufreq-set.c:347
@@ -810,7 +810,7 @@ msgstr ""
 #: utils/cpuidle-info.c:48
 #, fuzzy, c-format
 msgid "Available idle states:"
-msgstr "  plage de fr�quence : "
+msgstr "  plage de fréquence : "
 
 #: utils/cpuidle-info.c:71
 #, c-format
@@ -911,7 +911,7 @@ msgstr "Usage : cpufreq-info [options]\n"
 #: utils/cpuidle-info.c:149
 #, fuzzy, c-format
 msgid "  -s, --silent         Only show general C-state information\n"
-msgstr "  -e, --debug          Afficher les informations de d�boguage\n"
+msgstr "  -e, --debug          Afficher les informations de déboguage\n"
 
 #: utils/cpuidle-info.c:150
 #, fuzzy, c-format
@@ -921,7 +921,7 @@ msgid ""
 "                       interface in older kernels\n"
 msgstr ""
 "  -o, --proc           Affiche les informations en utilisant l'interface\n"
-"                       fournie par /proc/cpufreq, pr�sente dans les "
+"                       fournie par /proc/cpufreq, présente dans les "
 "versions\n"
 "                       2.4 et les anciennes versions 2.6 du noyau\n"
 
@@ -929,19 +929,19 @@ msgstr ""
 #, fuzzy, c-format
 msgid "You can't specify more than one output-specific argument\n"
 msgstr ""
-"On ne peut indiquer plus d'un param�tre --cpu, tout comme l'on ne peut\n"
-"sp�cifier plus d'un argument de formatage\n"
+"On ne peut indiquer plus d'un paramètre --cpu, tout comme l'on ne peut\n"
+"spécifier plus d'un argument de formatage\n"
 
 #~ msgid ""
 #~ "  -c CPU, --cpu CPU    CPU number which information shall be determined "
 #~ "about\n"
 #~ msgstr ""
-#~ "  -c CPU, --cpu CPU    Num�ro du CPU pour lequel l'information sera "
-#~ "affich�e\n"
+#~ "  -c CPU, --cpu CPU    Numéro du CPU pour lequel l'information sera "
+#~ "affichée\n"
 
 #~ msgid ""
 #~ "  -c CPU, --cpu CPU        number of CPU where cpufreq settings shall be "
 #~ "modified\n"
 #~ msgstr ""
-#~ "  -c CPU, --cpu CPU        num�ro du CPU � prendre en compte pour les\n"
+#~ "  -c CPU, --cpu CPU        numéro du CPU à prendre en compte pour les\n"
 #~ "                           changements\n"
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index df43cd45d810..c3f39d5128ee 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -170,6 +170,7 @@ static int get_boost_mode(unsigned int cpu)
 	unsigned long pstates[MAX_HW_PSTATES] = {0,};
 
 	if (cpupower_cpu_info.vendor != X86_VENDOR_AMD &&
+	    cpupower_cpu_info.vendor != X86_VENDOR_HYGON &&
 	    cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
 		return 0;
 
@@ -190,8 +191,9 @@ static int get_boost_mode(unsigned int cpu)
 	printf(_("    Supported: %s\n"), support ? _("yes") : _("no"));
 	printf(_("    Active: %s\n"), active ? _("yes") : _("no"));
 
-	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
-	    cpupower_cpu_info.family >= 0x10) {
+	if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+	     cpupower_cpu_info.family >= 0x10) ||
+	     cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
 		ret = decode_pstates(cpu, cpupower_cpu_info.family, b_states,
 				     pstates, &pstate_no);
 		if (ret)
@@ -200,6 +202,8 @@ static int get_boost_mode(unsigned int cpu)
 		printf(_("    Boost States: %d\n"), b_states);
 		printf(_("    Total States: %d\n"), pstate_no);
 		for (i = 0; i < pstate_no; i++) {
+			if (!pstates[i])
+				continue;
 			if (i < b_states)
 				printf(_("    Pstate-Pb%d: %luMHz (boost state)"
 					 "\n"), i, pstates[i]);
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index bb41cdd0df6b..7c4f83a8c973 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -33,7 +33,7 @@ union msr_pstate {
 		unsigned vid:8;
 		unsigned iddval:8;
 		unsigned idddiv:2;
-		unsigned res1:30;
+		unsigned res1:31;
 		unsigned en:1;
 	} fam17h_bits;
 	unsigned long long val;
@@ -45,7 +45,7 @@ static int get_did(int family, union msr_pstate pstate)
 
 	if (family == 0x12)
 		t = pstate.val & 0xf;
-	else if (family == 0x17)
+	else if (family == 0x17 || family == 0x18)
 		t = pstate.fam17h_bits.did;
 	else
 		t = pstate.bits.did;
@@ -59,7 +59,7 @@ static int get_cof(int family, union msr_pstate pstate)
 	int fid, did, cof;
 
 	did = get_did(family, pstate);
-	if (family == 0x17) {
+	if (family == 0x17 || family == 0x18) {
 		fid = pstate.fam17h_bits.fid;
 		cof = 200 * fid / did;
 	} else {
@@ -119,6 +119,11 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family,
 		}
 		if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val))
 			return -1;
+		if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en))
+			continue;
+		else if (!pstate.bits.en)
+			continue;
+
 		pstates[i] = get_cof(cpu_family, pstate);
 	}
 	*no = i;
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 732b0b41ba26..5cc39d4e23ed 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -8,7 +8,7 @@
 #include "helpers/helpers.h"
 
 static const char *cpu_vendor_table[X86_VENDOR_MAX] = {
-	"Unknown", "GenuineIntel", "AuthenticAMD",
+	"Unknown", "GenuineIntel", "AuthenticAMD", "HygonGenuine",
 };
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -109,6 +109,7 @@ out:
 	fclose(fp);
 	/* Get some useful CPU capabilities from cpuid */
 	if (cpu_info->vendor != X86_VENDOR_AMD &&
+	    cpu_info->vendor != X86_VENDOR_HYGON &&
 	    cpu_info->vendor != X86_VENDOR_INTEL)
 		return ret;
 
@@ -124,8 +125,9 @@ out:
 	if (cpuid_level >= 6 && (cpuid_ecx(6) & 0x1))
 		cpu_info->caps |= CPUPOWER_CAP_APERF;
 
-	/* AMD Boost state enable/disable register */
-	if (cpu_info->vendor == X86_VENDOR_AMD) {
+	/* AMD or Hygon Boost state enable/disable register */
+	if (cpu_info->vendor == X86_VENDOR_AMD ||
+	    cpu_info->vendor == X86_VENDOR_HYGON) {
 		if (ext_cpuid_level >= 0x80000007 &&
 		    (cpuid_edx(0x80000007) & (1 << 9)))
 			cpu_info->caps |= CPUPOWER_CAP_AMD_CBP;
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 41da392be448..902139689315 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -61,7 +61,7 @@ extern int be_verbose;
 
 /* cpuid and cpuinfo helpers  **************************/
 enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
-			  X86_VENDOR_AMD, X86_VENDOR_MAX};
+			  X86_VENDOR_AMD, X86_VENDOR_HYGON, X86_VENDOR_MAX};
 
 #define CPUPOWER_CAP_INV_TSC		0x00000001
 #define CPUPOWER_CAP_APERF		0x00000002
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 80fdf55f414d..f406adc40bad 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -26,7 +26,7 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 		 * has Hardware determined variable increments instead.
 		 */
 
-		if (cpu_info.family == 0x17) {
+		if (cpu_info.family == 0x17 || cpu_info.family == 0x18) {
 			if (!read_msr(cpu, MSR_AMD_HWCR, &val)) {
 				if (!(val & CPUPOWER_AMD_CPBDIS))
 					*active = 1;
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 5b3205f16217..5b8c4956ff9a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -126,6 +126,20 @@ void fix_up_intel_idle_driver_name(char *tmp, int num)
 	}
 }
 
+#ifdef __powerpc__
+void map_power_idle_state_name(char *tmp)
+{
+	if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN))
+		strcpy(tmp, "stop0L");
+	else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN))
+		strcpy(tmp, "stop1L");
+	else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN))
+		strcpy(tmp, "stop2L");
+}
+#else
+void map_power_idle_state_name(char *tmp) { }
+#endif
+
 static struct cpuidle_monitor *cpuidle_register(void)
 {
 	int num;
@@ -145,6 +159,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
 		if (tmp == NULL)
 			continue;
 
+		map_power_idle_state_name(tmp);
 		fix_up_intel_idle_driver_name(tmp, num);
 		strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
 		free(tmp);
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 05f953f0f0a0..051da0a7c454 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -70,36 +70,43 @@ void print_n_spaces(int n)
 		printf(" ");
 }
 
-/* size of s must be at least n + 1 */
+/*s is filled with left and right spaces
+ *to make its length atleast n+1
+ */
 int fill_string_with_spaces(char *s, int n)
 {
+	char *temp;
 	int len = strlen(s);
-	if (len > n)
+
+	if (len >= n)
 		return -1;
+
+	temp = malloc(sizeof(char) * (n+1));
 	for (; len < n; len++)
 		s[len] = ' ';
 	s[len] = '\0';
+	snprintf(temp, n+1, " %s", s);
+	strcpy(s, temp);
+	free(temp);
 	return 0;
 }
 
+#define MAX_COL_WIDTH 6
 void print_header(int topology_depth)
 {
 	int unsigned mon;
 	int state, need_len;
 	cstate_t s;
 	char buf[128] = "";
-	int percent_width = 4;
 
 	fill_string_with_spaces(buf, topology_depth * 5 - 1);
 	printf("%s|", buf);
 
 	for (mon = 0; mon < avail_monitors; mon++) {
-		need_len = monitors[mon]->hw_states_num * (percent_width + 3)
+		need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1)
 			- 1;
-		if (mon != 0) {
-			printf("|| ");
-			need_len--;
-		}
+		if (mon != 0)
+			printf("||");
 		sprintf(buf, "%s", monitors[mon]->name);
 		fill_string_with_spaces(buf, need_len);
 		printf("%s", buf);
@@ -107,23 +114,21 @@ void print_header(int topology_depth)
 	printf("\n");
 
 	if (topology_depth > 2)
-		printf("PKG |");
+		printf(" PKG|");
 	if (topology_depth > 1)
 		printf("CORE|");
 	if (topology_depth > 0)
-		printf("CPU |");
+		printf(" CPU|");
 
 	for (mon = 0; mon < avail_monitors; mon++) {
 		if (mon != 0)
-			printf("|| ");
-		else
-			printf(" ");
+			printf("||");
 		for (state = 0; state < monitors[mon]->hw_states_num; state++) {
 			if (state != 0)
-				printf(" | ");
+				printf("|");
 			s = monitors[mon]->hw_states[state];
 			sprintf(buf, "%s", s.name);
-			fill_string_with_spaces(buf, percent_width);
+			fill_string_with_spaces(buf, MAX_COL_WIDTH);
 			printf("%s", buf);
 		}
 		printf(" ");
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 9e43f3371fbc..2ae50b499e0a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -15,7 +15,16 @@
 
 #define MONITORS_MAX 20
 #define MONITOR_NAME_LEN 20
+
+/* CSTATE_NAME_LEN is limited by header field width defined
+ * in cpupower-monitor.c. Header field width is defined to be
+ * sum of percent width and two spaces for padding.
+ */
+#ifdef __powerpc__
+#define CSTATE_NAME_LEN 7
+#else
 #define CSTATE_NAME_LEN 5
+#endif
 #define CSTATE_DESC_LEN 60
 
 int cpu_count;
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index d7c2a6d13dea..f2a7e9cfd577 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -241,7 +241,8 @@ static int init_maxfreq_mode(void)
 	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
 		goto use_sysfs;
 
-	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD ||
+	    cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
 		/* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
 		 * freq.
 		 * A test whether hwcr is accessable/available would be:
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index c1899cd72c80..845541544570 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -23,8 +23,8 @@ install : uninstall
 	install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
 
 	install -d  $(DESTDIR)$(PREFIX)/bin
-	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
-	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
+	ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
+	ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
 
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
 	install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index abb4c38f029b..6dae57041537 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
 #
 # Tool for analyzing boot timing
 # Copyright (c) 2013, Intel Corporation.
@@ -34,6 +34,10 @@ from datetime import datetime, timedelta
 from subprocess import call, Popen, PIPE
 import sleepgraph as aslib
 
+def pprint(msg):
+	print(msg)
+	sys.stdout.flush()
+
 # ----------------- CLASSES --------------------
 
 # Class: SystemValues
@@ -157,11 +161,11 @@ class SystemValues(aslib.SystemValues):
 		return cmdline
 	def manualRebootRequired(self):
 		cmdline = self.kernelParams()
-		print 'To generate a new timeline manually, follow these steps:\n'
-		print '1. Add the CMDLINE string to your kernel command line.'
-		print '2. Reboot the system.'
-		print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
-		print 'CMDLINE="%s"' % cmdline
+		pprint('To generate a new timeline manually, follow these steps:\n\n'\
+		'1. Add the CMDLINE string to your kernel command line.\n'\
+		'2. Reboot the system.\n'\
+		'3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n\n'\
+		'CMDLINE="%s"' % cmdline)
 		sys.exit()
 	def blGrub(self):
 		blcmd = ''
@@ -431,7 +435,7 @@ def parseTraceLog(data):
 			if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0):
 				continue
 			if(not cg.postProcess()):
-				print('Sanity check failed for %s-%d' % (proc, pid))
+				pprint('Sanity check failed for %s-%d' % (proc, pid))
 				continue
 			# match cg data to devices
 			devname = data.deviceMatch(pid, cg)
@@ -442,8 +446,8 @@ def parseTraceLog(data):
 				sysvals.vprint('%s callgraph found for %s %s-%d [%f - %f]' %\
 					(kind, cg.name, proc, pid, cg.start, cg.end))
 			elif len(cg.list) > 1000000:
-				print 'WARNING: the callgraph found for %s is massive! (%d lines)' %\
-					(devname, len(cg.list))
+				pprint('WARNING: the callgraph found for %s is massive! (%d lines)' %\
+					(devname, len(cg.list)))
 
 # Function: retrieveLogs
 # Description:
@@ -528,7 +532,7 @@ def createBootGraph(data):
 	tMax = data.end
 	tTotal = tMax - t0
 	if(tTotal == 0):
-		print('ERROR: No timeline data')
+		pprint('ERROR: No timeline data')
 		return False
 	user_mode = '%.0f'%(data.tUserMode*1000)
 	last_init = '%.0f'%(tTotal*1000)
@@ -734,7 +738,7 @@ def updateCron(restore=False):
 		op.close()
 		res = call([cmd, cronfile])
 	except Exception, e:
-		print 'Exception: %s' % str(e)
+		pprint('Exception: %s' % str(e))
 		shutil.move(backfile, cronfile)
 		res = -1
 	if res != 0:
@@ -750,7 +754,7 @@ def updateGrub(restore=False):
 			call(sysvals.blexec, stderr=PIPE, stdout=PIPE,
 				env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
 		except Exception, e:
-			print 'Exception: %s\n' % str(e)
+			pprint('Exception: %s\n' % str(e))
 		return
 	# extract the option and create a grub config without it
 	sysvals.rootUser(True)
@@ -797,7 +801,7 @@ def updateGrub(restore=False):
 		res = call(sysvals.blexec)
 		os.remove(grubfile)
 	except Exception, e:
-		print 'Exception: %s' % str(e)
+		pprint('Exception: %s' % str(e))
 		res = -1
 	# cleanup
 	shutil.move(tempfile, grubfile)
@@ -821,7 +825,7 @@ def updateKernelParams(restore=False):
 def doError(msg, help=False):
 	if help == True:
 		printHelp()
-	print 'ERROR: %s\n' % msg
+	pprint('ERROR: %s\n' % msg)
 	sysvals.outputResult({'error':msg})
 	sys.exit()
 
@@ -829,52 +833,51 @@ def doError(msg, help=False):
 # Description:
 #	 print out the help text
 def printHelp():
-	print('')
-	print('%s v%s' % (sysvals.title, sysvals.version))
-	print('Usage: bootgraph <options> <command>')
-	print('')
-	print('Description:')
-	print('  This tool reads in a dmesg log of linux kernel boot and')
-	print('  creates an html representation of the boot timeline up to')
-	print('  the start of the init process.')
-	print('')
-	print('  If no specific command is given the tool reads the current dmesg')
-	print('  and/or ftrace log and creates a timeline')
-	print('')
-	print('  Generates output files in subdirectory: boot-yymmdd-HHMMSS')
-	print('   HTML output:                    <hostname>_boot.html')
-	print('   raw dmesg output:               <hostname>_boot_dmesg.txt')
-	print('   raw ftrace output:              <hostname>_boot_ftrace.txt')
-	print('')
-	print('Options:')
-	print('  -h            Print this help text')
-	print('  -v            Print the current tool version')
-	print('  -verbose      Print extra information during execution and analysis')
-	print('  -addlogs      Add the dmesg log to the html output')
-	print('  -result fn    Export a results table to a text file for parsing.')
-	print('  -o name       Overrides the output subdirectory name when running a new test')
-	print('                default: boot-{date}-{time}')
-	print(' [advanced]')
-	print('  -fstat        Use ftrace to add function detail and statistics (default: disabled)')
-	print('  -f/-callgraph Add callgraph detail, can be very large (default: disabled)')
-	print('  -maxdepth N   limit the callgraph data to N call levels (default: 2)')
-	print('  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
-	print('  -expandcg     pre-expand the callgraph data in the html output (default: disabled)')
-	print('  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
-	print('  -cgfilter S   Filter the callgraph output in the timeline')
-	print('  -cgskip file  Callgraph functions to skip, off to disable (default: cgskip.txt)')
-	print('  -bl name      Use the following boot loader for kernel params (default: grub)')
-	print('  -reboot       Reboot the machine automatically and generate a new timeline')
-	print('  -manual       Show the steps to generate a new timeline manually (used with -reboot)')
-	print('')
-	print('Other commands:')
-	print('  -flistall     Print all functions capable of being captured in ftrace')
-	print('  -sysinfo      Print out system info extracted from BIOS')
-	print(' [redo]')
-	print('  -dmesg file   Create HTML output using dmesg input (used with -ftrace)')
-	print('  -ftrace file  Create HTML output using ftrace input (used with -dmesg)')
-	print('')
+	pprint('\n%s v%s\n'\
+	'Usage: bootgraph <options> <command>\n'\
+	'\n'\
+	'Description:\n'\
+	'  This tool reads in a dmesg log of linux kernel boot and\n'\
+	'  creates an html representation of the boot timeline up to\n'\
+	'  the start of the init process.\n'\
+	'\n'\
+	'  If no specific command is given the tool reads the current dmesg\n'\
+	'  and/or ftrace log and creates a timeline\n'\
+	'\n'\
+	'  Generates output files in subdirectory: boot-yymmdd-HHMMSS\n'\
+	'   HTML output:                    <hostname>_boot.html\n'\
+	'   raw dmesg output:               <hostname>_boot_dmesg.txt\n'\
+	'   raw ftrace output:              <hostname>_boot_ftrace.txt\n'\
+	'\n'\
+	'Options:\n'\
+	'  -h            Print this help text\n'\
+	'  -v            Print the current tool version\n'\
+	'  -verbose      Print extra information during execution and analysis\n'\
+	'  -addlogs      Add the dmesg log to the html output\n'\
+	'  -result fn    Export a results table to a text file for parsing.\n'\
+	'  -o name       Overrides the output subdirectory name when running a new test\n'\
+	'                default: boot-{date}-{time}\n'\
+	' [advanced]\n'\
+	'  -fstat        Use ftrace to add function detail and statistics (default: disabled)\n'\
+	'  -f/-callgraph Add callgraph detail, can be very large (default: disabled)\n'\
+	'  -maxdepth N   limit the callgraph data to N call levels (default: 2)\n'\
+	'  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])\n'\
+	'  -expandcg     pre-expand the callgraph data in the html output (default: disabled)\n'\
+	'  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)\n'\
+	'  -cgfilter S   Filter the callgraph output in the timeline\n'\
+	'  -cgskip file  Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\
+	'  -bl name      Use the following boot loader for kernel params (default: grub)\n'\
+	'  -reboot       Reboot the machine automatically and generate a new timeline\n'\
+	'  -manual       Show the steps to generate a new timeline manually (used with -reboot)\n'\
+	'\n'\
+	'Other commands:\n'\
+	'  -flistall     Print all functions capable of being captured in ftrace\n'\
+	'  -sysinfo      Print out system info extracted from BIOS\n'\
+	' [redo]\n'\
+	'  -dmesg file   Create HTML output using dmesg input (used with -ftrace)\n'\
+	'  -ftrace file  Create HTML output using ftrace input (used with -dmesg)\n'\
+	'' % (sysvals.title, sysvals.version))
 	return True
 
 # ----------------- MAIN --------------------
@@ -895,7 +898,7 @@ if __name__ == '__main__':
 			printHelp()
 			sys.exit()
 		elif(arg == '-v'):
-			print("Version %s" % sysvals.version)
+			pprint("Version %s" % sysvals.version)
 			sys.exit()
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
@@ -1016,7 +1019,7 @@ if __name__ == '__main__':
 				print f
 		elif cmd == 'checkbl':
 			sysvals.getBootLoader()
-			print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)
+			pprint('Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec))
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		sys.exit()
diff --git a/tools/power/pm-graph/config/cgskip.txt b/tools/power/pm-graph/config/cgskip.txt
index e48d588fbfb4..9ff88e7e2300 100644
--- a/tools/power/pm-graph/config/cgskip.txt
+++ b/tools/power/pm-graph/config/cgskip.txt
@@ -27,6 +27,7 @@ ktime_get
 # console calls
 printk
 dev_printk
+__dev_printk
 console_unlock
 
 # memory handling
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 18baaf6300c9..24a2e7d0ae63 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -65,9 +65,9 @@ During test, enable/disable runtime suspend for all devices. The test is delayed
 by 5 seconds to allow runtime suspend changes to occur. The settings are restored
 after the test is complete.
 .TP
-\fB-display \fIon/off\fR
-Turn the display on or off for the test using the xset command. This helps
-maintain the consistecy of test data for better comparison.
+\fB-display \fIon/off/standby/suspend\fR
+Switch the display to the requested mode for the test using the xset command.
+This helps maintain the consistency of test data for better comparison.
 .TP
 \fB-skiphtml\fR
 Run the test and capture the trace logs, but skip the timeline generation.
@@ -168,6 +168,7 @@ Create a summary page of all tests in \fIindir\fR. Creates summary.html
 in the current folder. The output page is a table of tests with
 suspend and resume values sorted by suspend mode, host, and kernel.
 Includes test averages by mode and links to the test html files.
+Use -genhtml to include tests with missing html.
 .TP
 \fB-modes\fR
 List available suspend modes.
@@ -179,6 +180,16 @@ with any options you intend to use to see if they will work.
 \fB-fpdt\fR
 Print out the contents of the ACPI Firmware Performance Data Table.
 .TP
+\fB-battery\fR
+Print out battery status and current charge.
+.TP
+\fB-xon/-xoff/-xstandby/-xsuspend\fR
+Test xset by attempting to switch the display to the given mode. This
+is the same command which will be issued by \fB-display \fImode\fR.
+.TP
+\fB-xstat\fR
+Get the current DPMS display mode.
+.TP
 \fB-sysinfo\fR
 Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
 .TP
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 266409fb27ae..52618f3444d4 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
 #
 # Tool for analyzing suspend/resume timing
 # Copyright (c) 2013, Intel Corporation.
@@ -54,6 +54,7 @@ import os
 import string
 import re
 import platform
+import signal
 from datetime import datetime
 import struct
 import ConfigParser
@@ -61,6 +62,10 @@ import gzip
 from threading import Thread
 from subprocess import call, Popen, PIPE
 
+def pprint(msg):
+	print(msg)
+	sys.stdout.flush()
+
 # ----------------- CLASSES --------------------
 
 # Class: SystemValues
@@ -69,10 +74,10 @@ from subprocess import call, Popen, PIPE
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '5.0'
+	version = '5.2'
 	ansi = False
 	rs = 0
-	display = 0
+	display = ''
 	gzip = False
 	sync = False
 	verbose = False
@@ -99,6 +104,7 @@ class SystemValues:
 	tpath = '/sys/kernel/debug/tracing/'
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
 	epath = '/sys/kernel/debug/tracing/events/power/'
+	pmdpath = '/sys/power/pm_debug_messages'
 	traceevents = [
 		'suspend_resume',
 		'device_pm_callback_end',
@@ -109,8 +115,10 @@ class SystemValues:
 	mempath = '/dev/mem'
 	powerfile = '/sys/power/state'
 	mempowerfile = '/sys/power/mem_sleep'
+	diskpowerfile = '/sys/power/disk'
 	suspendmode = 'mem'
 	memmode = ''
+	diskmode = ''
 	hostname = 'localhost'
 	prefix = 'test'
 	teststamp = ''
@@ -137,16 +145,15 @@ class SystemValues:
 	useprocmon = False
 	notestrun = False
 	cgdump = False
+	devdump = False
 	mixedphaseheight = True
 	devprops = dict()
 	predelay = 0
 	postdelay = 0
-	procexecfmt = 'ps - (?P<ps>.*)$'
-	devpropfmt = '# Device Properties: .*'
-	tracertypefmt = '# tracer: (?P<t>.*)'
-	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+	pmdebug = ''
 	tracefuncs = {
 		'sys_sync': {},
+		'ksys_sync': {},
 		'__pm_notifier_call_chain': {},
 		'pm_prepare_console': {},
 		'pm_notifier_call_chain': {},
@@ -187,7 +194,6 @@ class SystemValues:
 	dev_tracefuncs = {
 		# general wait/delay/sleep
 		'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 },
-		'schedule_timeout_uninterruptible': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
 		'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
 		'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 },
 		'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 },
@@ -199,6 +205,9 @@ class SystemValues:
 		# filesystem
 		'ext4_sync_fs': {},
 		# 80211
+		'ath10k_bmi_read_memory': { 'args_x86_64': {'length':'%cx:s32'} },
+		'ath10k_bmi_write_memory': { 'args_x86_64': {'length':'%cx:s32'} },
+		'ath10k_bmi_fast_download': { 'args_x86_64': {'length':'%cx:s32'} },
 		'iwlagn_mac_start': {},
 		'iwlagn_alloc_bcast_station': {},
 		'iwl_trans_pcie_start_hw': {},
@@ -240,7 +249,8 @@ class SystemValues:
 	kprobes = dict()
 	timeformat = '%.3f'
 	cmdline = '%s %s' % \
-			(os.path.basename(sys.argv[0]), string.join(sys.argv[1:], ' '))
+			(os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
+	sudouser = ''
 	def __init__(self):
 		self.archargs = 'args_'+platform.machine()
 		self.hostname = platform.node()
@@ -256,27 +266,49 @@ class SystemValues:
 		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
 			self.ansi = True
 		self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S')
+		if os.getuid() == 0 and 'SUDO_USER' in os.environ and \
+			os.environ['SUDO_USER']:
+			self.sudouser = os.environ['SUDO_USER']
 	def vprint(self, msg):
 		self.logmsg += msg+'\n'
-		if(self.verbose):
-			print(msg)
+		if self.verbose or msg.startswith('WARNING:'):
+			pprint(msg)
+	def signalHandler(self, signum, frame):
+		if not self.result:
+			return
+		signame = self.signames[signum] if signum in self.signames else 'UNKNOWN'
+		msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno)
+		sysvals.outputResult({'error':msg})
+		sys.exit(3)
+	def signalHandlerInit(self):
+		capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT',
+			'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'TSTP']
+		self.signames = dict()
+		for i in capture:
+			s = 'SIG'+i
+			try:
+				signum = getattr(signal, s)
+				signal.signal(signum, self.signalHandler)
+			except:
+				continue
+			self.signames[signum] = s
 	def rootCheck(self, fatal=True):
 		if(os.access(self.powerfile, os.W_OK)):
 			return True
 		if fatal:
 			msg = 'This command requires sysfs mount and root access'
-			print('ERROR: %s\n') % msg
+			pprint('ERROR: %s\n' % msg)
 			self.outputResult({'error':msg})
-			sys.exit()
+			sys.exit(1)
 		return False
 	def rootUser(self, fatal=False):
 		if 'USER' in os.environ and os.environ['USER'] == 'root':
 			return True
 		if fatal:
 			msg = 'This command must be run as root'
-			print('ERROR: %s\n') % msg
+			pprint('ERROR: %s\n' % msg)
 			self.outputResult({'error':msg})
-			sys.exit()
+			sys.exit(1)
 		return False
 	def getExec(self, cmd):
 		dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin',
@@ -406,8 +438,8 @@ class SystemValues:
 				ktime = m.group('ktime')
 		fp.close()
 		self.dmesgstart = float(ktime)
-	def getdmesg(self, fwdata=[]):
-		op = self.writeDatafileHeader(sysvals.dmesgfile, fwdata)
+	def getdmesg(self, testdata):
+		op = self.writeDatafileHeader(sysvals.dmesgfile, testdata)
 		# store all new dmesg lines since initdmesg was called
 		fp = Popen('dmesg', stdout=PIPE).stdout
 		for line in fp:
@@ -535,7 +567,7 @@ class SystemValues:
 		if len(self.kprobes) < 1:
 			return
 		if output:
-			print('    kprobe functions in this kernel:')
+			pprint('    kprobe functions in this kernel:')
 		# first test each kprobe
 		rejects = []
 		# sort kprobes: trace, ub-dev, custom, dev
@@ -557,7 +589,7 @@ class SystemValues:
 				else:
 					kpl[2].append(name)
 			if output:
-				print('         %s: %s' % (name, res))
+				pprint('         %s: %s' % (name, res))
 		kplist = kpl[0] + kpl[1] + kpl[2] + kpl[3]
 		# remove all failed ones from the list
 		for name in rejects:
@@ -571,7 +603,7 @@ class SystemValues:
 		if output:
 			check = self.fgetVal('kprobe_events')
 			linesack = (len(check.split('\n')) - 1) / 2
-			print('    kprobe functions enabled: %d/%d' % (linesack, linesout))
+			pprint('    kprobe functions enabled: %d/%d' % (linesack, linesout))
 		self.fsetVal('1', 'events/kprobes/enable')
 	def testKprobe(self, kname, kprobe):
 		self.fsetVal('0', 'events/kprobes/enable')
@@ -619,6 +651,8 @@ class SystemValues:
 			self.fsetVal('0', 'events/kprobes/enable')
 			self.fsetVal('', 'kprobe_events')
 			self.fsetVal('1024', 'buffer_size_kb')
+		if self.pmdebug:
+			self.setVal(self.pmdebug, self.pmdpath)
 	def setupAllKprobes(self):
 		for name in self.tracefuncs:
 			self.defaultKprobe(name, self.tracefuncs[name])
@@ -637,10 +671,15 @@ class SystemValues:
 		return False
 	def initFtrace(self):
 		self.printSystemInfo(False)
-		print('INITIALIZING FTRACE...')
+		pprint('INITIALIZING FTRACE...')
 		# turn trace off
 		self.fsetVal('0', 'tracing_on')
 		self.cleanupFtrace()
+		# pm debug messages
+		pv = self.getVal(self.pmdpath)
+		if pv != '1':
+			self.setVal('1', self.pmdpath)
+			self.pmdebug = pv
 		# set the trace clock to global
 		self.fsetVal('global', 'trace_clock')
 		self.fsetVal('nop', 'current_tracer')
@@ -649,7 +688,8 @@ class SystemValues:
 		if self.bufsize > 0:
 			tgtsize = self.bufsize
 		elif self.usecallgraph or self.usedevsrc:
-			tgtsize = min(self.memfree, 3*1024*1024)
+			bmax = (1*1024*1024) if self.suspendmode == 'disk' else (3*1024*1024)
+			tgtsize = min(self.memfree, bmax)
 		else:
 			tgtsize = 65536
 		while not self.fsetVal('%d' % (tgtsize / cpus), 'buffer_size_kb'):
@@ -658,7 +698,7 @@ class SystemValues:
 			if tgtsize < 65536:
 				tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus
 				break
-		print 'Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus)
+		pprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus))
 		# initialize the callgraph trace
 		if(self.usecallgraph):
 			# set trace type
@@ -691,7 +731,7 @@ class SystemValues:
 			if self.usedevsrc:
 				for name in self.dev_tracefuncs:
 					self.defaultKprobe(name, self.dev_tracefuncs[name])
-			print('INITIALIZING KPROBES...')
+			pprint('INITIALIZING KPROBES...')
 			self.addKprobes(self.verbose)
 		if(self.usetraceevents):
 			# turn trace events on
@@ -728,19 +768,24 @@ class SystemValues:
 		if not self.ansi:
 			return str
 		return '\x1B[%d;40m%s\x1B[m' % (color, str)
-	def writeDatafileHeader(self, filename, fwdata=[]):
+	def writeDatafileHeader(self, filename, testdata):
 		fp = self.openlog(filename, 'w')
 		fp.write('%s\n%s\n# command | %s\n' % (self.teststamp, self.sysstamp, self.cmdline))
-		if(self.suspendmode == 'mem' or self.suspendmode == 'command'):
-			for fw in fwdata:
+		for test in testdata:
+			if 'fw' in test:
+				fw = test['fw']
 				if(fw):
 					fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
+			if 'bat' in test:
+				(a1, c1), (a2, c2) = test['bat']
+				fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2))
+			if test['error'] or len(testdata) > 1:
+				fp.write('# enter_sleep_error %s\n' % test['error'])
 		return fp
-	def sudouser(self, dir):
-		if os.path.exists(dir) and os.getuid() == 0 and \
-			'SUDO_USER' in os.environ:
+	def sudoUserchown(self, dir):
+		if os.path.exists(dir) and self.sudouser:
 			cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
-			call(cmd.format(os.environ['SUDO_USER'], dir), shell=True)
+			call(cmd.format(self.sudouser, dir), shell=True)
 	def outputResult(self, testdata, num=0):
 		if not self.result:
 			return
@@ -762,7 +807,7 @@ class SystemValues:
 		if 'bugurl' in testdata:
 			fp.write('url%s: %s\n' % (n, testdata['bugurl']))
 		fp.close()
-		self.sudouser(self.result)
+		self.sudoUserchown(self.result)
 	def configFile(self, file):
 		dir = os.path.dirname(os.path.realpath(__file__))
 		if os.path.exists(file):
@@ -800,15 +845,16 @@ suspendmodename = {
 #	 Simple class which holds property values collected
 #	 for all the devices used in the timeline.
 class DevProps:
-	syspath = ''
-	altname = ''
-	async = True
-	xtraclass = ''
-	xtrainfo = ''
+	def __init__(self):
+		self.syspath = ''
+		self.altname = ''
+		self.async = True
+		self.xtraclass = ''
+		self.xtrainfo = ''
 	def out(self, dev):
 		return '%s,%s,%d;' % (dev, self.altname, self.async)
 	def debug(self, dev):
-		print '%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async)
+		pprint('%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async))
 	def altName(self, dev):
 		if not self.altname or self.altname == dev:
 			return dev
@@ -831,9 +877,6 @@ class DevProps:
 #	 A container used to create a device hierachy, with a single root node
 #	 and a tree of child nodes. Used by Data.deviceTopology()
 class DeviceNode:
-	name = ''
-	children = 0
-	depth = 0
 	def __init__(self, nodename, nodedepth):
 		self.name = nodename
 		self.children = []
@@ -861,68 +904,81 @@ class DeviceNode:
 #	}
 #
 class Data:
-	dmesg = {}  # root data structure
-	phases = [] # ordered list of phases
-	start = 0.0 # test start
-	end = 0.0   # test end
-	tSuspended = 0.0 # low-level suspend start
-	tResumed = 0.0   # low-level resume start
-	tKernSus = 0.0   # kernel level suspend start
-	tKernRes = 0.0   # kernel level resume end
-	tLow = 0.0       # time spent in low-level suspend (standby/freeze)
-	fwValid = False  # is firmware data available
-	fwSuspend = 0    # time spent in firmware suspend
-	fwResume = 0     # time spent in firmware resume
-	dmesgtext = []   # dmesg text file in memory
-	pstl = 0         # process timeline
-	testnumber = 0
-	idstr = ''
-	html_device_id = 0
-	stamp = 0
-	outfile = ''
-	devpids = []
-	kerror = False
+	phasedef = {
+		'suspend_prepare': {'order': 0, 'color': '#CCFFCC'},
+		        'suspend': {'order': 1, 'color': '#88FF88'},
+		   'suspend_late': {'order': 2, 'color': '#00AA00'},
+		  'suspend_noirq': {'order': 3, 'color': '#008888'},
+		'suspend_machine': {'order': 4, 'color': '#0000FF'},
+		 'resume_machine': {'order': 5, 'color': '#FF0000'},
+		   'resume_noirq': {'order': 6, 'color': '#FF9900'},
+		   'resume_early': {'order': 7, 'color': '#FFCC00'},
+		         'resume': {'order': 8, 'color': '#FFFF88'},
+		'resume_complete': {'order': 9, 'color': '#FFFFCC'},
+	}
+	errlist = {
+		'HWERROR' : '.*\[ *Hardware Error *\].*',
+		'FWBUG'   : '.*\[ *Firmware Bug *\].*',
+		'BUG'     : '.*BUG.*',
+		'ERROR'   : '.*ERROR.*',
+		'WARNING' : '.*WARNING.*',
+		'IRQ'     : '.*genirq: .*',
+		'TASKFAIL': '.*Freezing of tasks failed.*',
+	}
 	def __init__(self, num):
 		idchar = 'abcdefghij'
-		self.pstl = dict()
+		self.start = 0.0 # test start
+		self.end = 0.0   # test end
+		self.tSuspended = 0.0 # low-level suspend start
+		self.tResumed = 0.0   # low-level resume start
+		self.tKernSus = 0.0   # kernel level suspend start
+		self.tKernRes = 0.0   # kernel level resume end
+		self.fwValid = False  # is firmware data available
+		self.fwSuspend = 0    # time spent in firmware suspend
+		self.fwResume = 0     # time spent in firmware resume
+		self.html_device_id = 0
+		self.stamp = 0
+		self.outfile = ''
+		self.kerror = False
+		self.battery = 0
+		self.enterfail = ''
+		self.currphase = ''
+		self.pstl = dict()    # process timeline
 		self.testnumber = num
 		self.idstr = idchar[num]
-		self.dmesgtext = []
-		self.phases = []
-		self.dmesg = { # fixed list of 10 phases
-			'suspend_prepare': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#CCFFCC', 'order': 0},
-			        'suspend': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#88FF88', 'order': 1},
-			   'suspend_late': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#00AA00', 'order': 2},
-			  'suspend_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#008888', 'order': 3},
-		    'suspend_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#0000FF', 'order': 4},
-			 'resume_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FF0000', 'order': 5},
-			   'resume_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FF9900', 'order': 6},
-			   'resume_early': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFCC00', 'order': 7},
-			         'resume': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFFF88', 'order': 8},
-			'resume_complete': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFFFCC', 'order': 9}
-		}
-		self.phases = self.sortedPhases()
+		self.dmesgtext = []   # dmesg text file in memory
+		self.dmesg = dict()   # root data structure
+		self.errorinfo = {'suspend':[],'resume':[]}
+		self.tLow = []        # time spent in low-level suspends (standby/freeze)
+		self.devpids = []
+		self.devicegroups = 0
+	def sortedPhases(self):
+		return sorted(self.dmesg, key=lambda k:self.dmesg[k]['order'])
+	def initDevicegroups(self):
+		# called when phases are all finished being added
+		for phase in self.dmesg.keys():
+			if '*' in phase:
+				p = phase.split('*')
+				pnew = '%s%d' % (p[0], len(p))
+				self.dmesg[pnew] = self.dmesg.pop(phase)
 		self.devicegroups = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			self.devicegroups.append([phase])
-		self.errorinfo = {'suspend':[],'resume':[]}
+	def nextPhase(self, phase, offset):
+		order = self.dmesg[phase]['order'] + offset
+		for p in self.dmesg:
+			if self.dmesg[p]['order'] == order:
+				return p
+		return ''
+	def lastPhase(self):
+		plist = self.sortedPhases()
+		if len(plist) < 1:
+			return ''
+		return plist[-1]
 	def extractErrorInfo(self):
 		lf = sysvals.openlog(sysvals.dmesgfile, 'r')
 		i = 0
 		list = []
-		# sl = start line, et = error time, el = error line
-		type = 'ERROR'
-		sl = et = el = -1
 		for line in lf:
 			i += 1
 			m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
@@ -931,43 +987,13 @@ class Data:
 			t = float(m.group('ktime'))
 			if t < self.start or t > self.end:
 				continue
-			if t < self.tSuspended:
-				dir = 'suspend'
-			else:
-				dir = 'resume'
+			dir = 'suspend' if t < self.tSuspended else 'resume'
 			msg = m.group('msg')
-			if re.match('-*\[ *cut here *\]-*', msg):
-				type = 'WARNING'
-				sl = i
-			elif re.match('genirq: .*', msg):
-				type = 'IRQ'
-				sl = i
-			elif re.match('BUG: .*', msg) or re.match('kernel BUG .*', msg):
-				type = 'BUG'
-				sl = i
-			elif re.match('-*\[ *end trace .*\]-*', msg) or \
-				re.match('R13: .*', msg):
-				if et >= 0 and sl >= 0:
-					list.append((type, dir, et, sl, i))
-					self.kerror = True
-					sl = et = el = -1
-					type = 'ERROR'
-			elif 'Call Trace:' in msg:
-				if el >= 0 and et >= 0:
-					list.append((type, dir, et, el, el))
+			for err in self.errlist:
+				if re.match(self.errlist[err], msg):
+					list.append((err, dir, t, i, i))
 					self.kerror = True
-				et, el = t, i
-				if sl < 0 or type == 'BUG':
-					slval = i
-					if sl >= 0:
-						slval = sl
-					list.append((type, dir, et, slval, i))
-					self.kerror = True
-					sl = et = el = -1
-					type = 'ERROR'
-		if el >= 0 and et >= 0:
-			list.append((type, dir, et, el, el))
-			self.kerror = True
+					break
 		for e in list:
 			type, dir, t, idx1, idx2 = e
 			sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t))
@@ -980,7 +1006,7 @@ class Data:
 	def setEnd(self, time):
 		self.end = time
 	def isTraceEventOutsideDeviceCalls(self, pid, time):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				d = list[dev]
@@ -988,16 +1014,10 @@ class Data:
 					time < d['end']):
 					return False
 		return True
-	def phaseCollision(self, phase, isbegin, line):
-		key = 'end'
-		if isbegin:
-			key = 'start'
-		if self.dmesg[phase][key] >= 0:
-			sysvals.vprint('IGNORE: %s' % line.strip())
-			return True
-		return False
 	def sourcePhase(self, start):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
+			if 'machine' in phase:
+				continue
 			pend = self.dmesg[phase]['end']
 			if start <= pend:
 				return phase
@@ -1028,14 +1048,15 @@ class Data:
 		return tgtdev
 	def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata):
 		# try to place the call in a device
-		tgtdev = self.sourceDevice(self.phases, start, end, pid, 'device')
+		phases = self.sortedPhases()
+		tgtdev = self.sourceDevice(phases, start, end, pid, 'device')
 		# calls with device pids that occur outside device bounds are dropped
 		# TODO: include these somehow
 		if not tgtdev and pid in self.devpids:
 			return False
 		# try to place the call in a thread
 		if not tgtdev:
-			tgtdev = self.sourceDevice(self.phases, start, end, pid, 'thread')
+			tgtdev = self.sourceDevice(phases, start, end, pid, 'thread')
 		# create new thread blocks, expand as new calls are found
 		if not tgtdev:
 			if proc == '<...>':
@@ -1077,7 +1098,7 @@ class Data:
 	def overflowDevices(self):
 		# get a list of devices that extend beyond the end of this test run
 		devlist = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1088,7 +1109,7 @@ class Data:
 		# merge any devices that overlap devlist
 		for dev in devlist:
 			devname = dev['name']
-			for phase in self.phases:
+			for phase in self.sortedPhases():
 				list = self.dmesg[phase]['list']
 				if devname not in list:
 					continue
@@ -1103,7 +1124,7 @@ class Data:
 				del list[devname]
 	def usurpTouchingThread(self, name, dev):
 		# the caller test has priority of this thread, give it to him
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			if name in list:
 				tdev = list[name]
@@ -1117,7 +1138,7 @@ class Data:
 				break
 	def stitchTouchingThreads(self, testlist):
 		# merge any threads between tests that touch
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1127,7 +1148,7 @@ class Data:
 					data.usurpTouchingThread(devname, dev)
 	def optimizeDevSrc(self):
 		# merge any src call loops to reduce timeline size
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				if 'src' not in list[dev]:
@@ -1165,7 +1186,7 @@ class Data:
 		self.tKernSus = self.trimTimeVal(self.tKernSus, t0, dT, left)
 		self.tKernRes = self.trimTimeVal(self.tKernRes, t0, dT, left)
 		self.end = self.trimTimeVal(self.end, t0, dT, left)
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			p = self.dmesg[phase]
 			p['start'] = self.trimTimeVal(p['start'], t0, dT, left)
 			p['end'] = self.trimTimeVal(p['end'], t0, dT, left)
@@ -1174,6 +1195,7 @@ class Data:
 				d = list[name]
 				d['start'] = self.trimTimeVal(d['start'], t0, dT, left)
 				d['end'] = self.trimTimeVal(d['end'], t0, dT, left)
+				d['length'] = d['end'] - d['start']
 				if('ftrace' in d):
 					cg = d['ftrace']
 					cg.start = self.trimTimeVal(cg.start, t0, dT, left)
@@ -1190,30 +1212,51 @@ class Data:
 				tm = self.trimTimeVal(tm, t0, dT, left)
 				list.append((type, tm, idx1, idx2))
 			self.errorinfo[dir] = list
-	def normalizeTime(self, tZero):
+	def trimFreezeTime(self, tZero):
 		# trim out any standby or freeze clock time
-		if(self.tSuspended != self.tResumed):
-			if(self.tResumed > tZero):
-				self.trimTime(self.tSuspended, \
-					self.tResumed-self.tSuspended, True)
-			else:
-				self.trimTime(self.tSuspended, \
-					self.tResumed-self.tSuspended, False)
+		lp = ''
+		for phase in self.sortedPhases():
+			if 'resume_machine' in phase and 'suspend_machine' in lp:
+				tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start']
+				tL = tR - tS
+				if tL > 0:
+					left = True if tR > tZero else False
+					self.trimTime(tS, tL, left)
+					self.tLow.append('%.0f'%(tL*1000))
+			lp = phase
 	def getTimeValues(self):
-		sktime = (self.dmesg['suspend_machine']['end'] - \
-			self.tKernSus) * 1000
-		rktime = (self.dmesg['resume_complete']['end'] - \
-			self.dmesg['resume_machine']['start']) * 1000
+		sktime = (self.tSuspended - self.tKernSus) * 1000
+		rktime = (self.tKernRes - self.tResumed) * 1000
 		return (sktime, rktime)
-	def setPhase(self, phase, ktime, isbegin):
+	def setPhase(self, phase, ktime, isbegin, order=-1):
 		if(isbegin):
+			# phase start over current phase
+			if self.currphase:
+				if 'resume_machine' not in self.currphase:
+					sysvals.vprint('WARNING: phase %s failed to end' % self.currphase)
+				self.dmesg[self.currphase]['end'] = ktime
+			phases = self.dmesg.keys()
+			color = self.phasedef[phase]['color']
+			count = len(phases) if order < 0 else order
+			# create unique name for every new phase
+			while phase in phases:
+				phase += '*'
+			self.dmesg[phase] = {'list': dict(), 'start': -1.0, 'end': -1.0,
+				'row': 0, 'color': color, 'order': count}
 			self.dmesg[phase]['start'] = ktime
+			self.currphase = phase
 		else:
+			# phase end without a start
+			if phase not in self.currphase:
+				if self.currphase:
+					sysvals.vprint('WARNING: %s ended instead of %s, ftrace corruption?' % (phase, self.currphase))
+				else:
+					sysvals.vprint('WARNING: %s ended without a start, ftrace corruption?' % phase)
+					return phase
+			phase = self.currphase
 			self.dmesg[phase]['end'] = ktime
-	def dmesgSortVal(self, phase):
-		return self.dmesg[phase]['order']
-	def sortedPhases(self):
-		return sorted(self.dmesg, key=self.dmesgSortVal)
+			self.currphase = ''
+		return phase
 	def sortedDevices(self, phase):
 		list = self.dmesg[phase]['list']
 		slist = []
@@ -1232,13 +1275,13 @@ class Data:
 		for devname in phaselist:
 			dev = phaselist[devname]
 			if(dev['end'] < 0):
-				for p in self.phases:
+				for p in self.sortedPhases():
 					if self.dmesg[p]['end'] > dev['start']:
 						dev['end'] = self.dmesg[p]['end']
 						break
 				sysvals.vprint('%s (%s): callback didnt return' % (devname, phase))
 	def deviceFilter(self, devicefilter):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			rmlist = []
 			for name in list:
@@ -1253,7 +1296,7 @@ class Data:
 				del list[name]
 	def fixupInitcallsThatDidntReturn(self):
 		# if any calls never returned, clip them at system resume end
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			self.fixupInitcalls(phase)
 	def phaseOverlap(self, phases):
 		rmgroups = []
@@ -1272,17 +1315,18 @@ class Data:
 		self.devicegroups.append(newgroup)
 	def newActionGlobal(self, name, start, end, pid=-1, color=''):
 		# which phase is this device callback or action in
+		phases = self.sortedPhases()
 		targetphase = 'none'
 		htmlclass = ''
 		overlap = 0.0
-		phases = []
-		for phase in self.phases:
+		myphases = []
+		for phase in phases:
 			pstart = self.dmesg[phase]['start']
 			pend = self.dmesg[phase]['end']
 			# see if the action overlaps this phase
 			o = max(0, min(end, pend) - max(start, pstart))
 			if o > 0:
-				phases.append(phase)
+				myphases.append(phase)
 			# set the target phase to the one that overlaps most
 			if o > overlap:
 				if overlap > 0 and phase == 'post_resume':
@@ -1291,19 +1335,19 @@ class Data:
 				overlap = o
 		# if no target phase was found, pin it to the edge
 		if targetphase == 'none':
-			p0start = self.dmesg[self.phases[0]]['start']
+			p0start = self.dmesg[phases[0]]['start']
 			if start <= p0start:
-				targetphase = self.phases[0]
+				targetphase = phases[0]
 			else:
-				targetphase = self.phases[-1]
+				targetphase = phases[-1]
 		if pid == -2:
 			htmlclass = ' bg'
 		elif pid == -3:
 			htmlclass = ' ps'
-		if len(phases) > 1:
+		if len(myphases) > 1:
 			htmlclass = ' bg'
-			self.phaseOverlap(phases)
-		if targetphase in self.phases:
+			self.phaseOverlap(myphases)
+		if targetphase in phases:
 			newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color)
 			return (targetphase, newname)
 		return False
@@ -1335,19 +1379,43 @@ class Data:
 			if(list[child]['par'] == devname):
 				devlist.append(child)
 		return devlist
+	def maxDeviceNameSize(self, phase):
+		size = 0
+		for name in self.dmesg[phase]['list']:
+			if len(name) > size:
+				size = len(name)
+		return size
 	def printDetails(self):
 		sysvals.vprint('Timeline Details:')
 		sysvals.vprint('          test start: %f' % self.start)
 		sysvals.vprint('kernel suspend start: %f' % self.tKernSus)
-		for phase in self.phases:
-			dc = len(self.dmesg[phase]['list'])
-			sysvals.vprint('    %16s: %f - %f (%d devices)' % (phase, \
-				self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc))
+		tS = tR = False
+		for phase in self.sortedPhases():
+			devlist = self.dmesg[phase]['list']
+			dc, ps, pe = len(devlist), self.dmesg[phase]['start'], self.dmesg[phase]['end']
+			if not tS and ps >= self.tSuspended:
+				sysvals.vprint('   machine suspended: %f' % self.tSuspended)
+				tS = True
+			if not tR and ps >= self.tResumed:
+				sysvals.vprint('     machine resumed: %f' % self.tResumed)
+				tR = True
+			sysvals.vprint('%20s: %f - %f (%d devices)' % (phase, ps, pe, dc))
+			if sysvals.devdump:
+				sysvals.vprint(''.join('-' for i in range(80)))
+				maxname = '%d' % self.maxDeviceNameSize(phase)
+				fmt = '%3d) %'+maxname+'s - %f - %f'
+				c = 1
+				for name in devlist:
+					s = devlist[name]['start']
+					e = devlist[name]['end']
+					sysvals.vprint(fmt % (c, name, s, e))
+					c += 1
+				sysvals.vprint(''.join('-' for i in range(80)))
 		sysvals.vprint('   kernel resume end: %f' % self.tKernRes)
 		sysvals.vprint('            test end: %f' % self.end)
 	def deviceChildrenAllPhases(self, devname):
 		devlist = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.deviceChildren(devname, phase)
 			for dev in list:
 				if dev not in devlist:
@@ -1368,7 +1436,7 @@ class Data:
 		if node.name:
 			info = ''
 			drv = ''
-			for phase in self.phases:
+			for phase in self.sortedPhases():
 				list = self.dmesg[phase]['list']
 				if node.name in list:
 					s = list[node.name]['start']
@@ -1502,8 +1570,29 @@ class Data:
 			c = self.addProcessUsageEvent(ps, tres)
 			if c > 0:
 				sysvals.vprint('%25s (res): %d' % (ps, c))
+	def handleEndMarker(self, time):
+		dm = self.dmesg
+		self.setEnd(time)
+		self.initDevicegroups()
+		# give suspend_prepare an end if needed
+		if 'suspend_prepare' in dm and dm['suspend_prepare']['end'] < 0:
+			dm['suspend_prepare']['end'] = time
+		# assume resume machine ends at next phase start
+		if 'resume_machine' in dm and dm['resume_machine']['end'] < 0:
+			np = self.nextPhase('resume_machine', 1)
+			if np:
+				dm['resume_machine']['end'] = dm[np]['start']
+		# if kernel resume end not found, assume its the end marker
+		if self.tKernRes == 0.0:
+			self.tKernRes = time
+		# if kernel suspend start not found, assume its the end marker
+		if self.tKernSus == 0.0:
+			self.tKernSus = time
+		# set resume complete to end at end marker
+		if 'resume_complete' in dm:
+			dm['resume_complete']['end'] = time
 	def debugPrint(self):
-		for p in self.phases:
+		for p in self.sortedPhases():
 			list = self.dmesg[p]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1514,9 +1603,9 @@ class Data:
 # Description:
 #	 A container for kprobe function data we want in the dev timeline
 class DevFunction:
-	row = 0
-	count = 1
 	def __init__(self, name, args, caller, ret, start, end, u, proc, pid, color):
+		self.row = 0
+		self.count = 1
 		self.name = name
 		self.args = args
 		self.caller = caller
@@ -1570,16 +1659,15 @@ class DevFunction:
 #			 suspend_resume: phase or custom exec block data
 #			 device_pm_callback: device callback info
 class FTraceLine:
-	time = 0.0
-	length = 0.0
-	fcall = False
-	freturn = False
-	fevent = False
-	fkprobe = False
-	depth = 0
-	name = ''
-	type = ''
 	def __init__(self, t, m='', d=''):
+		self.length = 0.0
+		self.fcall = False
+		self.freturn = False
+		self.fevent = False
+		self.fkprobe = False
+		self.depth = 0
+		self.name = ''
+		self.type = ''
 		self.time = float(t)
 		if not m and not d:
 			return
@@ -1657,13 +1745,13 @@ class FTraceLine:
 		return len(str)/2
 	def debugPrint(self, info=''):
 		if self.isLeaf():
-			print(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 		elif self.freturn:
-			print(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 		else:
-			print(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 	def startMarker(self):
 		# Is this the starting line of a suspend?
@@ -1699,19 +1787,13 @@ class FTraceLine:
 #	 Each instance is tied to a single device in a single phase, and is
 #	 comprised of an ordered list of FTraceLine objects
 class FTraceCallGraph:
-	id = ''
-	start = -1.0
-	end = -1.0
-	list = []
-	invalid = False
-	depth = 0
-	pid = 0
-	name = ''
-	partial = False
 	vfname = 'missing_function_name'
-	ignore = False
-	sv = 0
 	def __init__(self, pid, sv):
+		self.id = ''
+		self.invalid = False
+		self.name = ''
+		self.partial = False
+		self.ignore = False
 		self.start = -1.0
 		self.end = -1.0
 		self.list = []
@@ -1810,7 +1892,7 @@ class FTraceCallGraph:
 			if warning and ('[make leaf]', line) not in info:
 				info.append(('', line))
 		if warning:
-			print 'WARNING: ftrace data missing, corrections made:'
+			pprint('WARNING: ftrace data missing, corrections made:')
 			for i in info:
 				t, obj = i
 				if obj:
@@ -1870,10 +1952,10 @@ class FTraceCallGraph:
 		id = 'task %s' % (self.pid)
 		window = '(%f - %f)' % (self.start, line.time)
 		if(self.depth < 0):
-			print('Data misalignment for '+id+\
+			pprint('Data misalignment for '+id+\
 				' (buffer overflow), ignoring this callback')
 		else:
-			print('Too much data for '+id+\
+			pprint('Too much data for '+id+\
 				' '+window+', ignoring this callback')
 	def slice(self, dev):
 		minicg = FTraceCallGraph(dev['pid'], self.sv)
@@ -1926,7 +2008,7 @@ class FTraceCallGraph:
 			elif l.isReturn():
 				if(l.depth not in stack):
 					if self.sv.verbose:
-						print 'Post Process Error: Depth missing'
+						pprint('Post Process Error: Depth missing')
 						l.debugPrint()
 					return False
 				# calculate call length from call/return lines
@@ -1943,7 +2025,7 @@ class FTraceCallGraph:
 			return True
 		elif(cnt < 0):
 			if self.sv.verbose:
-				print 'Post Process Error: Depth is less than 0'
+				pprint('Post Process Error: Depth is less than 0')
 			return False
 		# trace ended before call tree finished
 		return self.repair(cnt)
@@ -1967,7 +2049,7 @@ class FTraceCallGraph:
 						dev['ftrace'] = cg
 					found = devname
 			return found
-		for p in data.phases:
+		for p in data.sortedPhases():
 			if(data.dmesg[p]['start'] <= self.start and
 				self.start <= data.dmesg[p]['end']):
 				list = data.dmesg[p]['list']
@@ -1990,7 +2072,7 @@ class FTraceCallGraph:
 		if fs < data.start or fe > data.end:
 			return
 		phase = ''
-		for p in data.phases:
+		for p in data.sortedPhases():
 			if(data.dmesg[p]['start'] <= self.start and
 				self.start < data.dmesg[p]['end']):
 				phase = p
@@ -2002,20 +2084,20 @@ class FTraceCallGraph:
 			phase, myname = out
 			data.dmesg[phase]['list'][myname]['ftrace'] = self
 	def debugPrint(self, info=''):
-		print('%s pid=%d [%f - %f] %.3f us') % \
+		pprint('%s pid=%d [%f - %f] %.3f us' % \
 			(self.name, self.pid, self.start, self.end,
-			(self.end - self.start)*1000000)
+			(self.end - self.start)*1000000))
 		for l in self.list:
 			if l.isLeaf():
-				print('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
 			elif l.freturn:
-				print('%f (%02d): %s} (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s} (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
 			else:
-				print('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
-		print(' ')
+		pprint(' ')
 
 class DevItem:
 	def __init__(self, test, phase, dev):
@@ -2032,23 +2114,20 @@ class DevItem:
 #	 A container for a device timeline which calculates
 #	 all the html properties to display it correctly
 class Timeline:
-	html = ''
-	height = 0	# total timeline height
-	scaleH = 20	# timescale (top) row height
-	rowH = 30	# device row height
-	bodyH = 0	# body height
-	rows = 0	# total timeline rows
-	rowlines = dict()
-	rowheight = dict()
 	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
 	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
 	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
 	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
 	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	def __init__(self, rowheight, scaleheight):
-		self.rowH = rowheight
-		self.scaleH = scaleheight
 		self.html = ''
+		self.height = 0  # total timeline height
+		self.scaleH = scaleheight # timescale (top) row height
+		self.rowH = rowheight     # device row height
+		self.bodyH = 0   # body height
+		self.rows = 0    # total timeline rows
+		self.rowlines = dict()
+		self.rowheight = dict()
 	def createHeader(self, sv, stamp):
 		if(not stamp['time']):
 			return
@@ -2275,18 +2354,18 @@ class Timeline:
 # Description:
 #	 A list of values describing the properties of these test runs
 class TestProps:
-	stamp = ''
-	sysinfo = ''
-	cmdline = ''
-	kparams = ''
-	S0i3 = False
-	fwdata = []
 	stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
 				'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
 				' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
+	batteryfmt = '^# battery (?P<a1>\w*) (?P<c1>\d*) (?P<a2>\w*) (?P<c2>\d*)'
+	testerrfmt = '^# enter_sleep_error (?P<e>.*)'
 	sysinfofmt = '^# sysinfo .*'
 	cmdlinefmt = '^# command \| (?P<cmd>.*)'
 	kparamsfmt = '^# kparams \| (?P<kp>.*)'
+	devpropfmt = '# Device Properties: .*'
+	tracertypefmt = '# tracer: (?P<t>.*)'
+	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+	procexecfmt = 'ps - (?P<ps>.*)$'
 	ftrace_line_fmt_fg = \
 		'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
@@ -2295,11 +2374,17 @@ class TestProps:
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\
 		'(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\
 		'(?P<msg>.*)'
-	ftrace_line_fmt = ftrace_line_fmt_nop
-	cgformat = False
-	data = 0
-	ktemp = dict()
 	def __init__(self):
+		self.stamp = ''
+		self.sysinfo = ''
+		self.cmdline = ''
+		self.kparams = ''
+		self.testerror = []
+		self.battery = []
+		self.fwdata = []
+		self.ftrace_line_fmt = self.ftrace_line_fmt_nop
+		self.cgformat = False
+		self.data = 0
 		self.ktemp = dict()
 	def setTracerType(self, tracer):
 		if(tracer == 'function_graph'):
@@ -2310,6 +2395,7 @@ class TestProps:
 		else:
 			doError('Invalid tracer format: [%s]' % tracer)
 	def parseStamp(self, data, sv):
+		# global test data
 		m = re.match(self.stampfmt, self.stamp)
 		data.stamp = {'time': '', 'host': '', 'mode': ''}
 		dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
@@ -2331,12 +2417,14 @@ class TestProps:
 		sv.suspendmode = data.stamp['mode']
 		if sv.suspendmode == 'command' and sv.ftracefile != '':
 			modes = ['on', 'freeze', 'standby', 'mem', 'disk']
-			out = Popen(['grep', 'machine_suspend', sv.ftracefile],
-				stderr=PIPE, stdout=PIPE).stdout.read()
-			m = re.match('.* machine_suspend\[(?P<mode>.*)\]', out)
-			if m and m.group('mode') in ['1', '2', '3', '4']:
-				sv.suspendmode = modes[int(m.group('mode'))]
-				data.stamp['mode'] = sv.suspendmode
+			fp = sysvals.openlog(sv.ftracefile, 'r')
+			for line in fp:
+				m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line)
+				if m and m.group('mode') in ['1', '2', '3', '4']:
+					sv.suspendmode = modes[int(m.group('mode'))]
+					data.stamp['mode'] = sv.suspendmode
+					break
+			fp.close()
 		m = re.match(self.cmdlinefmt, self.cmdline)
 		if m:
 			sv.cmdline = m.group('cmd')
@@ -2346,23 +2434,36 @@ class TestProps:
 				sv.kparams = m.group('kp')
 		if not sv.stamp:
 			sv.stamp = data.stamp
+		# firmware data
+		if sv.suspendmode == 'mem' and len(self.fwdata) > data.testnumber:
+			data.fwSuspend, data.fwResume = self.fwdata[data.testnumber]
+			if(data.fwSuspend > 0 or data.fwResume > 0):
+				data.fwValid = True
+		# battery data
+		if len(self.battery) > data.testnumber:
+			m = re.match(self.batteryfmt, self.battery[data.testnumber])
+			if m:
+				data.battery = m.groups()
+		# sleep mode enter errors
+		if len(self.testerror) > data.testnumber:
+			m = re.match(self.testerrfmt, self.testerror[data.testnumber])
+			if m:
+				data.enterfail = m.group('e')
 
 # Class: TestRun
 # Description:
 #	 A container for a suspend/resume test run. This is necessary as
 #	 there could be more than one, and they need to be separate.
 class TestRun:
-	ftemp = dict()
-	ttemp = dict()
-	data = 0
 	def __init__(self, dataobj):
 		self.data = dataobj
 		self.ftemp = dict()
 		self.ttemp = dict()
 
 class ProcessMonitor:
-	proclist = dict()
-	running = False
+	def __init__(self):
+		self.proclist = dict()
+		self.running = False
 	def procstat(self):
 		c = ['cat /proc/[1-9]*/stat 2>/dev/null']
 		process = Popen(c, shell=True, stdout=PIPE)
@@ -2413,8 +2514,8 @@ class ProcessMonitor:
 #	 markers, and/or kprobes required for primary parsing.
 def doesTraceLogHaveTraceEvents():
 	kpcheck = ['_cal: (', '_cpu_down()']
-	techeck = sysvals.traceevents[:]
-	tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
+	techeck = ['suspend_resume', 'device_pm_callback']
+	tmcheck = ['tracing_mark_write']
 	sysvals.usekprobes = False
 	fp = sysvals.openlog(sysvals.ftracefile, 'r')
 	for line in fp:
@@ -2436,23 +2537,14 @@ def doesTraceLogHaveTraceEvents():
 				check.remove(i)
 		tmcheck = check
 	fp.close()
-	if len(techeck) == 0:
-		sysvals.usetraceevents = True
-	else:
-		sysvals.usetraceevents = False
-	if len(tmcheck) == 0:
-		sysvals.usetracemarkers = True
-	else:
-		sysvals.usetracemarkers = False
+	sysvals.usetraceevents = True if len(techeck) < 2 else False
+	sysvals.usetracemarkers = True if len(tmcheck) == 0 else False
 
 # Function: appendIncompleteTraceLog
 # Description:
 #	 [deprecated for kernel 3.15 or newer]
-#	 Legacy support of ftrace outputs that lack the device_pm_callback
-#	 and/or suspend_resume trace events. The primary data should be
-#	 taken from dmesg, and this ftrace is used only for callgraph data
-#	 or custom actions in the timeline. The data is appended to the Data
-#	 objects provided.
+#	 Adds callgraph data which lacks trace event data. This is only
+#	 for timelines generated from 3.15 or older
 # Arguments:
 #	 testruns: the array of Data objects obtained from parseKernelLog
 def appendIncompleteTraceLog(testruns):
@@ -2482,13 +2574,19 @@ def appendIncompleteTraceLog(testruns):
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
 		# determine the trace data type (required for further parsing)
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
 		# device properties line
-		if(re.match(sysvals.devpropfmt, line)):
+		if(re.match(tp.devpropfmt, line)):
 			devProps(line)
 			continue
 		# parse only valid lines, if this is not one move on
@@ -2528,87 +2626,7 @@ def appendIncompleteTraceLog(testruns):
 			continue
 		# trace event processing
 		if(t.fevent):
-			# general trace events have two types, begin and end
-			if(re.match('(?P<name>.*) begin$', t.name)):
-				isbegin = True
-			elif(re.match('(?P<name>.*) end$', t.name)):
-				isbegin = False
-			else:
-				continue
-			m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
-			if(m):
-				val = m.group('val')
-				if val == '0':
-					name = m.group('name')
-				else:
-					name = m.group('name')+'['+val+']'
-			else:
-				m = re.match('(?P<name>.*) .*', t.name)
-				name = m.group('name')
-			# special processing for trace events
-			if re.match('dpm_prepare\[.*', name):
-				continue
-			elif re.match('machine_suspend.*', name):
-				continue
-			elif re.match('suspend_enter\[.*', name):
-				if(not isbegin):
-					data.dmesg['suspend_prepare']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend\[.*', name):
-				if(not isbegin):
-					data.dmesg['suspend']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend_late\[.*', name):
-				if(isbegin):
-					data.dmesg['suspend_late']['start'] = t.time
-				else:
-					data.dmesg['suspend_late']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend_noirq\[.*', name):
-				if(isbegin):
-					data.dmesg['suspend_noirq']['start'] = t.time
-				else:
-					data.dmesg['suspend_noirq']['end'] = t.time
-				continue
-			elif re.match('dpm_resume_noirq\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_machine']['end'] = t.time
-					data.dmesg['resume_noirq']['start'] = t.time
-				else:
-					data.dmesg['resume_noirq']['end'] = t.time
-				continue
-			elif re.match('dpm_resume_early\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_early']['start'] = t.time
-				else:
-					data.dmesg['resume_early']['end'] = t.time
-				continue
-			elif re.match('dpm_resume\[.*', name):
-				if(isbegin):
-					data.dmesg['resume']['start'] = t.time
-				else:
-					data.dmesg['resume']['end'] = t.time
-				continue
-			elif re.match('dpm_complete\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_complete']['start'] = t.time
-				else:
-					data.dmesg['resume_complete']['end'] = t.time
-				continue
-			# skip trace events inside devices calls
-			if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
-				continue
-			# global events (outside device calls) are simply graphed
-			if(isbegin):
-				# store each trace event in ttemp
-				if(name not in testrun[testidx].ttemp):
-					testrun[testidx].ttemp[name] = []
-				testrun[testidx].ttemp[name].append(\
-					{'begin': t.time, 'end': t.time})
-			else:
-				# finish off matching trace event in ttemp
-				if(name in testrun[testidx].ttemp):
-					testrun[testidx].ttemp[name][-1]['end'] = t.time
+			continue
 		# call/return processing
 		elif sysvals.usecallgraph:
 			# create a callgraph object for the data
@@ -2625,12 +2643,6 @@ def appendIncompleteTraceLog(testruns):
 	tf.close()
 
 	for test in testrun:
-		# add the traceevent data to the device hierarchy
-		if(sysvals.usetraceevents):
-			for name in test.ttemp:
-				for event in test.ttemp[name]:
-					test.data.newActionGlobal(name, event['begin'], event['end'])
-
 		# add the callgraph data to the device hierarchy
 		for pid in test.ftemp:
 			for cg in test.ftemp[pid]:
@@ -2643,7 +2655,7 @@ def appendIncompleteTraceLog(testruns):
 					continue
 				callstart = cg.start
 				callend = cg.end
-				for p in test.data.phases:
+				for p in test.data.sortedPhases():
 					if(test.data.dmesg[p]['start'] <= callstart and
 						callstart <= test.data.dmesg[p]['end']):
 						list = test.data.dmesg[p]['list']
@@ -2670,10 +2682,13 @@ def parseTraceLog(live=False):
 		doError('%s does not exist' % sysvals.ftracefile)
 	if not live:
 		sysvals.setupAllKprobes()
+	ksuscalls = ['pm_prepare_console']
+	krescalls = ['pm_restore_console']
 	tracewatch = []
 	if sysvals.usekprobes:
 		tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
-			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', 'CPU_OFF']
+			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON',
+			'CPU_OFF', 'timekeeping_freeze', 'acpi_suspend']
 
 	# extract the callgraph and traceevent data
 	tp = TestProps()
@@ -2696,18 +2711,24 @@ def parseTraceLog(live=False):
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
 		# firmware line: pull out any firmware data
-		m = re.match(sysvals.firmwarefmt, line)
+		m = re.match(tp.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
 		# tracer type line: determine the trace data type
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
 		# device properties line
-		if(re.match(sysvals.devpropfmt, line)):
+		if(re.match(tp.devpropfmt, line)):
 			devProps(line)
 			continue
 		# ignore all other commented lines
@@ -2736,20 +2757,20 @@ def parseTraceLog(live=False):
 			continue
 		# find the start of suspend
 		if(t.startMarker()):
-			phase = 'suspend_prepare'
 			data = Data(len(testdata))
 			testdata.append(data)
 			testrun = TestRun(data)
 			testruns.append(testrun)
 			tp.parseStamp(data, sysvals)
 			data.setStart(t.time)
-			data.tKernSus = t.time
+			data.first_suspend_prepare = True
+			phase = data.setPhase('suspend_prepare', t.time, True)
 			continue
 		if(not data):
 			continue
 		# process cpu exec line
 		if t.type == 'tracing_mark_write':
-			m = re.match(sysvals.procexecfmt, t.name)
+			m = re.match(tp.procexecfmt, t.name)
 			if(m):
 				proclist = dict()
 				for ps in m.group('ps').split(','):
@@ -2762,28 +2783,17 @@ def parseTraceLog(live=False):
 				continue
 		# find the end of resume
 		if(t.endMarker()):
-			data.setEnd(t.time)
-			if data.tKernRes == 0.0:
-				data.tKernRes = t.time
-			if data.dmesg['resume_complete']['end'] < 0:
-				data.dmesg['resume_complete']['end'] = t.time
-			if sysvals.suspendmode == 'mem' and len(tp.fwdata) > data.testnumber:
-				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
-				if(data.tSuspended != 0 and data.tResumed != 0 and \
-					(data.fwSuspend > 0 or data.fwResume > 0)):
-					data.fwValid = True
+			data.handleEndMarker(t.time)
 			if(not sysvals.usetracemarkers):
 				# no trace markers? then quit and be sure to finish recording
 				# the event we used to trigger resume end
-				if(len(testrun.ttemp['thaw_processes']) > 0):
+				if('thaw_processes' in testrun.ttemp and len(testrun.ttemp['thaw_processes']) > 0):
 					# if an entry exists, assume this is its end
 					testrun.ttemp['thaw_processes'][-1]['end'] = t.time
 				break
 			continue
 		# trace event processing
 		if(t.fevent):
-			if(phase == 'post_resume'):
-				data.setEnd(t.time)
 			if(t.type == 'suspend_resume'):
 				# suspend_resume trace events have two types, begin and end
 				if(re.match('(?P<name>.*) begin$', t.name)):
@@ -2809,85 +2819,61 @@ def parseTraceLog(live=False):
 				# start of kernel suspend
 				if(re.match('suspend_enter\[.*', t.name)):
 					if(isbegin):
-						data.dmesg[phase]['start'] = t.time
 						data.tKernSus = t.time
 					continue
 				# suspend_prepare start
 				elif(re.match('dpm_prepare\[.*', t.name)):
-					phase = 'suspend_prepare'
-					if(not isbegin):
-						data.dmesg[phase]['end'] = t.time
-						if data.dmesg[phase]['start'] < 0:
-							data.dmesg[phase]['start'] = data.start
+					if isbegin and data.first_suspend_prepare:
+						data.first_suspend_prepare = False
+						if data.tKernSus == 0:
+							data.tKernSus = t.time
+						continue
+					phase = data.setPhase('suspend_prepare', t.time, isbegin)
 					continue
 				# suspend start
 				elif(re.match('dpm_suspend\[.*', t.name)):
-					phase = 'suspend'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('suspend', t.time, isbegin)
 					continue
 				# suspend_late start
 				elif(re.match('dpm_suspend_late\[.*', t.name)):
-					phase = 'suspend_late'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('suspend_late', t.time, isbegin)
 					continue
 				# suspend_noirq start
 				elif(re.match('dpm_suspend_noirq\[.*', t.name)):
-					if data.phaseCollision('suspend_noirq', isbegin, line):
-						continue
-					phase = 'suspend_noirq'
-					data.setPhase(phase, t.time, isbegin)
-					if(not isbegin):
-						phase = 'suspend_machine'
-						data.dmesg[phase]['start'] = t.time
+					phase = data.setPhase('suspend_noirq', t.time, isbegin)
 					continue
 				# suspend_machine/resume_machine
 				elif(re.match('machine_suspend\[.*', t.name)):
 					if(isbegin):
-						phase = 'suspend_machine'
-						data.dmesg[phase]['end'] = t.time
-						data.tSuspended = t.time
+						lp = data.lastPhase()
+						phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True)
+						data.setPhase(phase, t.time, False)
+						if data.tSuspended == 0:
+							data.tSuspended = t.time
 					else:
-						if(sysvals.suspendmode in ['mem', 'disk'] and not tp.S0i3):
-							data.dmesg['suspend_machine']['end'] = t.time
+						phase = data.setPhase('resume_machine', t.time, True)
+						if(sysvals.suspendmode in ['mem', 'disk']):
+							susp = phase.replace('resume', 'suspend')
+							if susp in data.dmesg:
+								data.dmesg[susp]['end'] = t.time
 							data.tSuspended = t.time
-						phase = 'resume_machine'
-						data.dmesg[phase]['start'] = t.time
 						data.tResumed = t.time
-						data.tLow = data.tResumed - data.tSuspended
-					continue
-				# acpi_suspend
-				elif(re.match('acpi_suspend\[.*', t.name)):
-					# acpi_suspend[0] S0i3
-					if(re.match('acpi_suspend\[0\] begin', t.name)):
-						if(sysvals.suspendmode == 'mem'):
-							tp.S0i3 = True
-							data.dmesg['suspend_machine']['end'] = t.time
-							data.tSuspended = t.time
 					continue
 				# resume_noirq start
 				elif(re.match('dpm_resume_noirq\[.*', t.name)):
-					if data.phaseCollision('resume_noirq', isbegin, line):
-						continue
-					phase = 'resume_noirq'
-					data.setPhase(phase, t.time, isbegin)
-					if(isbegin):
-						data.dmesg['resume_machine']['end'] = t.time
+					phase = data.setPhase('resume_noirq', t.time, isbegin)
 					continue
 				# resume_early start
 				elif(re.match('dpm_resume_early\[.*', t.name)):
-					phase = 'resume_early'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('resume_early', t.time, isbegin)
 					continue
 				# resume start
 				elif(re.match('dpm_resume\[.*', t.name)):
-					phase = 'resume'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('resume', t.time, isbegin)
 					continue
 				# resume complete start
 				elif(re.match('dpm_complete\[.*', t.name)):
-					phase = 'resume_complete'
-					if(isbegin):
-						data.dmesg[phase]['start'] = t.time
+					phase = data.setPhase('resume_complete', t.time, isbegin)
 					continue
 				# skip trace events inside devices calls
 				if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
@@ -2903,13 +2889,10 @@ def parseTraceLog(live=False):
 					if(len(testrun.ttemp[name]) > 0):
 						# if an entry exists, assume this is its end
 						testrun.ttemp[name][-1]['end'] = t.time
-					elif(phase == 'post_resume'):
-						# post resume events can just have ends
-						testrun.ttemp[name].append({
-							'begin': data.dmesg[phase]['start'],
-							'end': t.time})
 			# device callback start
 			elif(t.type == 'device_pm_callback_start'):
+				if phase not in data.dmesg:
+					continue
 				m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\
 					t.name);
 				if(not m):
@@ -2923,6 +2906,8 @@ def parseTraceLog(live=False):
 						data.devpids.append(pid)
 			# device callback finish
 			elif(t.type == 'device_pm_callback_end'):
+				if phase not in data.dmesg:
+					continue
 				m = re.match('(?P<drv>.*) (?P<d>.*), err.*', t.name);
 				if(not m):
 					continue
@@ -2953,6 +2938,9 @@ def parseTraceLog(live=False):
 					'cdata': kprobedata,
 					'proc': m_proc,
 				})
+				# start of kernel resume
+				if(phase == 'suspend_prepare' and kprobename in ksuscalls):
+					data.tKernSus = t.time
 			elif(t.freturn):
 				if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1:
 					continue
@@ -2963,9 +2951,9 @@ def parseTraceLog(live=False):
 					e['end'] = t.time
 					e['rdata'] = kprobedata
 				# end of kernel resume
-				if(kprobename == 'pm_notifier_call_chain' or \
-					kprobename == 'pm_restore_console'):
-					data.dmesg[phase]['end'] = t.time
+				if(phase != 'suspend_prepare' and kprobename in krescalls):
+					if phase in data.dmesg:
+						data.dmesg[phase]['end'] = t.time
 					data.tKernRes = t.time
 
 		# callgraph processing
@@ -2983,10 +2971,13 @@ def parseTraceLog(live=False):
 			if(res == -1):
 				testrun.ftemp[key][-1].addLine(t)
 	tf.close()
+	if data and not data.devicegroups:
+		sysvals.vprint('WARNING: end marker is missing')
+		data.handleEndMarker(t.time)
 
 	if sysvals.suspendmode == 'command':
 		for test in testruns:
-			for p in test.data.phases:
+			for p in test.data.sortedPhases():
 				if p == 'suspend_prepare':
 					test.data.dmesg[p]['start'] = test.data.start
 					test.data.dmesg[p]['end'] = test.data.end
@@ -2995,13 +2986,20 @@ def parseTraceLog(live=False):
 					test.data.dmesg[p]['end'] = test.data.end
 			test.data.tSuspended = test.data.end
 			test.data.tResumed = test.data.end
-			test.data.tLow = 0
 			test.data.fwValid = False
 
 	# dev source and procmon events can be unreadable with mixed phase height
 	if sysvals.usedevsrc or sysvals.useprocmon:
 		sysvals.mixedphaseheight = False
 
+	# expand phase boundaries so there are no gaps
+	for data in testdata:
+		lp = data.sortedPhases()[0]
+		for p in data.sortedPhases():
+			if(p != lp and not ('machine' in p and 'machine' in lp)):
+				data.dmesg[lp]['end'] = data.dmesg[p]['start']
+			lp = p
+
 	for i in range(len(testruns)):
 		test = testruns[i]
 		data = test.data
@@ -3062,8 +3060,8 @@ def parseTraceLog(live=False):
 						sortkey = '%f%f%d' % (cg.start, cg.end, pid)
 						sortlist[sortkey] = cg
 					elif len(cg.list) > 1000000:
-						print 'WARNING: the callgraph for %s is massive (%d lines)' %\
-							(devname, len(cg.list))
+						sysvals.vprint('WARNING: the callgraph for %s is massive (%d lines)' %\
+							(devname, len(cg.list)))
 			# create blocks for orphan cg data
 			for sortkey in sorted(sortlist):
 				cg = sortlist[sortkey]
@@ -3072,25 +3070,36 @@ def parseTraceLog(live=False):
 					sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
 					cg.newActionFromFunction(data)
 	if sysvals.suspendmode == 'command':
-		return testdata
+		return (testdata, '')
 
 	# fill in any missing phases
+	error = []
 	for data in testdata:
-		lp = data.phases[0]
-		for p in data.phases:
-			if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
+		tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1))
+		terr = ''
+		phasedef = data.phasedef
+		lp = 'suspend_prepare'
+		for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+			if p not in data.dmesg:
+				if not terr:
+					pprint('TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp))
+					terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
+					error.append(terr)
+					if data.tSuspended == 0:
+						data.tSuspended = data.dmesg[lp]['end']
+					if data.tResumed == 0:
+						data.tResumed = data.dmesg[lp]['end']
+					data.fwValid = False
 				sysvals.vprint('WARNING: phase "%s" is missing!' % p)
-			if(data.dmesg[p]['start'] < 0):
-				data.dmesg[p]['start'] = data.dmesg[lp]['end']
-				if(p == 'resume_machine'):
-					data.tSuspended = data.dmesg[lp]['end']
-					data.tResumed = data.dmesg[lp]['end']
-					data.tLow = 0
-			if(data.dmesg[p]['end'] < 0):
-				data.dmesg[p]['end'] = data.dmesg[p]['start']
-			if(p != lp and not ('machine' in p and 'machine' in lp)):
-				data.dmesg[lp]['end'] = data.dmesg[p]['start']
 			lp = p
+		if not terr and data.enterfail:
+			pprint('test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail))
+			terr = 'test%s failed to enter %s mode' % (tn, sysvals.suspendmode)
+			error.append(terr)
+		if data.tSuspended == 0:
+			data.tSuspended = data.tKernRes
+		if data.tResumed == 0:
+			data.tResumed = data.tSuspended
 
 		if(len(sysvals.devicefilter) > 0):
 			data.deviceFilter(sysvals.devicefilter)
@@ -3106,7 +3115,7 @@ def parseTraceLog(live=False):
 			for j in range(i + 1, tc):
 				testdata[j].mergeOverlapDevices(devlist)
 		testdata[0].stitchTouchingThreads(testdata[1:])
-	return testdata
+	return (testdata, ', '.join(error))
 
 # Function: loadKernelLog
 # Description:
@@ -3142,7 +3151,13 @@ def loadKernelLog():
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
-		m = re.match(sysvals.firmwarefmt, line)
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
+		m = re.match(tp.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
@@ -3155,10 +3170,6 @@ def loadKernelLog():
 				testruns.append(data)
 			data = Data(len(testruns))
 			tp.parseStamp(data, sysvals)
-			if len(tp.fwdata) > data.testnumber:
-				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
-				if(data.fwSuspend > 0 or data.fwResume > 0):
-					data.fwValid = True
 		if(not data):
 			continue
 		m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg)
@@ -3173,7 +3184,7 @@ def loadKernelLog():
 	if data:
 		testruns.append(data)
 	if len(testruns) < 1:
-		doError(' dmesg log has no suspend/resume data: %s' \
+		pprint('ERROR: dmesg log has no suspend/resume data: %s' \
 			% sysvals.dmesgfile)
 
 	# fix lines with same timestamp/function with the call and return swapped
@@ -3214,30 +3225,30 @@ def parseKernelLog(data):
 
 	# dmesg phase match table
 	dm = {
-		'suspend_prepare': 'PM: Syncing filesystems.*',
-		        'suspend': 'PM: Entering [a-z]* sleep.*',
-		   'suspend_late': 'PM: suspend of devices complete after.*',
-		  'suspend_noirq': 'PM: late suspend of devices complete after.*',
-		'suspend_machine': 'PM: noirq suspend of devices complete after.*',
-		 'resume_machine': 'ACPI: Low-level resume complete.*',
-		   'resume_noirq': 'ACPI: Waking up from system sleep state.*',
-		   'resume_early': 'PM: noirq resume of devices complete after.*',
-		         'resume': 'PM: early resume of devices complete after.*',
-		'resume_complete': 'PM: resume of devices complete after.*',
-		    'post_resume': '.*Restarting tasks \.\.\..*',
+		'suspend_prepare': ['PM: Syncing filesystems.*'],
+		        'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*'],
+		   'suspend_late': ['PM: suspend of devices complete after.*'],
+		  'suspend_noirq': ['PM: late suspend of devices complete after.*'],
+		'suspend_machine': ['PM: noirq suspend of devices complete after.*'],
+		 'resume_machine': ['ACPI: Low-level resume complete.*'],
+		   'resume_noirq': ['ACPI: Waking up from system sleep state.*'],
+		   'resume_early': ['PM: noirq resume of devices complete after.*'],
+		         'resume': ['PM: early resume of devices complete after.*'],
+		'resume_complete': ['PM: resume of devices complete after.*'],
+		    'post_resume': ['.*Restarting tasks \.\.\..*'],
 	}
 	if(sysvals.suspendmode == 'standby'):
-		dm['resume_machine'] = 'PM: Restoring platform NVS memory'
+		dm['resume_machine'] = ['PM: Restoring platform NVS memory']
 	elif(sysvals.suspendmode == 'disk'):
-		dm['suspend_late'] = 'PM: freeze of devices complete after.*'
-		dm['suspend_noirq'] = 'PM: late freeze of devices complete after.*'
-		dm['suspend_machine'] = 'PM: noirq freeze of devices complete after.*'
-		dm['resume_machine'] = 'PM: Restoring platform NVS memory'
-		dm['resume_early'] = 'PM: noirq restore of devices complete after.*'
-		dm['resume'] = 'PM: early restore of devices complete after.*'
-		dm['resume_complete'] = 'PM: restore of devices complete after.*'
+		dm['suspend_late'] = ['PM: freeze of devices complete after.*']
+		dm['suspend_noirq'] = ['PM: late freeze of devices complete after.*']
+		dm['suspend_machine'] = ['PM: noirq freeze of devices complete after.*']
+		dm['resume_machine'] = ['PM: Restoring platform NVS memory']
+		dm['resume_early'] = ['PM: noirq restore of devices complete after.*']
+		dm['resume'] = ['PM: early restore of devices complete after.*']
+		dm['resume_complete'] = ['PM: restore of devices complete after.*']
 	elif(sysvals.suspendmode == 'freeze'):
-		dm['resume_machine'] = 'ACPI: resume from mwait'
+		dm['resume_machine'] = ['ACPI: resume from mwait']
 
 	# action table (expected events that occur and show up in dmesg)
 	at = {
@@ -3279,81 +3290,89 @@ def parseKernelLog(data):
 		else:
 			continue
 
+		# check for a phase change line
+		phasechange = False
+		for p in dm:
+			for s in dm[p]:
+				if(re.match(s, msg)):
+					phasechange, phase = True, p
+					break
+
 		# hack for determining resume_machine end for freeze
 		if(not sysvals.usetraceevents and sysvals.suspendmode == 'freeze' \
 			and phase == 'resume_machine' and \
 			re.match('calling  (?P<f>.*)\+ @ .*, parent: .*', msg)):
-			data.dmesg['resume_machine']['end'] = ktime
-			phase = 'resume_noirq'
-			data.dmesg[phase]['start'] = ktime
-
-		# suspend start
-		if(re.match(dm['suspend_prepare'], msg)):
-			phase = 'suspend_prepare'
-			data.dmesg[phase]['start'] = ktime
-			data.setStart(ktime)
-			data.tKernSus = ktime
-		# suspend start
-		elif(re.match(dm['suspend'], msg)):
-			data.dmesg['suspend_prepare']['end'] = ktime
-			phase = 'suspend'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_late start
-		elif(re.match(dm['suspend_late'], msg)):
-			data.dmesg['suspend']['end'] = ktime
-			phase = 'suspend_late'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_noirq start
-		elif(re.match(dm['suspend_noirq'], msg)):
-			data.dmesg['suspend_late']['end'] = ktime
-			phase = 'suspend_noirq'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_machine start
-		elif(re.match(dm['suspend_machine'], msg)):
-			data.dmesg['suspend_noirq']['end'] = ktime
-			phase = 'suspend_machine'
-			data.dmesg[phase]['start'] = ktime
-		# resume_machine start
-		elif(re.match(dm['resume_machine'], msg)):
-			if(sysvals.suspendmode in ['freeze', 'standby']):
-				data.tSuspended = prevktime
-				data.dmesg['suspend_machine']['end'] = prevktime
-			else:
-				data.tSuspended = ktime
-				data.dmesg['suspend_machine']['end'] = ktime
-			phase = 'resume_machine'
-			data.tResumed = ktime
-			data.tLow = data.tResumed - data.tSuspended
-			data.dmesg[phase]['start'] = ktime
-		# resume_noirq start
-		elif(re.match(dm['resume_noirq'], msg)):
-			data.dmesg['resume_machine']['end'] = ktime
+			data.setPhase(phase, ktime, False)
 			phase = 'resume_noirq'
-			data.dmesg[phase]['start'] = ktime
-		# resume_early start
-		elif(re.match(dm['resume_early'], msg)):
-			data.dmesg['resume_noirq']['end'] = ktime
-			phase = 'resume_early'
-			data.dmesg[phase]['start'] = ktime
-		# resume start
-		elif(re.match(dm['resume'], msg)):
-			data.dmesg['resume_early']['end'] = ktime
-			phase = 'resume'
-			data.dmesg[phase]['start'] = ktime
-		# resume complete start
-		elif(re.match(dm['resume_complete'], msg)):
-			data.dmesg['resume']['end'] = ktime
-			phase = 'resume_complete'
-			data.dmesg[phase]['start'] = ktime
-		# post resume start
-		elif(re.match(dm['post_resume'], msg)):
-			data.dmesg['resume_complete']['end'] = ktime
-			data.setEnd(ktime)
-			data.tKernRes = ktime
-			break
+			data.setPhase(phase, ktime, True)
+
+		if phasechange:
+			if phase == 'suspend_prepare':
+				data.setPhase(phase, ktime, True)
+				data.setStart(ktime)
+				data.tKernSus = ktime
+			elif phase == 'suspend':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_late':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_noirq':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_machine':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_machine':
+				lp = data.lastPhase()
+				if(sysvals.suspendmode in ['freeze', 'standby']):
+					data.tSuspended = prevktime
+					if lp:
+						data.setPhase(lp, prevktime, False)
+				else:
+					data.tSuspended = ktime
+					if lp:
+						data.setPhase(lp, prevktime, False)
+				data.tResumed = ktime
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_noirq':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_early':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_complete':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'post_resume':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setEnd(ktime)
+				data.tKernRes = ktime
+				break
 
 		# -- device callbacks --
-		if(phase in data.phases):
+		if(phase in data.sortedPhases()):
 			# device init call
 			if(re.match('calling  (?P<f>.*)\+ @ .*, parent: .*', msg)):
 				sm = re.match('calling  (?P<f>.*)\+ @ '+\
@@ -3411,24 +3430,31 @@ def parseKernelLog(data):
 				actions[cpu].append({'begin': cpu_start, 'end': ktime})
 				cpu_start = ktime
 		prevktime = ktime
+	data.initDevicegroups()
 
 	# fill in any missing phases
-	lp = data.phases[0]
-	for p in data.phases:
-		if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
-			print('WARNING: phase "%s" is missing, something went wrong!' % p)
-			print('    In %s, this dmesg line denotes the start of %s:' % \
-				(sysvals.suspendmode, p))
-			print('        "%s"' % dm[p])
-		if(data.dmesg[p]['start'] < 0):
-			data.dmesg[p]['start'] = data.dmesg[lp]['end']
-			if(p == 'resume_machine'):
-				data.tSuspended = data.dmesg[lp]['end']
-				data.tResumed = data.dmesg[lp]['end']
-				data.tLow = 0
-		if(data.dmesg[p]['end'] < 0):
-			data.dmesg[p]['end'] = data.dmesg[p]['start']
+	phasedef = data.phasedef
+	terr, lp = '', 'suspend_prepare'
+	for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+		if p not in data.dmesg:
+			if not terr:
+				pprint('TEST FAILED: %s failed in %s phase' % (sysvals.suspendmode, lp))
+				terr = '%s failed in %s phase' % (sysvals.suspendmode, lp)
+				if data.tSuspended == 0:
+					data.tSuspended = data.dmesg[lp]['end']
+				if data.tResumed == 0:
+					data.tResumed = data.dmesg[lp]['end']
+			sysvals.vprint('WARNING: phase "%s" is missing!' % p)
+		lp = p
+	lp = data.sortedPhases()[0]
+	for p in data.sortedPhases():
+		if(p != lp and not ('machine' in p and 'machine' in lp)):
+			data.dmesg[lp]['end'] = data.dmesg[p]['start']
 		lp = p
+	if data.tSuspended == 0:
+		data.tSuspended = data.tKernRes
+	if data.tResumed == 0:
+		data.tResumed = data.tSuspended
 
 	# fill in any actions we've found
 	for name in actions:
@@ -3477,7 +3503,7 @@ def addCallgraphs(sv, hf, data):
 	hf.write('<section id="callgraphs" class="callgraph">\n')
 	# write out the ftrace data converted to html
 	num = 0
-	for p in data.phases:
+	for p in data.sortedPhases():
 		if sv.cgphase and p != sv.cgphase:
 			continue
 		list = data.dmesg[p]['list']
@@ -3510,7 +3536,7 @@ def addCallgraphs(sv, hf, data):
 #	 Create summary html file for a series of tests
 # Arguments:
 #	 testruns: array of Data objects from parseTraceLog
-def createHTMLSummarySimple(testruns, htmlfile, folder):
+def createHTMLSummarySimple(testruns, htmlfile, title):
 	# write the html header first (html head, css code, up to body start)
 	html = '<!DOCTYPE html>\n<html>\n<head>\n\
 	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
@@ -3520,69 +3546,152 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		table {width:100%;border-collapse: collapse;}\n\
 		.summary {border:1px solid;}\n\
 		th {border: 1px solid black;background:#222;color:white;}\n\
-		td {font: 16px "Times New Roman";text-align: center;}\n\
-		tr.alt td {background:#ddd;}\n\
-		tr.avg td {background:#aaa;}\n\
+		td {font: 14px "Times New Roman";text-align: center;}\n\
+		tr.head td {border: 1px solid black;background:#aaa;}\n\
+		tr.alt {background-color:#ddd;}\n\
+		tr.notice {color:red;}\n\
+		.minval {background-color:#BBFFBB;}\n\
+		.medval {background-color:#BBBBFF;}\n\
+		.maxval {background-color:#FFBBBB;}\n\
+		.head a {color:#000;text-decoration: none;}\n\
 	</style>\n</head>\n<body>\n'
 
+	# extract the test data into list
+	list = dict()
+	tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
+	iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
+	num = 0
+	lastmode = ''
+	cnt = dict()
+	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
+		mode = data['mode']
+		if mode not in list:
+			list[mode] = {'data': [], 'avg': [0,0], 'min': [0,0], 'max': [0,0], 'med': [0,0]}
+		if lastmode and lastmode != mode and num > 0:
+			for i in range(2):
+				s = sorted(tMed[i])
+				list[lastmode]['med'][i] = s[int(len(s)/2)]
+				iMed[i] = tMed[i].index(list[lastmode]['med'][i])
+			list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
+			list[lastmode]['min'] = tMin
+			list[lastmode]['max'] = tMax
+			list[lastmode]['idx'] = (iMin, iMed, iMax)
+			tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
+			iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
+			num = 0
+		tVal = [float(data['suspend']), float(data['resume'])]
+		list[mode]['data'].append([data['host'], data['kernel'],
+			data['time'], tVal[0], tVal[1], data['url'], data['result'],
+			data['issues'], data['sus_worst'], data['sus_worsttime'],
+			data['res_worst'], data['res_worsttime']])
+		idx = len(list[mode]['data']) - 1
+		if data['result'] not in cnt:
+			cnt[data['result']] = 1
+		else:
+			cnt[data['result']] += 1
+		if data['result'] == 'pass':
+			for i in range(2):
+				tMed[i].append(tVal[i])
+				tAvg[i] += tVal[i]
+				if tMin[i] == 0 or tVal[i] < tMin[i]:
+					iMin[i] = idx
+					tMin[i] = tVal[i]
+				if tMax[i] == 0 or tVal[i] > tMax[i]:
+					iMax[i] = idx
+					tMax[i] = tVal[i]
+			num += 1
+		lastmode = mode
+	if lastmode and num > 0:
+		for i in range(2):
+			s = sorted(tMed[i])
+			list[lastmode]['med'][i] = s[int(len(s)/2)]
+			iMed[i] = tMed[i].index(list[lastmode]['med'][i])
+		list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
+		list[lastmode]['min'] = tMin
+		list[lastmode]['max'] = tMax
+		list[lastmode]['idx'] = (iMin, iMed, iMax)
+
 	# group test header
-	html += '<div class="stamp">%s (%d tests)</div>\n' % (folder, len(testruns))
+	desc = []
+	for ilk in sorted(cnt, reverse=True):
+		if cnt[ilk] > 0:
+			desc.append('%d %s' % (cnt[ilk], ilk))
+	html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (title, len(testruns), ', '.join(desc))
 	th = '\t<th>{0}</th>\n'
 	td = '\t<td>{0}</td>\n'
+	tdh = '\t<td{1}>{0}</td>\n'
 	tdlink = '\t<td><a href="{0}">html</a></td>\n'
 
 	# table header
 	html += '<table class="summary">\n<tr>\n' + th.format('#') +\
 		th.format('Mode') + th.format('Host') + th.format('Kernel') +\
-		th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\
+		th.format('Test Time') + th.format('Result') + th.format('Issues') +\
+		th.format('Suspend') + th.format('Resume') +\
+		th.format('Worst Suspend Device') + th.format('SD Time') +\
+		th.format('Worst Resume Device') + th.format('RD Time') +\
 		th.format('Detail') + '</tr>\n'
 
-	# test data, 1 row per test
-	avg = '<tr class="avg"><td></td><td></td><td></td><td></td>'+\
-		'<td>Average of {0} {1} tests</td><td>{2}</td><td>{3}</td><td></td></tr>\n'
-	sTimeAvg = rTimeAvg = 0.0
-	mode = ''
-	num = 0
-	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
-		if mode != data['mode']:
-			# test average line
-			if(num > 0):
-				sTimeAvg /= (num - 1)
-				rTimeAvg /= (num - 1)
-				html += avg.format('%d' % (num - 1), mode,
-					'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
-			sTimeAvg = rTimeAvg = 0.0
-			mode = data['mode']
-			num = 1
-		# alternate row color
-		if num % 2 == 1:
-			html += '<tr class="alt">\n'
+	# export list into html
+	head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
+		'<td colspan=12 class="sus">Suspend Avg={2} '+\
+		'<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\
+		'<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\
+		'<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\
+		'Resume Avg={6} '+\
+		'<span class=minval><a href="#r{10}min">Min={7}</a></span> '+\
+		'<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\
+		'<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\
+		'</tr>\n'
+	headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=12></td></tr>\n'
+	for mode in list:
+		# header line for each suspend mode
+		num = 0
+		tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\
+			list[mode]['max'], list[mode]['med']
+		count = len(list[mode]['data'])
+		if 'idx' in list[mode]:
+			iMin, iMed, iMax = list[mode]['idx']
+			html += head.format('%d' % count, mode.upper(),
+				'%.3f' % tAvg[0], '%.3f' % tMin[0], '%.3f' % tMed[0], '%.3f' % tMax[0],
+				'%.3f' % tAvg[1], '%.3f' % tMin[1], '%.3f' % tMed[1], '%.3f' % tMax[1],
+				mode.lower()
+			)
 		else:
-			html += '<tr>\n'
-		html += td.format("%d" % num)
-		num += 1
-		# basic info
-		for item in ['mode', 'host', 'kernel', 'time']:
-			val = "unknown"
-			if(item in data):
-				val = data[item]
-			html += td.format(val)
-		# suspend time
-		sTime = float(data['suspend'])
-		sTimeAvg += sTime
-		html += td.format('%.3f ms' % sTime)
-		# resume time
-		rTime = float(data['resume'])
-		rTimeAvg += rTime
-		html += td.format('%.3f ms' % rTime)
-		# link to the output html
-		html += tdlink.format(data['url']) + '</tr>\n'
-	# last test average line
-	if(num > 0):
-		sTimeAvg /= (num - 1)
-		rTimeAvg /= (num - 1)
-		html += avg.format('%d' % (num - 1), mode,
-			'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
+			iMin = iMed = iMax = [-1, -1, -1]
+			html += headnone.format('%d' % count, mode.upper())
+		for d in list[mode]['data']:
+			# row classes - alternate row color
+			rcls = ['alt'] if num % 2 == 1 else []
+			if d[6] != 'pass':
+				rcls.append('notice')
+			html += '<tr class="'+(' '.join(rcls))+'">\n' if len(rcls) > 0 else '<tr>\n'
+			# figure out if the line has sus or res highlighted
+			idx = list[mode]['data'].index(d)
+			tHigh = ['', '']
+			for i in range(2):
+				tag = 's%s' % mode if i == 0 else 'r%s' % mode
+				if idx == iMin[i]:
+					tHigh[i] = ' id="%smin" class=minval title="Minimum"' % tag
+				elif idx == iMax[i]:
+					tHigh[i] = ' id="%smax" class=maxval title="Maximum"' % tag
+				elif idx == iMed[i]:
+					tHigh[i] = ' id="%smed" class=medval title="Median"' % tag
+			html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row
+			html += td.format(mode)										# mode
+			html += td.format(d[0])										# host
+			html += td.format(d[1])										# kernel
+			html += td.format(d[2])										# time
+			html += td.format(d[6])										# result
+			html += td.format(d[7])										# issues
+			html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('')	# suspend
+			html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('')	# resume
+			html += td.format(d[8])										# sus_worst
+			html += td.format('%.3f ms' % d[9])	if d[9] else td.format('')		# sus_worst time
+			html += td.format(d[10])									# res_worst
+			html += td.format('%.3f ms' % d[11]) if d[11] else td.format('')	# res_worst time
+			html += tdlink.format(d[5]) if d[5] else td.format('')		# url
+			html += '</tr>\n'
+			num += 1
 
 	# flush the data to file
 	hf = open(htmlfile, 'w')
@@ -3607,16 +3716,17 @@ def ordinal(value):
 #	 testruns: array of Data objects from parseKernelLog or parseTraceLog
 # Output:
 #	 True if the html file was created, false if it failed
-def createHTML(testruns):
+def createHTML(testruns, testfail):
 	if len(testruns) < 1:
-		print('ERROR: Not enough test data to build a timeline')
+		pprint('ERROR: Not enough test data to build a timeline')
 		return
 
 	kerror = False
 	for data in testruns:
 		if data.kerror:
 			kerror = True
-		data.normalizeTime(testruns[-1].tSuspended)
+		if(sysvals.suspendmode in ['freeze', 'standby']):
+			data.trimFreezeTime(testruns[-1].tSuspended)
 
 	# html function templates
 	html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">{2}&rarr;</div>\n'
@@ -3641,6 +3751,7 @@ def createHTML(testruns):
 		'<td class="purple">{4}Firmware Resume: {2} ms</td>'\
 		'<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\
 		'</tr>\n</table>\n'
+	html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n'
 
 	# html format variables
 	scaleH = 20
@@ -3659,8 +3770,8 @@ def createHTML(testruns):
 		sktime, rktime = data.getTimeValues()
 		if(tTotal == 0):
 			doError('No timeline data')
-		if(data.tLow > 0):
-			low_time = '%.0f'%(data.tLow*1000)
+		if(len(data.tLow) > 0):
+			low_time = '|'.join(data.tLow)
 		if sysvals.suspendmode == 'command':
 			run_time = '%.0f'%((data.end-data.start)*1000)
 			if sysvals.testcommand:
@@ -3681,7 +3792,7 @@ def createHTML(testruns):
 			if(len(testruns) > 1):
 				testdesc1 = testdesc2 = ordinal(data.testnumber+1)
 				testdesc2 += ' '
-			if(data.tLow == 0):
+			if(len(data.tLow) == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc1, stitle, rtitle)
 			else:
@@ -3700,7 +3811,7 @@ def createHTML(testruns):
 			rtitle = 'time from firmware mode to return from kernel enter_state(%s) [kernel time only]' % sysvals.suspendmode
 			if(len(testruns) > 1):
 				testdesc = ordinal(data.testnumber+1)+' '+testdesc
-			if(data.tLow == 0):
+			if(len(data.tLow) == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc, stitle, rtitle)
 			else:
@@ -3708,6 +3819,9 @@ def createHTML(testruns):
 					resume_time, testdesc, stitle, rtitle)
 			devtl.html += thtml
 
+	if testfail:
+		devtl.html += html_fail.format(testfail)
+
 	# time scale for potentially multiple datasets
 	t0 = testruns[0].start
 	tMax = testruns[-1].end
@@ -3755,15 +3869,14 @@ def createHTML(testruns):
 
 	# draw the full timeline
 	devtl.createZoomBox(sysvals.suspendmode, len(testruns))
-	phases = {'suspend':[],'resume':[]}
-	for phase in data.dmesg:
-		if 'resume' in phase:
-			phases['resume'].append(phase)
-		else:
-			phases['suspend'].append(phase)
-
-	# draw each test run chronologically
 	for data in testruns:
+		# draw each test run and block chronologically
+		phases = {'suspend':[],'resume':[]}
+		for phase in data.sortedPhases():
+			if data.dmesg[phase]['start'] >= data.tSuspended:
+				phases['resume'].append(phase)
+			else:
+				phases['suspend'].append(phase)
 		# now draw the actual timeline blocks
 		for dir in phases:
 			# draw suspend and resume blocks separately
@@ -3785,7 +3898,7 @@ def createHTML(testruns):
 				continue
 			width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
 			devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH)
-			for b in sorted(phases[dir]):
+			for b in phases[dir]:
 				# draw the phase color background
 				phase = data.dmesg[b]
 				length = phase['end']-phase['start']
@@ -3800,7 +3913,7 @@ def createHTML(testruns):
 				id = '%d_%d' % (idx1, idx2)
 				right = '%f' % (((mMax-t)*100.0)/mTotal)
 				devtl.html += html_error.format(right, id, type)
-			for b in sorted(phases[dir]):
+			for b in phases[dir]:
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
 				for d in data.tdevlist[b]:
@@ -3877,19 +3990,17 @@ def createHTML(testruns):
 
 	# draw a legend which describes the phases by color
 	if sysvals.suspendmode != 'command':
-		data = testruns[-1]
+		phasedef = testruns[-1].phasedef
 		devtl.html += '<div class="legend">\n'
-		pdelta = 100.0/len(data.phases)
+		pdelta = 100.0/len(phasedef.keys())
 		pmargin = pdelta / 4.0
-		for phase in data.phases:
-			tmp = phase.split('_')
-			id = tmp[0][0]
-			if(len(tmp) > 1):
-				id += tmp[1][0]
-			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+		for phase in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+			id, p = '', phasedef[phase]
+			for word in phase.split('_'):
+				id += word[0]
+			order = '%.2f' % ((p['order'] * pdelta) + pmargin)
 			name = string.replace(phase, '_', ' &nbsp;')
-			devtl.html += devtl.html_legend.format(order, \
-				data.dmesg[phase]['color'], name, id)
+			devtl.html += devtl.html_legend.format(order, p['color'], name, id)
 		devtl.html += '</div>\n'
 
 	hf = open(sysvals.htmlfile, 'w')
@@ -3905,7 +4016,7 @@ def createHTML(testruns):
 		pscolor = 'linear-gradient(to top left, #ccc, #eee)'
 		hf.write(devtl.html_phaselet.format('pre_suspend_process', \
 			'0', '0', pscolor))
-		for b in data.phases:
+		for b in data.sortedPhases():
 			phase = data.dmesg[b]
 			length = phase['end']-phase['start']
 			left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
@@ -4006,6 +4117,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
 		.blue {background:rgba(169,208,245,0.4);}\n\
 		.time1 {font:22px Arial;border:1px solid;}\n\
 		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
+		.testfail {font:bold 22px Arial;color:red;border:1px dashed;}\n\
 		td {text-align:center;}\n\
 		r {color:#500000;font:15px Tahoma;}\n\
 		n {color:#505050;font:15px Tahoma;}\n\
@@ -4456,18 +4568,18 @@ def setRuntimeSuspend(before=True):
 			sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled'
 		else:
 			sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled'
-		print('CONFIGURING RUNTIME SUSPEND...')
+		pprint('CONFIGURING RUNTIME SUSPEND...')
 		sv.rslist = deviceInfo(sv.rstgt)
 		for i in sv.rslist:
 			sv.setVal(sv.rsval, i)
-		print('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
-		print('waiting 5 seconds...')
+		pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
+		pprint('waiting 5 seconds...')
 		time.sleep(5)
 	else:
 		# runtime suspend re-enable or re-disable
 		for i in sv.rslist:
 			sv.setVal(sv.rstgt, i)
-		print('runtime suspend settings restored on %d devices' % len(sv.rslist))
+		pprint('runtime suspend settings restored on %d devices' % len(sv.rslist))
 
 # Function: executeSuspend
 # Description:
@@ -4476,25 +4588,21 @@ def setRuntimeSuspend(before=True):
 def executeSuspend():
 	pm = ProcessMonitor()
 	tp = sysvals.tpath
-	fwdata = []
+	testdata = []
+	battery = True if getBattery() else False
 	# run these commands to prepare the system for suspend
 	if sysvals.display:
-		if sysvals.display > 0:
-			print('TURN DISPLAY ON')
-			call('xset -d :0.0 dpms force suspend', shell=True)
-			call('xset -d :0.0 dpms force on', shell=True)
-		else:
-			print('TURN DISPLAY OFF')
-			call('xset -d :0.0 dpms force suspend', shell=True)
+		pprint('SET DISPLAY TO %s' % sysvals.display.upper())
+		displayControl(sysvals.display)
 		time.sleep(1)
 	if sysvals.sync:
-		print('SYNCING FILESYSTEMS')
+		pprint('SYNCING FILESYSTEMS')
 		call('sync', shell=True)
 	# mark the start point in the kernel ring buffer just as we start
 	sysvals.initdmesg()
 	# start ftrace
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		print('START TRACING')
+		pprint('START TRACING')
 		sysvals.fsetVal('1', 'tracing_on')
 		if sysvals.useprocmon:
 			pm.start()
@@ -4507,15 +4615,16 @@ def executeSuspend():
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# start message
 		if sysvals.testcommand != '':
-			print('COMMAND START')
+			pprint('COMMAND START')
 		else:
 			if(sysvals.rtcwake):
-				print('SUSPEND START')
+				pprint('SUSPEND START')
 			else:
-				print('SUSPEND START (press a key to resume)')
+				pprint('SUSPEND START (press a key to resume)')
+		bat1 = getBattery() if battery else False
 		# set rtcwake
 		if(sysvals.rtcwake):
-			print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
+			pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
 			sysvals.rtcWakeAlarmOn()
 		# start of suspend trace marker
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
@@ -4526,8 +4635,11 @@ def executeSuspend():
 			time.sleep(sysvals.predelay/1000.0)
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# initiate suspend or command
+		tdata = {'error': ''}
 		if sysvals.testcommand != '':
-			call(sysvals.testcommand+' 2>&1', shell=True);
+			res = call(sysvals.testcommand+' 2>&1', shell=True);
+			if res != 0:
+				tdata['error'] = 'cmd returned %d' % res
 		else:
 			mode = sysvals.suspendmode
 			if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
@@ -4535,13 +4647,18 @@ def executeSuspend():
 				pf = open(sysvals.mempowerfile, 'w')
 				pf.write(sysvals.memmode)
 				pf.close()
+			if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile):
+				mode = 'disk'
+				pf = open(sysvals.diskpowerfile, 'w')
+				pf.write(sysvals.diskmode)
+				pf.close()
 			pf = open(sysvals.powerfile, 'w')
 			pf.write(mode)
 			# execution will pause here
 			try:
 				pf.close()
-			except:
-				pass
+			except Exception as e:
+				tdata['error'] = str(e)
 		if(sysvals.rtcwake):
 			sysvals.rtcWakeAlarmOff()
 		# postdelay delay
@@ -4550,27 +4667,33 @@ def executeSuspend():
 			time.sleep(sysvals.postdelay/1000.0)
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# return from suspend
-		print('RESUME COMPLETE')
+		pprint('RESUME COMPLETE')
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
 			sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
 		if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
-			fwdata.append(getFPDT(False))
+			tdata['fw'] = getFPDT(False)
+		bat2 = getBattery() if battery else False
+		if battery and bat1 and bat2:
+			tdata['bat'] = (bat1, bat2)
+		testdata.append(tdata)
 	# stop ftrace
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
 		if sysvals.useprocmon:
 			pm.stop()
 		sysvals.fsetVal('0', 'tracing_on')
-		print('CAPTURING TRACE')
-		op = sysvals.writeDatafileHeader(sysvals.ftracefile, fwdata)
+	# grab a copy of the dmesg output
+	pprint('CAPTURING DMESG')
+	sysvals.getdmesg(testdata)
+	# grab a copy of the ftrace output
+	if(sysvals.usecallgraph or sysvals.usetraceevents):
+		pprint('CAPTURING TRACE')
+		op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
 		fp = open(tp+'trace', 'r')
 		for line in fp:
 			op.write(line)
 		op.close()
 		sysvals.fsetVal('', 'trace')
 		devProps()
-	# grab a copy of the dmesg output
-	print('CAPTURING DMESG')
-	sysvals.getdmesg(fwdata)
 
 def readFile(file):
 	if os.path.islink(file):
@@ -4607,15 +4730,15 @@ def yesno(val):
 #	 a list of USB device names to sysvals for better timeline readability
 def deviceInfo(output=''):
 	if not output:
-		print('LEGEND')
-		print('---------------------------------------------------------------------------------------------')
-		print('  A = async/sync PM queue (A/S)               C = runtime active children')
-		print('  R = runtime suspend enabled/disabled (E/D)  rACTIVE = runtime active (min/sec)')
-		print('  S = runtime status active/suspended (A/S)   rSUSPEND = runtime suspend (min/sec)')
-		print('  U = runtime usage count')
-		print('---------------------------------------------------------------------------------------------')
-		print('DEVICE                     NAME                       A R S U C    rACTIVE   rSUSPEND')
-		print('---------------------------------------------------------------------------------------------')
+		pprint('LEGEND\n'\
+		'---------------------------------------------------------------------------------------------\n'\
+		'  A = async/sync PM queue (A/S)               C = runtime active children\n'\
+		'  R = runtime suspend enabled/disabled (E/D)  rACTIVE = runtime active (min/sec)\n'\
+		'  S = runtime status active/suspended (A/S)   rSUSPEND = runtime suspend (min/sec)\n'\
+		'  U = runtime usage count\n'\
+		'---------------------------------------------------------------------------------------------\n'\
+		'DEVICE                     NAME                       A R S U C    rACTIVE   rSUSPEND\n'\
+		'---------------------------------------------------------------------------------------------')
 
 	res = []
 	tgtval = 'runtime_status'
@@ -4700,7 +4823,7 @@ def devProps(data=0):
 			alreadystamped = True
 			continue
 		# determine the trace data type (required for further parsing)
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
@@ -4804,6 +4927,11 @@ def getModes():
 		fp.close()
 		if 'mem' in modes and not deep:
 			modes.remove('mem')
+	if('disk' in modes and os.path.exists(sysvals.diskpowerfile)):
+		fp = open(sysvals.diskpowerfile, 'r')
+		for m in string.split(fp.read()):
+			modes.append('disk-%s' % m.strip('[]'))
+		fp.close()
 	return modes
 
 # Function: dmidecode
@@ -4927,6 +5055,58 @@ def dmidecode(mempath, fatal=False):
 		count += 1
 	return out
 
+def getBattery():
+	p, charge, bat = '/sys/class/power_supply', 0, {}
+	if not os.path.exists(p):
+		return False
+	for d in os.listdir(p):
+		type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
+		if type != 'battery':
+			continue
+		for v in ['status', 'energy_now', 'capacity_now']:
+			bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
+		break
+	if 'status' not in bat:
+		return False
+	ac = False if 'discharging' in bat['status'] else True
+	for v in ['energy_now', 'capacity_now']:
+		if v in bat and bat[v]:
+			charge = int(bat[v])
+	return (ac, charge)
+
+def displayControl(cmd):
+	xset, ret = 'xset -d :0.0 {0}', 0
+	if sysvals.sudouser:
+		xset = 'sudo -u %s %s' % (sysvals.sudouser, xset)
+	if cmd == 'init':
+		ret = call(xset.format('dpms 0 0 0'), shell=True)
+		if not ret:
+			ret = call(xset.format('s off'), shell=True)
+	elif cmd == 'reset':
+		ret = call(xset.format('s reset'), shell=True)
+	elif cmd in ['on', 'off', 'standby', 'suspend']:
+		b4 = displayControl('stat')
+		ret = call(xset.format('dpms force %s' % cmd), shell=True)
+		if not ret:
+			curr = displayControl('stat')
+			sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
+			if curr != cmd:
+				sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
+		if ret:
+			sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd)
+			return ret
+	elif cmd == 'stat':
+		fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
+		ret = 'unknown'
+		for line in fp:
+			m = re.match('[\s]*Monitor is (?P<m>.*)', line)
+			if(m and len(m.group('m')) >= 2):
+				out = m.group('m').lower()
+				ret = out[3:] if out[0:2] == 'in' else out
+				break
+		fp.close()
+	return ret
+
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -4970,18 +5150,19 @@ def getFPDT(output):
 
 	table = struct.unpack('4sIBB6s8sI4sI', buf[0:36])
 	if(output):
-		print('')
-		print('Firmware Performance Data Table (%s)' % table[0])
-		print('                  Signature : %s' % table[0])
-		print('               Table Length : %u' % table[1])
-		print('                   Revision : %u' % table[2])
-		print('                   Checksum : 0x%x' % table[3])
-		print('                     OEM ID : %s' % table[4])
-		print('               OEM Table ID : %s' % table[5])
-		print('               OEM Revision : %u' % table[6])
-		print('                 Creator ID : %s' % table[7])
-		print('           Creator Revision : 0x%x' % table[8])
-		print('')
+		pprint('\n'\
+		'Firmware Performance Data Table (%s)\n'\
+		'                  Signature : %s\n'\
+		'               Table Length : %u\n'\
+		'                   Revision : %u\n'\
+		'                   Checksum : 0x%x\n'\
+		'                     OEM ID : %s\n'\
+		'               OEM Table ID : %s\n'\
+		'               OEM Revision : %u\n'\
+		'                 Creator ID : %s\n'\
+		'           Creator Revision : 0x%x\n'\
+		'' % (table[0], table[0], table[1], table[2], table[3],
+			table[4], table[5], table[6], table[7], table[8]))
 
 	if(table[0] != 'FPDT'):
 		if(output):
@@ -5007,22 +5188,24 @@ def getFPDT(output):
 			first = fp.read(8)
 		except:
 			if(output):
-				print('Bad address 0x%x in %s' % (addr, sysvals.mempath))
+				pprint('Bad address 0x%x in %s' % (addr, sysvals.mempath))
 			return [0, 0]
 		rechead = struct.unpack('4sI', first)
 		recdata = fp.read(rechead[1]-8)
 		if(rechead[0] == 'FBPT'):
 			record = struct.unpack('HBBIQQQQQ', recdata)
 			if(output):
-				print('%s (%s)' % (rectype[header[0]], rechead[0]))
-				print('                  Reset END : %u ns' % record[4])
-				print('  OS Loader LoadImage Start : %u ns' % record[5])
-				print(' OS Loader StartImage Start : %u ns' % record[6])
-				print('     ExitBootServices Entry : %u ns' % record[7])
-				print('      ExitBootServices Exit : %u ns' % record[8])
+				pprint('%s (%s)\n'\
+				'                  Reset END : %u ns\n'\
+				'  OS Loader LoadImage Start : %u ns\n'\
+				' OS Loader StartImage Start : %u ns\n'\
+				'     ExitBootServices Entry : %u ns\n'\
+				'      ExitBootServices Exit : %u ns'\
+				'' % (rectype[header[0]], rechead[0], record[4], record[5],
+					record[6], record[7], record[8]))
 		elif(rechead[0] == 'S3PT'):
 			if(output):
-				print('%s (%s)' % (rectype[header[0]], rechead[0]))
+				pprint('%s (%s)' % (rectype[header[0]], rechead[0]))
 			j = 0
 			while(j < len(recdata)):
 				prechead = struct.unpack('HBB', recdata[j:j+4])
@@ -5032,27 +5215,26 @@ def getFPDT(output):
 					record = struct.unpack('IIQQ', recdata[j:j+prechead[1]])
 					fwData[1] = record[2]
 					if(output):
-						print('    %s' % prectype[prechead[0]])
-						print('               Resume Count : %u' % \
-							record[1])
-						print('                 FullResume : %u ns' % \
-							record[2])
-						print('              AverageResume : %u ns' % \
-							record[3])
+						pprint('    %s\n'\
+						'               Resume Count : %u\n'\
+						'                 FullResume : %u ns\n'\
+						'              AverageResume : %u ns'\
+						'' % (prectype[prechead[0]], record[1],
+								record[2], record[3]))
 				elif(prechead[0] == 1):
 					record = struct.unpack('QQ', recdata[j+4:j+prechead[1]])
 					fwData[0] = record[1] - record[0]
 					if(output):
-						print('    %s' % prectype[prechead[0]])
-						print('               SuspendStart : %u ns' % \
-							record[0])
-						print('                 SuspendEnd : %u ns' % \
-							record[1])
-						print('                SuspendTime : %u ns' % \
-							fwData[0])
+						pprint('    %s\n'\
+						'               SuspendStart : %u ns\n'\
+						'                 SuspendEnd : %u ns\n'\
+						'                SuspendTime : %u ns'\
+						'' % (prectype[prechead[0]], record[0],
+								record[1], fwData[0]))
+
 				j += prechead[1]
 		if(output):
-			print('')
+			pprint('')
 		i += header[1]
 	fp.close()
 	return fwData
@@ -5064,26 +5246,26 @@ def getFPDT(output):
 # Output:
 #	 True if the test will work, False if not
 def statusCheck(probecheck=False):
-	status = True
+	status = ''
 
-	print('Checking this system (%s)...' % platform.node())
+	pprint('Checking this system (%s)...' % platform.node())
 
 	# check we have root access
 	res = sysvals.colorText('NO (No features of this tool will work!)')
 	if(sysvals.rootCheck(False)):
 		res = 'YES'
-	print('    have root access: %s' % res)
+	pprint('    have root access: %s' % res)
 	if(res != 'YES'):
-		print('    Try running this script with sudo')
-		return False
+		pprint('    Try running this script with sudo')
+		return 'missing root access'
 
 	# check sysfs is mounted
 	res = sysvals.colorText('NO (No features of this tool will work!)')
 	if(os.path.exists(sysvals.powerfile)):
 		res = 'YES'
-	print('    is sysfs mounted: %s' % res)
+	pprint('    is sysfs mounted: %s' % res)
 	if(res != 'YES'):
-		return False
+		return 'sysfs is missing'
 
 	# check target mode is a valid mode
 	if sysvals.suspendmode != 'command':
@@ -5092,11 +5274,11 @@ def statusCheck(probecheck=False):
 		if(sysvals.suspendmode in modes):
 			res = 'YES'
 		else:
-			status = False
-		print('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
+			status = '%s mode is not supported' % sysvals.suspendmode
+		pprint('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
 		if(res == 'NO'):
-			print('      valid power modes are: %s' % modes)
-			print('      please choose one with -m')
+			pprint('      valid power modes are: %s' % modes)
+			pprint('      please choose one with -m')
 
 	# check if ftrace is available
 	res = sysvals.colorText('NO')
@@ -5104,8 +5286,8 @@ def statusCheck(probecheck=False):
 	if(ftgood):
 		res = 'YES'
 	elif(sysvals.usecallgraph):
-		status = False
-	print('    is ftrace supported: %s' % res)
+		status = 'ftrace is not properly supported'
+	pprint('    is ftrace supported: %s' % res)
 
 	# check if kprobes are available
 	res = sysvals.colorText('NO')
@@ -5114,7 +5296,7 @@ def statusCheck(probecheck=False):
 		res = 'YES'
 	else:
 		sysvals.usedevsrc = False
-	print('    are kprobes supported: %s' % res)
+	pprint('    are kprobes supported: %s' % res)
 
 	# what data source are we using
 	res = 'DMESG'
@@ -5125,15 +5307,15 @@ def statusCheck(probecheck=False):
 				sysvals.usetraceevents = False
 		if(sysvals.usetraceevents):
 			res = 'FTRACE (all trace events found)'
-	print('    timeline data source: %s' % res)
+	pprint('    timeline data source: %s' % res)
 
 	# check if rtcwake
 	res = sysvals.colorText('NO')
 	if(sysvals.rtcpath != ''):
 		res = 'YES'
 	elif(sysvals.rtcwake):
-		status = False
-	print('    is rtcwake supported: %s' % res)
+		status = 'rtcwake is not properly supported'
+	pprint('    is rtcwake supported: %s' % res)
 
 	if not probecheck:
 		return status
@@ -5158,9 +5340,9 @@ def statusCheck(probecheck=False):
 def doError(msg, help=False):
 	if(help == True):
 		printHelp()
-	print('ERROR: %s\n') % msg
+	pprint('ERROR: %s\n' % msg)
 	sysvals.outputResult({'error':msg})
-	sys.exit()
+	sys.exit(1)
 
 # Function: getArgInt
 # Description:
@@ -5201,9 +5383,10 @@ def getArgFloat(name, args, min, max, main=True):
 	return val
 
 def processData(live=False):
-	print('PROCESSING DATA')
+	pprint('PROCESSING DATA')
+	error = ''
 	if(sysvals.usetraceevents):
-		testruns = parseTraceLog(live)
+		testruns, error = parseTraceLog(live)
 		if sysvals.dmesgfile:
 			for data in testruns:
 				data.extractErrorInfo()
@@ -5215,20 +5398,29 @@ def processData(live=False):
 			appendIncompleteTraceLog(testruns)
 	sysvals.vprint('Command:\n    %s' % sysvals.cmdline)
 	for data in testruns:
+		if data.battery:
+			a1, c1, a2, c2 = data.battery
+			s = 'Battery:\n    Before - AC: %s, Charge: %d\n     After - AC: %s, Charge: %d' % \
+				(a1, int(c1), a2, int(c2))
+			sysvals.vprint(s)
 		data.printDetails()
 	if sysvals.cgdump:
 		for data in testruns:
 			data.debugPrint()
-		sys.exit()
-
+		sys.exit(0)
+	if len(testruns) < 1:
+		pprint('ERROR: Not enough test data to build a timeline')
+		return (testruns, {'error': 'timeline generation failed'})
 	sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
-	createHTML(testruns)
-	print('DONE')
+	createHTML(testruns, error)
+	pprint('DONE')
 	data = testruns[0]
 	stamp = data.stamp
 	stamp['suspend'], stamp['resume'] = data.getTimeValues()
 	if data.fwValid:
 		stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume
+	if error:
+		stamp['error'] = error
 	return (testruns, stamp)
 
 # Function: rerunTest
@@ -5246,6 +5438,7 @@ def rerunTest():
 		elif not os.access(sysvals.htmlfile, os.W_OK):
 			doError('missing permission to write to %s' % sysvals.htmlfile)
 	testruns, stamp = processData(False)
+	sysvals.logmsg = ''
 	return stamp
 
 # Function: runTest
@@ -5260,70 +5453,156 @@ def runTest(n=0):
 	executeSuspend()
 	sysvals.cleanupFtrace()
 	if sysvals.skiphtml:
-		sysvals.sudouser(sysvals.testdir)
+		sysvals.sudoUserchown(sysvals.testdir)
 		return
 	testruns, stamp = processData(True)
 	for data in testruns:
 		del data
-	sysvals.sudouser(sysvals.testdir)
+	sysvals.sudoUserchown(sysvals.testdir)
 	sysvals.outputResult(stamp, n)
-
-def find_in_html(html, strs, div=False):
-	for str in strs:
-		l = len(str)
-		i = html.find(str)
-		if i >= 0:
+	if 'error' in stamp:
+		return 2
+	return 0
+
+def find_in_html(html, start, end, firstonly=True):
+	n, out = 0, []
+	while n < len(html):
+		m = re.search(start, html[n:])
+		if not m:
 			break
-	if i < 0:
-		return ''
-	if not div:
-		return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group()
-	n = html[i+l:].find('</div>')
-	if n < 0:
+		i = m.end()
+		m = re.search(end, html[n+i:])
+		if not m:
+			break
+		j = m.start()
+		str = html[n+i:n+i+j]
+		if end == 'ms':
+			num = re.search(r'[-+]?\d*\.\d+|\d+', str)
+			str = num.group() if num else 'NaN'
+		if firstonly:
+			return str
+		out.append(str)
+		n += i+j
+	if firstonly:
 		return ''
-	return html[i+l:i+l+n]
+	return out
+
+def data_from_html(file, outpath, devlist=False):
+	html = open(file, 'r').read()
+	suspend = find_in_html(html, 'Kernel Suspend', 'ms')
+	resume = find_in_html(html, 'Kernel Resume', 'ms')
+	line = find_in_html(html, '<div class="stamp">', '</div>')
+	stmp = line.split()
+	if not suspend or not resume or len(stmp) != 8:
+		return False
+	try:
+		dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
+	except:
+		return False
+	tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
+	error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
+	if error:
+		m = re.match('[a-z]* failed in (?P<p>[a-z0-9_]*) phase', error)
+		if m:
+			result = 'fail in %s' % m.group('p')
+		else:
+			result = 'fail'
+	else:
+		result = 'pass'
+	ilist = []
+	e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
+	for i in list(set(e)):
+		ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
+	low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
+	if low and '|' in low:
+		ilist.append('FREEZEx%d' % len(low.split('|')))
+	devices = dict()
+	for line in html.split('\n'):
+		m = re.match(' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line)
+		if not m or 'thread kth' in line or 'thread sec' in line:
+			continue
+		m = re.match('(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title'))
+		if not m:
+			continue
+		name, time, phase = m.group('n'), m.group('t'), m.group('p')
+		if ' async' in name or ' sync' in name:
+			name = ' '.join(name.split(' ')[:-1])
+		d = phase.split('_')[0]
+		if d not in devices:
+			devices[d] = dict()
+		if name not in devices[d]:
+			devices[d][name] = 0.0
+		devices[d][name] += float(time)
+	worst  = {'suspend': {'name':'', 'time': 0.0},
+		'resume': {'name':'', 'time': 0.0}}
+	for d in devices:
+		if d not in worst:
+			worst[d] = dict()
+		dev = devices[d]
+		if len(dev.keys()) > 0:
+			n = sorted(dev, key=dev.get, reverse=True)[0]
+			worst[d]['name'], worst[d]['time'] = n, dev[n]
+	data = {
+		'mode': stmp[2],
+		'host': stmp[0],
+		'kernel': stmp[1],
+		'time': tstr,
+		'result': result,
+		'issues': ' '.join(ilist),
+		'suspend': suspend,
+		'resume': resume,
+		'sus_worst': worst['suspend']['name'],
+		'sus_worsttime': worst['suspend']['time'],
+		'res_worst': worst['resume']['name'],
+		'res_worsttime': worst['resume']['time'],
+		'url': os.path.relpath(file, outpath),
+	}
+	if devlist:
+		data['devlist'] = devices
+	return data
 
 # Function: runSummary
 # Description:
 #	 create a summary of tests in a sub-directory
-def runSummary(subdir, local=True):
+def runSummary(subdir, local=True, genhtml=False):
 	inpath = os.path.abspath(subdir)
-	outpath = inpath
-	if local:
-		outpath = os.path.abspath('.')
-	print('Generating a summary of folder "%s"' % inpath)
+	outpath = os.path.abspath('.') if local else inpath
+	pprint('Generating a summary of folder "%s"' % inpath)
+	if genhtml:
+		for dirname, dirnames, filenames in os.walk(subdir):
+			sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
+			for filename in filenames:
+				if(re.match('.*_dmesg.txt', filename)):
+					sysvals.dmesgfile = os.path.join(dirname, filename)
+				elif(re.match('.*_ftrace.txt', filename)):
+					sysvals.ftracefile = os.path.join(dirname, filename)
+			sysvals.setOutputFile()
+			if sysvals.ftracefile and sysvals.htmlfile and \
+				not os.path.exists(sysvals.htmlfile):
+				pprint('FTRACE: %s' % sysvals.ftracefile)
+				if sysvals.dmesgfile:
+					pprint('DMESG : %s' % sysvals.dmesgfile)
+				rerunTest()
 	testruns = []
+	desc = {'host':[],'mode':[],'kernel':[]}
 	for dirname, dirnames, filenames in os.walk(subdir):
 		for filename in filenames:
 			if(not re.match('.*.html', filename)):
 				continue
-			file = os.path.join(dirname, filename)
-			html = open(file, 'r').read(10000)
-			suspend = find_in_html(html,
-				['Kernel Suspend: ', 'Kernel Suspend Time: '])
-			resume = find_in_html(html,
-				['Kernel Resume: ', 'Kernel Resume Time: '])
-			line = find_in_html(html, ['<div class="stamp">'], True)
-			stmp = line.split()
-			if not suspend or not resume or len(stmp) < 4:
+			data = data_from_html(os.path.join(dirname, filename), outpath)
+			if(not data):
 				continue
-			data = {
-				'host': stmp[0],
-				'kernel': stmp[1],
-				'mode': stmp[2],
-				'time': string.join(stmp[3:], ' '),
-				'suspend': suspend,
-				'resume': resume,
-				'url': os.path.relpath(file, outpath),
-			}
-			if len(stmp) == 7:
-				data['kernel'] = 'unknown'
-				data['mode'] = stmp[1]
-				data['time'] = string.join(stmp[2:], ' ')
 			testruns.append(data)
+			for key in desc:
+				if data[key] not in desc[key]:
+					desc[key].append(data[key])
 	outfile = os.path.join(outpath, 'summary.html')
-	print('Summary file: %s' % outfile)
-	createHTMLSummarySimple(testruns, outfile, inpath)
+	pprint('Summary file: %s' % outfile)
+	if len(desc['host']) == len(desc['mode']) == len(desc['kernel']) == 1:
+		title = '%s %s %s' % (desc['host'][0], desc['kernel'][0], desc['mode'][0])
+	else:
+		title = inpath
+	createHTMLSummarySimple(testruns, outfile, title)
 
 # Function: checkArgBool
 # Description:
@@ -5380,13 +5659,10 @@ def configFromFile(file):
 				else:
 					doError('invalid value --> (%s: %s), use "enable/disable"' % (option, value), True)
 			elif(option == 'display'):
-				if value in switchvalues:
-					if value in switchoff:
-						sysvals.display = -1
-					else:
-						sysvals.display = 1
-				else:
-					doError('invalid value --> (%s: %s), use "on/off"' % (option, value), True)
+				disopt = ['on', 'off', 'standby', 'suspend']
+				if value not in disopt:
+					doError('invalid value --> (%s: %s), use %s' % (option, value, disopt), True)
+				sysvals.display = value
 			elif(option == 'gzip'):
 				sysvals.gzip = checkArgBool(option, value)
 			elif(option == 'cgfilter'):
@@ -5402,9 +5678,9 @@ def configFromFile(file):
 				sysvals.cgtest = getArgInt('cgtest', value, 0, 1, False)
 			elif(option == 'cgphase'):
 				d = Data(0)
-				if value not in d.phases:
+				if value not in d.sortedPhases():
 					doError('invalid phase --> (%s: %s), valid phases are %s'\
-						% (option, value, d.phases), True)
+						% (option, value, d.sortedPhases()), True)
 				sysvals.cgphase = value
 			elif(option == 'fadd'):
 				file = sysvals.configFile(value)
@@ -5541,90 +5817,96 @@ def configFromFile(file):
 # Description:
 #	 print out the help text
 def printHelp():
-	print('')
-	print('%s v%s' % (sysvals.title, sysvals.version))
-	print('Usage: sudo sleepgraph <options> <commands>')
-	print('')
-	print('Description:')
-	print('  This tool is designed to assist kernel and OS developers in optimizing')
-	print('  their linux stack\'s suspend/resume time. Using a kernel image built')
-	print('  with a few extra options enabled, the tool will execute a suspend and')
-	print('  capture dmesg and ftrace data until resume is complete. This data is')
-	print('  transformed into a device timeline and an optional callgraph to give')
-	print('  a detailed view of which devices/subsystems are taking the most')
-	print('  time in suspend/resume.')
-	print('')
-	print('  If no specific command is given, the default behavior is to initiate')
-	print('  a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
-	print('')
-	print('  Generates output files in subdirectory: suspend-yymmdd-HHMMSS')
-	print('   HTML output:                    <hostname>_<mode>.html')
-	print('   raw dmesg output:               <hostname>_<mode>_dmesg.txt')
-	print('   raw ftrace output:              <hostname>_<mode>_ftrace.txt')
-	print('')
-	print('Options:')
-	print('   -h           Print this help text')
-	print('   -v           Print the current tool version')
-	print('   -config fn   Pull arguments and config options from file fn')
-	print('   -verbose     Print extra information during execution and analysis')
-	print('   -m mode      Mode to initiate for suspend (default: %s)') % (sysvals.suspendmode)
-	print('   -o name      Overrides the output subdirectory name when running a new test')
-	print('                default: suspend-{date}-{time}')
-	print('   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
-	print('   -addlogs     Add the dmesg and ftrace logs to the html output')
-	print('   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)')
-	print('   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)')
-	print('   -result fn   Export a results table to a text file for parsing.')
-	print('  [testprep]')
-	print('   -sync        Sync the filesystems before starting the test')
-	print('   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test')
-	print('   -display on/off  Turn the display on or off for the test')
-	print('  [advanced]')
-	print('   -gzip        Gzip the trace and dmesg logs to save space')
-	print('   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"')
-	print('   -proc        Add usermode process info into the timeline (default: disabled)')
-	print('   -dev         Add kernel function calls and threads to the timeline (default: disabled)')
-	print('   -x2          Run two suspend/resumes back to back (default: disabled)')
-	print('   -x2delay t   Include t ms delay between multiple test runs (default: 0 ms)')
-	print('   -predelay t  Include t ms delay before 1st suspend (default: 0 ms)')
-	print('   -postdelay t Include t ms delay after last resume (default: 0 ms)')
-	print('   -mindev ms   Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('   -multi n d   Execute <n> consecutive tests at <d> seconds intervals. The outputs will')
-	print('                be created in a new subdirectory with a summary page.')
-	print('  [debug]')
-	print('   -f           Use ftrace to create device callgraphs (default: disabled)')
-	print('   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)')
-	print('   -expandcg    pre-expand the callgraph data in the html output (default: disabled)')
-	print('   -fadd file   Add functions to be graphed in the timeline from a list in a text file')
-	print('   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names')
-	print('   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)')
-	print('   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
-	print('   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
-	print('   -cgfilter S  Filter the callgraph output in the timeline')
-	print('   -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)')
-	print('   -bufsize N   Set trace buffer size to N kilo-bytes (default: all of free memory)')
-	print('')
-	print('Other commands:')
-	print('   -modes       List available suspend modes')
-	print('   -status      Test to see if the system is enabled to run this tool')
-	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
-	print('   -sysinfo     Print out system info extracted from BIOS')
-	print('   -devinfo     Print out the pm settings of all devices which support runtime suspend')
-	print('   -flist       Print the list of functions currently being captured in ftrace')
-	print('   -flistall    Print all functions capable of being captured in ftrace')
-	print('   -summary directory  Create a summary of all test in this dir')
-	print('  [redo]')
-	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
-	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
-	print('')
+	pprint('\n%s v%s\n'\
+	'Usage: sudo sleepgraph <options> <commands>\n'\
+	'\n'\
+	'Description:\n'\
+	'  This tool is designed to assist kernel and OS developers in optimizing\n'\
+	'  their linux stack\'s suspend/resume time. Using a kernel image built\n'\
+	'  with a few extra options enabled, the tool will execute a suspend and\n'\
+	'  capture dmesg and ftrace data until resume is complete. This data is\n'\
+	'  transformed into a device timeline and an optional callgraph to give\n'\
+	'  a detailed view of which devices/subsystems are taking the most\n'\
+	'  time in suspend/resume.\n'\
+	'\n'\
+	'  If no specific command is given, the default behavior is to initiate\n'\
+	'  a suspend/resume and capture the dmesg/ftrace output as an html timeline.\n'\
+	'\n'\
+	'  Generates output files in subdirectory: suspend-yymmdd-HHMMSS\n'\
+	'   HTML output:                    <hostname>_<mode>.html\n'\
+	'   raw dmesg output:               <hostname>_<mode>_dmesg.txt\n'\
+	'   raw ftrace output:              <hostname>_<mode>_ftrace.txt\n'\
+	'\n'\
+	'Options:\n'\
+	'   -h           Print this help text\n'\
+	'   -v           Print the current tool version\n'\
+	'   -config fn   Pull arguments and config options from file fn\n'\
+	'   -verbose     Print extra information during execution and analysis\n'\
+	'   -m mode      Mode to initiate for suspend (default: %s)\n'\
+	'   -o name      Overrides the output subdirectory name when running a new test\n'\
+	'                default: suspend-{date}-{time}\n'\
+	'   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)\n'\
+	'   -addlogs     Add the dmesg and ftrace logs to the html output\n'\
+	'   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)\n'\
+	'   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
+	'   -result fn   Export a results table to a text file for parsing.\n'\
+	'  [testprep]\n'\
+	'   -sync        Sync the filesystems before starting the test\n'\
+	'   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test\n'\
+	'   -display m   Change the display mode to m for the test (on/off/standby/suspend)\n'\
+	'  [advanced]\n'\
+	'   -gzip        Gzip the trace and dmesg logs to save space\n'\
+	'   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"\n'\
+	'   -proc        Add usermode process info into the timeline (default: disabled)\n'\
+	'   -dev         Add kernel function calls and threads to the timeline (default: disabled)\n'\
+	'   -x2          Run two suspend/resumes back to back (default: disabled)\n'\
+	'   -x2delay t   Include t ms delay between multiple test runs (default: 0 ms)\n'\
+	'   -predelay t  Include t ms delay before 1st suspend (default: 0 ms)\n'\
+	'   -postdelay t Include t ms delay after last resume (default: 0 ms)\n'\
+	'   -mindev ms   Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'   -multi n d   Execute <n> consecutive tests at <d> seconds intervals. The outputs will\n'\
+	'                be created in a new subdirectory with a summary page.\n'\
+	'  [debug]\n'\
+	'   -f           Use ftrace to create device callgraphs (default: disabled)\n'\
+	'   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)\n'\
+	'   -expandcg    pre-expand the callgraph data in the html output (default: disabled)\n'\
+	'   -fadd file   Add functions to be graphed in the timeline from a list in a text file\n'\
+	'   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names\n'\
+	'   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)\n'\
+	'   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)\n'\
+	'   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)\n'\
+	'   -cgfilter S  Filter the callgraph output in the timeline\n'\
+	'   -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\
+	'   -bufsize N   Set trace buffer size to N kilo-bytes (default: all of free memory)\n'\
+	'   -devdump     Print out all the raw device data for each phase\n'\
+	'   -cgdump      Print out all the raw callgraph data\n'\
+	'\n'\
+	'Other commands:\n'\
+	'   -modes       List available suspend modes\n'\
+	'   -status      Test to see if the system is enabled to run this tool\n'\
+	'   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table\n'\
+	'   -battery     Print out battery info (if available)\n'\
+	'   -x<mode>     Test xset by toggling the given mode (on/off/standby/suspend)\n'\
+	'   -sysinfo     Print out system info extracted from BIOS\n'\
+	'   -devinfo     Print out the pm settings of all devices which support runtime suspend\n'\
+	'   -flist       Print the list of functions currently being captured in ftrace\n'\
+	'   -flistall    Print all functions capable of being captured in ftrace\n'\
+	'   -summary dir Create a summary of tests in this dir [-genhtml builds missing html]\n'\
+	'  [redo]\n'\
+	'   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)\n'\
+	'   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)\n'\
+	'' % (sysvals.title, sysvals.version, sysvals.suspendmode))
 	return True
 
 # ----------------- MAIN --------------------
 # exec start (skipped if script is loaded as library)
 if __name__ == '__main__':
+	genhtml = False
 	cmd = ''
-	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status']
+	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall',
+		'-devinfo', '-status', '-battery', '-xon', '-xoff', '-xstandby',
+		'-xsuspend', '-xinit', '-xreset', '-xstat']
 	if '-f' in sys.argv:
 		sysvals.cgskip = sysvals.configFile('cgskip.txt')
 	# loop through the command line arguments
@@ -5642,10 +5924,10 @@ if __name__ == '__main__':
 			cmd = arg[1:]
 		elif(arg == '-h'):
 			printHelp()
-			sys.exit()
+			sys.exit(0)
 		elif(arg == '-v'):
-			print("Version %s" % sysvals.version)
-			sys.exit()
+			pprint("Version %s" % sysvals.version)
+			sys.exit(0)
 		elif(arg == '-x2'):
 			sysvals.execcount = 2
 		elif(arg == '-x2delay'):
@@ -5660,8 +5942,16 @@ if __name__ == '__main__':
 			sysvals.skiphtml = True
 		elif(arg == '-cgdump'):
 			sysvals.cgdump = True
+		elif(arg == '-devdump'):
+			sysvals.devdump = True
+		elif(arg == '-genhtml'):
+			genhtml = True
 		elif(arg == '-addlogs'):
 			sysvals.dmesglog = sysvals.ftracelog = True
+		elif(arg == '-addlogdmesg'):
+			sysvals.dmesglog = True
+		elif(arg == '-addlogftrace'):
+			sysvals.ftracelog = True
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
 		elif(arg == '-proc'):
@@ -5688,14 +5978,11 @@ if __name__ == '__main__':
 			try:
 				val = args.next()
 			except:
-				doError('-display requires "on" or "off"', True)
-			if val.lower() in switchvalues:
-				if val.lower() in switchoff:
-					sysvals.display = -1
-				else:
-					sysvals.display = 1
-			else:
-				doError('invalid option: %s, use "on/off"' % val, True)
+				doError('-display requires an mode value', True)
+			disopt = ['on', 'off', 'standby', 'suspend']
+			if val.lower() not in disopt:
+				doError('valid display mode values are %s' % disopt, True)
+			sysvals.display = val.lower()
 		elif(arg == '-maxdepth'):
 			sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000)
 		elif(arg == '-rtcwake'):
@@ -5724,9 +6011,9 @@ if __name__ == '__main__':
 			except:
 				doError('No phase name supplied', True)
 			d = Data(0)
-			if val not in d.phases:
+			if val not in d.phasedef:
 				doError('invalid phase --> (%s: %s), valid phases are %s'\
-					% (arg, val, d.phases), True)
+					% (arg, val, d.phasedef.keys()), True)
 			sysvals.cgphase = val
 		elif(arg == '-cgfilter'):
 			try:
@@ -5828,6 +6115,7 @@ if __name__ == '__main__':
 			except:
 				doError('No result file supplied', True)
 			sysvals.result = val
+			sysvals.signalHandlerInit()
 		else:
 			doError('Invalid argument: '+arg, True)
 
@@ -5852,10 +6140,20 @@ if __name__ == '__main__':
 
 	# just run a utility command and exit
 	if(cmd != ''):
+		ret = 0
 		if(cmd == 'status'):
-			statusCheck(True)
+			if not statusCheck(True):
+				ret = 1
 		elif(cmd == 'fpdt'):
-			getFPDT(True)
+			if not getFPDT(True):
+				ret = 1
+		elif(cmd == 'battery'):
+			out = getBattery()
+			if out:
+				pprint('AC Connect    : %s\nBattery Charge: %d' % out)
+			else:
+				pprint('no battery found')
+				ret = 1
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		elif(cmd == 'devinfo'):
@@ -5867,26 +6165,29 @@ if __name__ == '__main__':
 		elif(cmd == 'flistall'):
 			sysvals.getFtraceFilterFunctions(False)
 		elif(cmd == 'summary'):
-			runSummary(sysvals.outdir, True)
-		sys.exit()
+			runSummary(sysvals.outdir, True, genhtml)
+		elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']):
+			sysvals.verbose = True
+			ret = displayControl(cmd[1:])
+		elif(cmd == 'xstat'):
+			pprint('Display Status: %s' % displayControl('stat').upper())
+		sys.exit(ret)
 
 	# if instructed, re-analyze existing data files
 	if(sysvals.notestrun):
 		stamp = rerunTest()
 		sysvals.outputResult(stamp)
-		sys.exit()
+		sys.exit(0)
 
 	# verify that we can run a test
-	if(not statusCheck()):
-		doError('Check FAILED, aborting the test run!')
+	error = statusCheck()
+	if(error):
+		doError(error)
 
-	# extract mem modes and convert
+	# extract mem/disk extra modes and convert
 	mode = sysvals.suspendmode
-	if 'mem' == mode[:3]:
-		if '-' in mode:
-			memmode = mode.split('-')[-1]
-		else:
-			memmode = 'deep'
+	if mode.startswith('mem'):
+		memmode = mode.split('-', 1)[-1] if '-' in mode else 'deep'
 		if memmode == 'shallow':
 			mode = 'standby'
 		elif memmode ==  's2idle':
@@ -5895,13 +6196,16 @@ if __name__ == '__main__':
 			mode = 'mem'
 		sysvals.memmode = memmode
 		sysvals.suspendmode = mode
+	if mode.startswith('disk-'):
+		sysvals.diskmode = mode.split('-', 1)[-1]
+		sysvals.suspendmode = 'disk'
 
 	sysvals.systemInfo(dmidecode(sysvals.mempath))
 
 	setRuntimeSuspend(True)
 	if sysvals.display:
-		call('xset -d :0.0 dpms 0 0 0', shell=True)
-		call('xset -d :0.0 s off', shell=True)
+		displayControl('init')
+	ret = 0
 	if sysvals.multitest['run']:
 		# run multiple tests in a separate subdirectory
 		if not sysvals.outdir:
@@ -5911,22 +6215,23 @@ if __name__ == '__main__':
 			os.mkdir(sysvals.outdir)
 		for i in range(sysvals.multitest['count']):
 			if(i != 0):
-				print('Waiting %d seconds...' % (sysvals.multitest['delay']))
+				pprint('Waiting %d seconds...' % (sysvals.multitest['delay']))
 				time.sleep(sysvals.multitest['delay'])
-			print('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
+			pprint('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
 			fmt = 'suspend-%y%m%d-%H%M%S'
 			sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
-			runTest(i+1)
-			print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
+			ret = runTest(i+1)
+			pprint('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
 			sysvals.logmsg = ''
 		if not sysvals.skiphtml:
-			runSummary(sysvals.outdir, False)
-		sysvals.sudouser(sysvals.outdir)
+			runSummary(sysvals.outdir, False, False)
+		sysvals.sudoUserchown(sysvals.outdir)
 	else:
 		if sysvals.outdir:
 			sysvals.testdir = sysvals.outdir
 		# run the test in the current directory
-		runTest()
+		ret = runTest()
 	if sysvals.display:
-		call('xset -d :0.0 s reset', shell=True)
+		displayControl('reset')
 	setRuntimeSuspend(False)
+	sys.exit(ret)
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 29f50d4cfea0..84e2b648e622 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -28,6 +28,7 @@ import subprocess
 import os
 import time
 import re
+import signal
 import sys
 import getopt
 import Gnuplot
@@ -78,11 +79,12 @@ def print_help():
     print('    Or')
     print('      ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
     print('    To generate trace file, parse and plot, use (sudo required):')
-    print('      sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name>')
+    print('      sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
     print('    Or')
-    print('      sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name>')
+    print('      sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
     print('    Optional argument:')
-    print('      cpus:  comma separated list of CPUs')
+    print('      cpus:   comma separated list of CPUs')
+    print('      kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
     print('  Output:')
     print('    If not already present, creates a "results/test_name" folder in the current working directory with:')
     print('      cpu.csv - comma seperated values file with trace contents and some additional calculations.')
@@ -379,7 +381,7 @@ def clear_trace_file():
         f_handle.close()
     except:
         print('IO error clearing trace file ')
-        quit()
+        sys.exit(2)
 
 def enable_trace():
     """ Enable trace """
@@ -389,7 +391,7 @@ def enable_trace():
                  , 'w').write("1")
     except:
         print('IO error enabling trace ')
-        quit()
+        sys.exit(2)
 
 def disable_trace():
     """ Disable trace """
@@ -399,17 +401,17 @@ def disable_trace():
                  , 'w').write("0")
     except:
         print('IO error disabling trace ')
-        quit()
+        sys.exit(2)
 
 def set_trace_buffer_size():
     """ Set trace buffer size """
 
     try:
-       open('/sys/kernel/debug/tracing/buffer_size_kb'
-                 , 'w').write("10240")
+       with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp:
+          fp.write(memory)
     except:
-        print('IO error setting trace buffer size ')
-        quit()
+       print('IO error setting trace buffer size ')
+       sys.exit(2)
 
 def free_trace_buffer():
     """ Free the trace buffer memory """
@@ -418,8 +420,8 @@ def free_trace_buffer():
        open('/sys/kernel/debug/tracing/buffer_size_kb'
                  , 'w').write("1")
     except:
-        print('IO error setting trace buffer size ')
-        quit()
+        print('IO error freeing trace buffer ')
+        sys.exit(2)
 
 def read_trace_data(filename):
     """ Read and parse trace data """
@@ -431,7 +433,7 @@ def read_trace_data(filename):
         data = open(filename, 'r').read()
     except:
         print('Error opening ', filename)
-        quit()
+        sys.exit(2)
 
     for line in data.splitlines():
         search_obj = \
@@ -489,10 +491,22 @@ def read_trace_data(filename):
 # Now seperate the main overall csv file into per CPU csv files.
     split_csv()
 
+def signal_handler(signal, frame):
+    print(' SIGINT: Forcing cleanup before exit.')
+    if interval:
+        disable_trace()
+        clear_trace_file()
+        # Free the memory
+        free_trace_buffer()
+        sys.exit(0)
+
+signal.signal(signal.SIGINT, signal_handler)
+
 interval = ""
 filename = ""
 cpu_list = ""
 testname = ""
+memory = "10240"
 graph_data_present = False;
 
 valid1 = False
@@ -501,7 +515,7 @@ valid2 = False
 cpu_mask = zeros((MAX_CPUS,), dtype=int)
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:",["help","trace_file=","interval=","cpu=","name="])
+    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
 except getopt.GetoptError:
     print_help()
     sys.exit(2)
@@ -521,6 +535,8 @@ for opt, arg in opts:
     elif opt in ("-n", "--name"):
         valid2 = True
         testname = arg
+    elif opt in ("-m", "--memory"):
+        memory = arg
 
 if not (valid1 and valid2):
     print_help()
@@ -569,6 +585,11 @@ current_max_cpu = 0
 
 read_trace_data(filename)
 
+clear_trace_file()
+# Free the memory
+if interval:
+    free_trace_buffer()
+
 if graph_data_present == False:
     print('No valid data to plot')
     sys.exit(2)
@@ -593,9 +614,4 @@ for root, dirs, files in os.walk('.'):
     for f in files:
         fix_ownership(f)
 
-clear_trace_file()
-# Free the memory
-if interval:
-    free_trace_buffer()
-
 os.chdir('../../')
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index a9bc914a8fe8..2ab25aa38263 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -25,4 +25,4 @@ install : turbostat
 	install -d  $(DESTDIR)$(PREFIX)/bin
 	install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
-	install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ccf2a69365cc..a6db83a88e85 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -54,9 +54,12 @@ name as necessary to disambiguate it from others is necessary.  Note that option
 .PP
 \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set.  If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed.  Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed.  Otherwise, the system summary plus the specified set of CPUs are printed.  The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44
 .PP
-\fB--hide column\fP do not show the specified columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--hide sysfs" to hide the sysfs statistics columns as a group.
+\fB--hide column\fP do not show the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--hide sysfs" to hide the sysfs statistics columns as a group.
 .PP
-\fB--show column\fP show only the specified columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--show sysfs" to show the sysfs statistics columns as a group.
+\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default.  Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
+The column name "all" can be used to enable all disabled-by-default built-in counters.
+.PP
+\fB--show column\fP show only the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--show sysfs" to show the sysfs statistics columns as a group.
 .PP
 \fB--Dump\fP displays the raw counter values.
 .PP
@@ -64,6 +67,8 @@ name as necessary to disambiguate it from others is necessary.  Note that option
 .PP
 \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
 .PP
+\fB--num_iterations num\fP number of the measurement iterations.
+.PP
 \fB--out output_file\fP turbostat output is written to the specified output_file.
 The file is truncated if it already exists, and it is created if it does not exist.
 .PP
@@ -86,6 +91,8 @@ displays the statistics gathered since it was forked.
 The system configuration dump (if --quiet is not used) is followed by statistics.  The first row of the statistics labels the content of each column (below).  The second row of statistics is the system summary line.  The system summary line has a '-' in the columns for the Package, Core, and CPU.  The contents of the system summary line depends on the type of column.  Columns that count items (eg. IRQ) show the sum across all CPUs in the system.  Columns that show a percentage show the average across all CPUs in the system.  Columns that dump raw MSR values simply show 0 in the summary.  After the system summary row, each row describes a specific Package/Core/CPU.  Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
 .SH COLUMN DESCRIPTIONS
 .nf
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any.  This counter is disabled by default, and is enabled with "--enable usec", or --debug.  On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected.  This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug".  On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
 \fBCore\fP processor core number.  Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
 \fBCPU\fP Linux CPU (logical processor) number.  Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
 \fBPackage\fP processor package number -- not present on systems with a single processor package.
@@ -99,7 +106,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 \fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3....  The system summary is the average of all CPUs in the system.  Note that these are software, reflecting what was requested.  The hardware counters reflect what was actually achieved.
 \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.  These numbers are from hardware residency counters.
 \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
-\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
 \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
 \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
 \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.  These numbers are from hardware residency counters.
@@ -107,7 +114,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 \fBCorWatt\fP Watts consumed by the core part of the package.
 \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
 \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
-\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.
+\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.  Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system.  Note that the meaning of this field is model specific.  For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits.  Comparing PkgWatt and PkgTmp to system limits is necessary.
 \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
 .fi
 .SH TOO MUCH INFORMATION EXAMPLE
@@ -262,6 +269,21 @@ CPU	  PRF_CTRL
 
 .fi
 
+.SH INPUT
+
+For interval-mode, turbostat will immediately end the current interval
+when it sees a newline on standard input.
+turbostat will then start the next interval.
+Control-C will be send a SIGINT to turbostat,
+which will immediately abort the program with no further processing.
+.SH SIGNALS
+
+SIGINT will interrupt interval-mode.
+The end-of-interval data will be collected and displayed before turbostat exits.
+
+SIGUSR1 will end current interval,
+end-of-interval data will be collected and displayed before turbostat
+starts a new interval.
 .SH NOTES
 
 .B "turbostat "
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index bd9c6b31a504..328f62e6ea02 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -29,6 +29,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
+#include <sys/select.h>
 #include <sys/resource.h>
 #include <fcntl.h>
 #include <signal.h>
@@ -47,9 +48,13 @@
 char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
+struct timeval interval_tv = {5, 0};
 struct timespec interval_ts = {5, 0};
+struct timespec one_msec = {0, 1000000};
+unsigned int num_iterations;
 unsigned int debug;
 unsigned int quiet;
+unsigned int shown;
 unsigned int sums_need_wide_columns;
 unsigned int rapl_joules;
 unsigned int summary_only;
@@ -58,6 +63,7 @@ unsigned int dump_only;
 unsigned int do_snb_cstates;
 unsigned int do_knl_cstates;
 unsigned int do_slm_cstates;
+unsigned int do_cnl_cstates;
 unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
@@ -80,6 +86,8 @@ unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned long long  gfx_cur_rc6_ms;
+unsigned long long cpuidle_cur_cpu_lpi_us;
+unsigned long long cpuidle_cur_sys_lpi_us;
 unsigned int gfx_cur_mhz;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
@@ -87,6 +95,7 @@ double rapl_power_units, rapl_time_units;
 double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
 unsigned int do_core_perf_limit_reasons;
+unsigned int has_automatic_cstate_conversion;
 unsigned int do_gfx_perf_limit_reasons;
 unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
@@ -100,6 +109,7 @@ unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
 unsigned int has_misc_feature_control;
+unsigned int first_counter_read = 1;
 
 #define RAPL_PKG		(1 << 0)
 					/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -147,7 +157,9 @@ char *progname;
 #define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
-#define MAX_ADDED_COUNTERS 16
+#define MAX_ADDED_COUNTERS 8
+#define MAX_ADDED_THREAD_COUNTERS 24
+#define BITMASK_SIZE 32
 
 struct thread_data {
 	struct timeval tv_begin;
@@ -159,10 +171,12 @@ struct thread_data {
 	unsigned long long  irq_count;
 	unsigned int smi_count;
 	unsigned int cpu_id;
+	unsigned int apic_id;
+	unsigned int x2apic_id;
 	unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
-	unsigned long long counter[MAX_ADDED_COUNTERS];
+	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
 } *thread_even, *thread_odd;
 
 struct core_data {
@@ -183,6 +197,8 @@ struct pkg_data {
 	unsigned long long pc8;
 	unsigned long long pc9;
 	unsigned long long pc10;
+	unsigned long long cpu_lpi;
+	unsigned long long sys_lpi;
 	unsigned long long pkg_wtd_core_c0;
 	unsigned long long pkg_any_core_c0;
 	unsigned long long pkg_any_gfxe_c0;
@@ -203,12 +219,21 @@ struct pkg_data {
 #define ODD_COUNTERS thread_odd, core_odd, package_odd
 #define EVEN_COUNTERS thread_even, core_even, package_even
 
-#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
-	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \
-		topo.num_threads_per_core + \
-		(core_no) * topo.num_threads_per_core + (thread_no))
-#define GET_CORE(core_base, core_no, pkg_no) \
-	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
+#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
+	((thread_base) +						      \
+	 ((pkg_no) *							      \
+	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
+	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
+	 ((core_no) * topo.threads_per_core) +				      \
+	 (thread_no))
+
+#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
+	((core_base) +							\
+	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
+	 ((node_no) * topo.cores_per_node) +				\
+	 (core_no))
+
+
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 
 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
@@ -244,14 +269,25 @@ struct system_summary {
 	struct pkg_data packages;
 } average;
 
+struct cpu_topology {
+	int physical_package_id;
+	int logical_cpu_id;
+	int physical_node_id;
+	int logical_node_id;	/* 0-based count within the package */
+	int physical_core_id;
+	int thread_id;
+	cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
+} *cpus;
 
 struct topo_params {
 	int num_packages;
 	int num_cpus;
 	int num_cores;
 	int max_cpu_num;
-	int num_cores_per_pkg;
-	int num_threads_per_core;
+	int max_node_num;
+	int nodes_per_pkg;
+	int cores_per_node;
+	int threads_per_core;
 } topo;
 
 struct timeval tv_even, tv_odd, tv_delta;
@@ -273,27 +309,33 @@ int cpu_is_not_present(int cpu)
 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
 {
-	int retval, pkg_no, core_no, thread_no;
+	int retval, pkg_no, core_no, thread_no, node_no;
 
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-			for (thread_no = 0; thread_no <
-				topo.num_threads_per_core; ++thread_no) {
-				struct thread_data *t;
-				struct core_data *c;
-				struct pkg_data *p;
-
-				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
-
-				if (cpu_is_not_present(t->cpu_id))
-					continue;
-
-				c = GET_CORE(core_base, core_no, pkg_no);
-				p = GET_PKG(pkg_base, pkg_no);
-
-				retval = func(t, c, p);
-				if (retval)
-					return retval;
+		for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
+			for (node_no = 0; node_no < topo.nodes_per_pkg;
+			     node_no++) {
+				for (thread_no = 0; thread_no <
+					topo.threads_per_core; ++thread_no) {
+					struct thread_data *t;
+					struct core_data *c;
+					struct pkg_data *p;
+
+					t = GET_THREAD(thread_base, thread_no,
+						       core_no, node_no,
+						       pkg_no);
+
+					if (cpu_is_not_present(t->cpu_id))
+						continue;
+
+					c = GET_CORE(core_base, core_no,
+						     node_no, pkg_no);
+					p = GET_PKG(pkg_base, pkg_no);
+
+					retval = func(t, c, p);
+					if (retval)
+						return retval;
+				}
 			}
 		}
 	}
@@ -342,17 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 }
 
 /*
- * Each string in this array is compared in --show and --hide cmdline.
- * Thus, strings that are proper sub-sets must follow their more specific peers.
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end, we can't have strings that contain them
+ *    matching on them for --show and --hide.
  */
 struct msr_counter bic[] = {
+	{ 0x0, "usec" },
+	{ 0x0, "Time_Of_Day_Seconds" },
 	{ 0x0, "Package" },
+	{ 0x0, "Node" },
 	{ 0x0, "Avg_MHz" },
+	{ 0x0, "Busy%" },
 	{ 0x0, "Bzy_MHz" },
 	{ 0x0, "TSC_MHz" },
 	{ 0x0, "IRQ" },
 	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
-	{ 0x0, "Busy%" },
+	{ 0x0, "sysfs" },
 	{ 0x0, "CPU%c1" },
 	{ 0x0, "CPU%c3" },
 	{ 0x0, "CPU%c6" },
@@ -369,7 +417,9 @@ struct msr_counter bic[] = {
 	{ 0x0, "Pkg%pc7" },
 	{ 0x0, "Pkg%pc8" },
 	{ 0x0, "Pkg%pc9" },
-	{ 0x0, "Pkg%pc10" },
+	{ 0x0, "Pk%pc10" },
+	{ 0x0, "CPU%LPI" },
+	{ 0x0, "SYS%LPI" },
 	{ 0x0, "PkgWatt" },
 	{ 0x0, "CorWatt" },
 	{ 0x0, "GFXWatt" },
@@ -381,70 +431,80 @@ struct msr_counter bic[] = {
 	{ 0x0, "Cor_J" },
 	{ 0x0, "GFX_J" },
 	{ 0x0, "RAM_J" },
-	{ 0x0, "Core" },
-	{ 0x0, "CPU" },
 	{ 0x0, "Mod%c6" },
-	{ 0x0, "sysfs" },
 	{ 0x0, "Totl%C0" },
 	{ 0x0, "Any%C0" },
 	{ 0x0, "GFX%C0" },
 	{ 0x0, "CPUGFX%" },
+	{ 0x0, "Core" },
+	{ 0x0, "CPU" },
+	{ 0x0, "APIC" },
+	{ 0x0, "X2APIC" },
 };
 
-
-
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
-#define	BIC_Package	(1ULL << 0)
-#define	BIC_Avg_MHz	(1ULL << 1)
-#define	BIC_Bzy_MHz	(1ULL << 2)
-#define	BIC_TSC_MHz	(1ULL << 3)
-#define	BIC_IRQ		(1ULL << 4)
-#define	BIC_SMI		(1ULL << 5)
-#define	BIC_Busy	(1ULL << 6)
-#define	BIC_CPU_c1	(1ULL << 7)
-#define	BIC_CPU_c3	(1ULL << 8)
-#define	BIC_CPU_c6	(1ULL << 9)
-#define	BIC_CPU_c7	(1ULL << 10)
-#define	BIC_ThreadC	(1ULL << 11)
-#define	BIC_CoreTmp	(1ULL << 12)
-#define	BIC_CoreCnt	(1ULL << 13)
-#define	BIC_PkgTmp	(1ULL << 14)
-#define	BIC_GFX_rc6	(1ULL << 15)
-#define	BIC_GFXMHz	(1ULL << 16)
-#define	BIC_Pkgpc2	(1ULL << 17)
-#define	BIC_Pkgpc3	(1ULL << 18)
-#define	BIC_Pkgpc6	(1ULL << 19)
-#define	BIC_Pkgpc7	(1ULL << 20)
-#define	BIC_Pkgpc8	(1ULL << 21)
-#define	BIC_Pkgpc9	(1ULL << 22)
-#define	BIC_Pkgpc10	(1ULL << 23)
-#define	BIC_PkgWatt	(1ULL << 24)
-#define	BIC_CorWatt	(1ULL << 25)
-#define	BIC_GFXWatt	(1ULL << 26)
-#define	BIC_PkgCnt	(1ULL << 27)
-#define	BIC_RAMWatt	(1ULL << 28)
-#define	BIC_PKG__	(1ULL << 29)
-#define	BIC_RAM__	(1ULL << 30)
-#define	BIC_Pkg_J	(1ULL << 31)
-#define	BIC_Cor_J	(1ULL << 32)
-#define	BIC_GFX_J	(1ULL << 33)
-#define	BIC_RAM_J	(1ULL << 34)
-#define	BIC_Core	(1ULL << 35)
-#define	BIC_CPU		(1ULL << 36)
-#define	BIC_Mod_c6	(1ULL << 37)
-#define	BIC_sysfs	(1ULL << 38)
-#define	BIC_Totl_c0	(1ULL << 39)
-#define	BIC_Any_c0	(1ULL << 40)
-#define	BIC_GFX_c0	(1ULL << 41)
-#define	BIC_CPUGFX	(1ULL << 42)
-
-unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
-unsigned long long bic_present = BIC_sysfs;
+#define	BIC_USEC	(1ULL << 0)
+#define	BIC_TOD		(1ULL << 1)
+#define	BIC_Package	(1ULL << 2)
+#define	BIC_Node	(1ULL << 3)
+#define	BIC_Avg_MHz	(1ULL << 4)
+#define	BIC_Busy	(1ULL << 5)
+#define	BIC_Bzy_MHz	(1ULL << 6)
+#define	BIC_TSC_MHz	(1ULL << 7)
+#define	BIC_IRQ		(1ULL << 8)
+#define	BIC_SMI		(1ULL << 9)
+#define	BIC_sysfs	(1ULL << 10)
+#define	BIC_CPU_c1	(1ULL << 11)
+#define	BIC_CPU_c3	(1ULL << 12)
+#define	BIC_CPU_c6	(1ULL << 13)
+#define	BIC_CPU_c7	(1ULL << 14)
+#define	BIC_ThreadC	(1ULL << 15)
+#define	BIC_CoreTmp	(1ULL << 16)
+#define	BIC_CoreCnt	(1ULL << 17)
+#define	BIC_PkgTmp	(1ULL << 18)
+#define	BIC_GFX_rc6	(1ULL << 19)
+#define	BIC_GFXMHz	(1ULL << 20)
+#define	BIC_Pkgpc2	(1ULL << 21)
+#define	BIC_Pkgpc3	(1ULL << 22)
+#define	BIC_Pkgpc6	(1ULL << 23)
+#define	BIC_Pkgpc7	(1ULL << 24)
+#define	BIC_Pkgpc8	(1ULL << 25)
+#define	BIC_Pkgpc9	(1ULL << 26)
+#define	BIC_Pkgpc10	(1ULL << 27)
+#define BIC_CPU_LPI	(1ULL << 28)
+#define BIC_SYS_LPI	(1ULL << 29)
+#define	BIC_PkgWatt	(1ULL << 30)
+#define	BIC_CorWatt	(1ULL << 31)
+#define	BIC_GFXWatt	(1ULL << 32)
+#define	BIC_PkgCnt	(1ULL << 33)
+#define	BIC_RAMWatt	(1ULL << 34)
+#define	BIC_PKG__	(1ULL << 35)
+#define	BIC_RAM__	(1ULL << 36)
+#define	BIC_Pkg_J	(1ULL << 37)
+#define	BIC_Cor_J	(1ULL << 38)
+#define	BIC_GFX_J	(1ULL << 39)
+#define	BIC_RAM_J	(1ULL << 40)
+#define	BIC_Mod_c6	(1ULL << 41)
+#define	BIC_Totl_c0	(1ULL << 42)
+#define	BIC_Any_c0	(1ULL << 43)
+#define	BIC_GFX_c0	(1ULL << 44)
+#define	BIC_CPUGFX	(1ULL << 45)
+#define	BIC_Core	(1ULL << 46)
+#define	BIC_CPU		(1ULL << 47)
+#define	BIC_APIC	(1ULL << 48)
+#define	BIC_X2APIC	(1ULL << 49)
+
+#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+
+unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
 
 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
 
+
 #define MAX_DEFERRED 16
 char *deferred_skip_names[MAX_DEFERRED];
 int deferred_skip_index;
@@ -464,16 +524,34 @@ void help(void)
 	"when COMMAND completes.\n"
 	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
 	"to print statistics, until interrupted.\n"
-	"--add		add a counter\n"
-	"		eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
-	"--cpu	cpu-set	limit output to summary plus cpu-set:\n"
-	"		{core | package | j,k,l..m,n-p }\n"
-	"--quiet	skip decoding system configuration header\n"
-	"--interval sec	Override default 5-second measurement interval\n"
-	"--help		print this help message\n"
-	"--list		list column headers only\n"
-	"--out file	create or truncate \"file\" for all output\n"
-	"--version	print version information\n"
+	"  -a, --add	add a counter\n"
+	"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+	"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
+	"		  {core | package | j,k,l..m,n-p }\n"
+	"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
+	"  -D, --Dump	displays the raw counter values\n"
+	"  -e, --enable	[all | column]\n"
+	"		shows all or the specified disabled column\n"
+	"  -H, --hide [column|column,column,...]\n"
+	"		hide the specified column(s)\n"
+	"  -i, --interval sec.subsec\n"
+	"		Override default 5-second measurement interval\n"
+	"  -J, --Joules	displays energy in Joules instead of Watts\n"
+	"  -l, --list	list column headers only\n"
+	"  -n, --num_iterations num\n"
+	"		number of the measurement iterations\n"
+	"  -o, --out file\n"
+	"		create or truncate \"file\" for all output\n"
+	"  -q, --quiet	skip decoding system configuration header\n"
+	"  -s, --show [column|column,column,...]\n"
+	"		show only the specified column(s)\n"
+	"  -S, --Summary\n"
+	"		limits output to 1-line system summary per interval\n"
+	"  -T, --TCC temperature\n"
+	"		sets the Thermal Control Circuit temperature in\n"
+	"		  degrees Celsius\n"
+	"  -h, --help	print this help message\n"
+	"  -v, --version	print version information\n"
 	"\n"
 	"For more help, run \"man turbostat\"\n");
 }
@@ -496,6 +574,9 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
 		if (comma)
 			*comma = '\0';
 
+		if (!strcmp(name_list, "all"))
+			return ~0;
+
 		for (i = 0; i < MAX_BIC; ++i) {
 			if (!strcmp(name_list, bic[i].name)) {
 				retval |= (1ULL << i);
@@ -532,14 +613,22 @@ void print_header(char *delim)
 	struct msr_counter *mp;
 	int printed = 0;
 
-	if (debug)
-		outp += sprintf(outp, "usec %s", delim);
+	if (DO_BIC(BIC_USEC))
+		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_TOD))
+		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Package))
 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_Node))
+		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Core))
 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_CPU))
 		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_APIC))
+		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_X2APIC))
+		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Avg_MHz))
 		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Busy))
@@ -576,7 +665,7 @@ void print_header(char *delim)
 
 	if (DO_BIC(BIC_CPU_c1))
 		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
-	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
 		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_CPU_c6))
 		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
@@ -635,6 +724,10 @@ void print_header(char *delim)
 		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Pkgpc10))
 		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_CPU_LPI))
+		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_SYS_LPI))
+		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
 
 	if (do_rapl && !rapl_joules) {
 		if (DO_BIC(BIC_PkgWatt))
@@ -739,6 +832,9 @@ int dump_counters(struct thread_data *t, struct core_data *c,
 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
+		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
+		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
+		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
@@ -786,7 +882,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
 		return 0;
 
-	if (debug) {
+	if (DO_BIC(BIC_USEC)) {
 		/* on each row, print how many usec each timestamp took to gather */
 		struct timeval tv;
 
@@ -794,6 +890,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
 	}
 
+	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
+	if (DO_BIC(BIC_TOD))
+		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
+
 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 
 	tsc = t->tsc * tsc_tweak;
@@ -802,10 +902,16 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (t == &average.threads) {
 		if (DO_BIC(BIC_Package))
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+		if (DO_BIC(BIC_Node))
+			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_Core))
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_CPU))
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+		if (DO_BIC(BIC_APIC))
+			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+		if (DO_BIC(BIC_X2APIC))
+			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 	} else {
 		if (DO_BIC(BIC_Package)) {
 			if (p)
@@ -813,6 +919,15 @@ int format_counters(struct thread_data *t, struct core_data *c,
 			else
 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		}
+		if (DO_BIC(BIC_Node)) {
+			if (t)
+				outp += sprintf(outp, "%s%d",
+						(printed++ ? delim : ""),
+					      cpus[t->cpu_id].physical_node_id);
+			else
+				outp += sprintf(outp, "%s-",
+						(printed++ ? delim : ""));
+		}
 		if (DO_BIC(BIC_Core)) {
 			if (c)
 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
@@ -821,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		}
 		if (DO_BIC(BIC_CPU))
 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
+		if (DO_BIC(BIC_APIC))
+			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
+		if (DO_BIC(BIC_X2APIC))
+			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
 	}
 
 	if (DO_BIC(BIC_Avg_MHz))
@@ -882,7 +1001,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		goto done;
 
-	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
 	if (DO_BIC(BIC_CPU_c6))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
@@ -959,6 +1078,11 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (DO_BIC(BIC_Pkgpc10))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
 
+	if (DO_BIC(BIC_CPU_LPI))
+		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+	if (DO_BIC(BIC_SYS_LPI))
+		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+
 	/*
  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
  	 * indicate that results are suspect by printing "**" in fraction place.
@@ -1006,7 +1130,8 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	}
 
 done:
-	outp += sprintf(outp, "\n");
+	if (*(outp - 1) != '\n')
+		outp += sprintf(outp, "\n");
 
 	return 0;
 }
@@ -1038,9 +1163,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
 	if (!printed || !summary_only)
 		print_header("\t");
 
-	if (topo.num_cpus > 1)
-		format_counters(&average.threads, &average.cores,
-			&average.packages);
+	format_counters(&average.threads, &average.cores, &average.packages);
 
 	printed = 1;
 
@@ -1083,6 +1206,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 	old->pc8 = new->pc8 - old->pc8;
 	old->pc9 = new->pc9 - old->pc9;
 	old->pc10 = new->pc10 - old->pc10;
+	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
+	old->sys_lpi = new->sys_lpi - old->sys_lpi;
 	old->pkg_temp_c = new->pkg_temp_c;
 
 	/* flag an error when rc6 counter resets/wraps */
@@ -1140,6 +1265,21 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 	int i;
 	struct msr_counter *mp;
 
+	/* we run cpuid just the 1st time, copy the results */
+	if (DO_BIC(BIC_APIC))
+		new->apic_id = old->apic_id;
+	if (DO_BIC(BIC_X2APIC))
+		new->x2apic_id = old->x2apic_id;
+
+	/*
+	 * the timestamps from start of measurement interval are in "old"
+	 * the timestamp from end of measurement interval are in "new"
+	 * over-write old w/ new so we can print end of interval values
+	 */
+
+	old->tv_begin = new->tv_begin;
+	old->tv_end = new->tv_end;
+
 	old->tsc = new->tsc - old->tsc;
 
 	/* check for TSC < 1 Mcycles over interval */
@@ -1228,6 +1368,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	int i;
 	struct msr_counter  *mp;
 
+	t->tv_begin.tv_sec = 0;
+	t->tv_begin.tv_usec = 0;
+	t->tv_end.tv_sec = 0;
+	t->tv_end.tv_usec = 0;
+
 	t->tsc = 0;
 	t->aperf = 0;
 	t->mperf = 0;
@@ -1260,6 +1405,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	p->pc8 = 0;
 	p->pc9 = 0;
 	p->pc10 = 0;
+	p->cpu_lpi = 0;
+	p->sys_lpi = 0;
 
 	p->energy_pkg = 0;
 	p->energy_dram = 0;
@@ -1286,6 +1433,19 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	int i;
 	struct msr_counter *mp;
 
+	/* copy un-changing apic_id's */
+	if (DO_BIC(BIC_APIC))
+		average.threads.apic_id = t->apic_id;
+	if (DO_BIC(BIC_X2APIC))
+		average.threads.x2apic_id = t->x2apic_id;
+
+	/* remember first tv_begin */
+	if (average.threads.tv_begin.tv_sec == 0)
+		average.threads.tv_begin = t->tv_begin;
+
+	/* remember last tv_end */
+	average.threads.tv_end = t->tv_end;
+
 	average.threads.tsc += t->tsc;
 	average.threads.aperf += t->aperf;
 	average.threads.mperf += t->mperf;
@@ -1341,6 +1501,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	average.packages.pc9 += p->pc9;
 	average.packages.pc10 += p->pc10;
 
+	average.packages.cpu_lpi = p->cpu_lpi;
+	average.packages.sys_lpi = p->sys_lpi;
+
 	average.packages.energy_pkg += p->energy_pkg;
 	average.packages.energy_dram += p->energy_dram;
 	average.packages.energy_cores += p->energy_cores;
@@ -1487,7 +1650,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 		if (get_msr(cpu, mp->msr_num, counterp))
 			return -1;
 	} else {
-		char path[128];
+		char path[128 + PATH_BYTES];
 
 		if (mp->flags & SYSFS_PERCPU) {
 			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
@@ -1502,6 +1665,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 	return 0;
 }
 
+void get_apic_id(struct thread_data *t)
+{
+	unsigned int eax, ebx, ecx, edx, max_level;
+
+	eax = ebx = ecx = edx = 0;
+
+	if (!genuine_intel)
+		return;
+
+	__cpuid(0, max_level, ebx, ecx, edx);
+
+	__cpuid(1, eax, ebx, ecx, edx);
+	t->apic_id = (ebx >> 24) & 0xf;
+
+	if (max_level < 0xb)
+		return;
+
+	if (!DO_BIC(BIC_X2APIC))
+		return;
+
+	ecx = 0;
+	__cpuid(0xb, eax, ebx, ecx, edx);
+	t->x2apic_id = edx;
+
+	if (debug && (t->apic_id != t->x2apic_id))
+		fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
+}
+
 /*
  * get_counters(...)
  * migrate to cpu
@@ -1515,7 +1706,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	struct msr_counter *mp;
 	int i;
 
-
 	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
 
 	if (cpu_migrate(cpu)) {
@@ -1523,6 +1713,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return -1;
 	}
 
+	if (first_counter_read)
+		get_apic_id(t);
 retry:
 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
 
@@ -1603,7 +1795,7 @@ retry:
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		goto done;
 
-	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
+	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
 			return -6;
 	}
@@ -1684,6 +1876,11 @@ retry:
 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
 			return -13;
 
+	if (DO_BIC(BIC_CPU_LPI))
+		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
+	if (DO_BIC(BIC_SYS_LPI))
+		p->sys_lpi = cpuidle_cur_sys_lpi_us;
+
 	if (do_rapl & RAPL_PKG) {
 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
 			return -13;
@@ -1769,7 +1966,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV,
 int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
 
 static void
@@ -1885,7 +2082,7 @@ int has_turbo_ratio_group_limits(int family, int model)
 	switch (model) {
 	case INTEL_FAM6_ATOM_GOLDMONT:
 	case INTEL_FAM6_SKYLAKE_X:
-	case INTEL_FAM6_ATOM_DENVERTON:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:
 		return 1;
 	}
 	return 0;
@@ -2071,12 +2268,9 @@ dump_nhm_cst_cfg(void)
 
 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
 
-#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
-
 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
 
-	fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
@@ -2084,6 +2278,15 @@ dump_nhm_cst_cfg(void)
 		(msr & (1 << 15)) ? "" : "UN",
 		(unsigned int)msr & 0xF,
 		pkg_cstate_limit_strings[pkg_cstate_limit]);
+
+#define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
+	if (has_automatic_cstate_conversion) {
+		fprintf(outf, ", automatic c-state conversion=%s",
+			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
+	}
+
+	fprintf(outf, ")\n");
+
 	return;
 }
 
@@ -2184,6 +2387,8 @@ void free_fd_percpu(void)
 
 void free_all_buffers(void)
 {
+	int i;
+
 	CPU_FREE(cpu_present_set);
 	cpu_present_set = NULL;
 	cpu_present_setsize = 0;
@@ -2216,6 +2421,12 @@ void free_all_buffers(void)
 
 	free(irq_column_2_cpu);
 	free(irqs_per_cpu);
+
+	for (i = 0; i <= topo.max_cpu_num; ++i) {
+		if (cpus[i].put_ids)
+			CPU_FREE(cpus[i].put_ids);
+	}
+	free(cpus);
 }
 
 
@@ -2240,44 +2451,6 @@ int parse_int_file(const char *fmt, ...)
 }
 
 /*
- * get_cpu_position_in_core(cpu)
- * return the position of the CPU among its HT siblings in the core
- * return -1 if the sibling is not in list
- */
-int get_cpu_position_in_core(int cpu)
-{
-	char path[64];
-	FILE *filep;
-	int this_cpu;
-	char character;
-	int i;
-
-	sprintf(path,
-		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
-		cpu);
-	filep = fopen(path, "r");
-	if (filep == NULL) {
-		perror(path);
-		exit(1);
-	}
-
-	for (i = 0; i < topo.num_threads_per_core; i++) {
-		fscanf(filep, "%d", &this_cpu);
-		if (this_cpu == cpu) {
-			fclose(filep);
-			return i;
-		}
-
-		/* Account for no separator after last thread*/
-		if (i != (topo.num_threads_per_core - 1))
-			fscanf(filep, "%c", &character);
-	}
-
-	fclose(filep);
-	return -1;
-}
-
-/*
  * cpu_is_first_core_in_package(cpu)
  * return 1 if given CPU is 1st core in package
  */
@@ -2296,35 +2469,109 @@ int get_core_id(int cpu)
 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
 }
 
-int get_num_ht_siblings(int cpu)
+void set_node_data(void)
+{
+	int pkg, node, lnode, cpu, cpux;
+	int cpu_count;
+
+	/* initialize logical_node_id */
+	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
+		cpus[cpu].logical_node_id = -1;
+
+	cpu_count = 0;
+	for (pkg = 0; pkg < topo.num_packages; pkg++) {
+		lnode = 0;
+		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
+			if (cpus[cpu].physical_package_id != pkg)
+				continue;
+			/* find a cpu with an unset logical_node_id */
+			if (cpus[cpu].logical_node_id != -1)
+				continue;
+			cpus[cpu].logical_node_id = lnode;
+			node = cpus[cpu].physical_node_id;
+			cpu_count++;
+			/*
+			 * find all matching cpus on this pkg and set
+			 * the logical_node_id
+			 */
+			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
+				if ((cpus[cpux].physical_package_id == pkg) &&
+				   (cpus[cpux].physical_node_id == node)) {
+					cpus[cpux].logical_node_id = lnode;
+					cpu_count++;
+				}
+			}
+			lnode++;
+			if (lnode > topo.nodes_per_pkg)
+				topo.nodes_per_pkg = lnode;
+		}
+		if (cpu_count >= topo.max_cpu_num)
+			break;
+	}
+}
+
+int get_physical_node_id(struct cpu_topology *thiscpu)
 {
 	char path[80];
 	FILE *filep;
-	int sib1;
-	int matches = 0;
-	char character;
-	char str[100];
-	char *ch;
-
-	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
-	filep = fopen_or_die(path, "r");
+	int i;
+	int cpu = thiscpu->logical_cpu_id;
 
-	/*
-	 * file format:
-	 * A ',' separated or '-' separated set of numbers
-	 * (eg 1-2 or 1,3,4,5)
-	 */
-	fscanf(filep, "%d%c\n", &sib1, &character);
-	fseek(filep, 0, SEEK_SET);
-	fgets(str, 100, filep);
-	ch = strchr(str, character);
-	while (ch != NULL) {
-		matches++;
-		ch = strchr(ch+1, character);
+	for (i = 0; i <= topo.max_cpu_num; i++) {
+		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
+			cpu, i);
+		filep = fopen(path, "r");
+		if (!filep)
+			continue;
+		fclose(filep);
+		return i;
 	}
+	return -1;
+}
+
+int get_thread_siblings(struct cpu_topology *thiscpu)
+{
+	char path[80], character;
+	FILE *filep;
+	unsigned long map;
+	int so, shift, sib_core;
+	int cpu = thiscpu->logical_cpu_id;
+	int offset = topo.max_cpu_num + 1;
+	size_t size;
+	int thread_id = 0;
+
+	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
+	if (thiscpu->thread_id < 0)
+		thiscpu->thread_id = thread_id++;
+	if (!thiscpu->put_ids)
+		return -1;
 
+	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+	CPU_ZERO_S(size, thiscpu->put_ids);
+
+	sprintf(path,
+		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+	filep = fopen_or_die(path, "r");
+	do {
+		offset -= BITMASK_SIZE;
+		fscanf(filep, "%lx%c", &map, &character);
+		for (shift = 0; shift < BITMASK_SIZE; shift++) {
+			if ((map >> shift) & 0x1) {
+				so = shift + offset;
+				sib_core = get_core_id(so);
+				if (sib_core == thiscpu->physical_core_id) {
+					CPU_SET_S(so, size, thiscpu->put_ids);
+					if ((so != cpu) &&
+					    (cpus[so].thread_id < 0))
+						cpus[so].thread_id =
+								    thread_id++;
+				}
+			}
+		}
+	} while (!strncmp(&character, ",", 1));
 	fclose(filep);
-	return matches+1;
+
+	return CPU_COUNT_S(size, thiscpu->put_ids);
 }
 
 /*
@@ -2339,32 +2586,42 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
 	struct thread_data *thread_base2, struct core_data *core_base2,
 	struct pkg_data *pkg_base2)
 {
-	int retval, pkg_no, core_no, thread_no;
+	int retval, pkg_no, node_no, core_no, thread_no;
 
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-			for (thread_no = 0; thread_no <
-				topo.num_threads_per_core; ++thread_no) {
-				struct thread_data *t, *t2;
-				struct core_data *c, *c2;
-				struct pkg_data *p, *p2;
-
-				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
-
-				if (cpu_is_not_present(t->cpu_id))
-					continue;
-
-				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
-
-				c = GET_CORE(core_base, core_no, pkg_no);
-				c2 = GET_CORE(core_base2, core_no, pkg_no);
-
-				p = GET_PKG(pkg_base, pkg_no);
-				p2 = GET_PKG(pkg_base2, pkg_no);
-
-				retval = func(t, c, p, t2, c2, p2);
-				if (retval)
-					return retval;
+		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
+			for (core_no = 0; core_no < topo.cores_per_node;
+			     ++core_no) {
+				for (thread_no = 0; thread_no <
+					topo.threads_per_core; ++thread_no) {
+					struct thread_data *t, *t2;
+					struct core_data *c, *c2;
+					struct pkg_data *p, *p2;
+
+					t = GET_THREAD(thread_base, thread_no,
+						       core_no, node_no,
+						       pkg_no);
+
+					if (cpu_is_not_present(t->cpu_id))
+						continue;
+
+					t2 = GET_THREAD(thread_base2, thread_no,
+							core_no, node_no,
+							pkg_no);
+
+					c = GET_CORE(core_base, core_no,
+						     node_no, pkg_no);
+					c2 = GET_CORE(core_base2, core_no,
+						      node_no,
+						      pkg_no);
+
+					p = GET_PKG(pkg_base, pkg_no);
+					p2 = GET_PKG(pkg_base2, pkg_no);
+
+					retval = func(t, c, p, t2, c2, p2);
+					if (retval)
+						return retval;
+				}
 			}
 		}
 	}
@@ -2409,6 +2666,20 @@ void re_initialize(void)
 	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
 }
 
+void set_max_cpu_num(void)
+{
+	FILE *filep;
+	unsigned long dummy;
+
+	topo.max_cpu_num = 0;
+	filep = fopen_or_die(
+			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
+			"r");
+	while (fscanf(filep, "%lx,", &dummy) == 1)
+		topo.max_cpu_num += BITMASK_SIZE;
+	fclose(filep);
+	topo.max_cpu_num--; /* 0 based */
+}
 
 /*
  * count_cpus()
@@ -2416,10 +2687,7 @@ void re_initialize(void)
  */
 int count_cpus(int cpu)
 {
-	if (topo.max_cpu_num < cpu)
-		topo.max_cpu_num = cpu;
-
-	topo.num_cpus += 1;
+	topo.num_cpus++;
 	return 0;
 }
 int mark_cpu_present(int cpu)
@@ -2428,6 +2696,12 @@ int mark_cpu_present(int cpu)
 	return 0;
 }
 
+int init_thread_id(int cpu)
+{
+	cpus[cpu].thread_id = -1;
+	return 0;
+}
+
 /*
  * snapshot_proc_interrupts()
  *
@@ -2542,6 +2816,52 @@ int snapshot_gfx_mhz(void)
 }
 
 /*
+ * snapshot_cpu_lpi()
+ *
+ * record snapshot of
+ * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_cpu_lpi_us(void)
+{
+	FILE *fp;
+	int retval;
+
+	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
+
+	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
+	if (retval != 1)
+		err(1, "CPU LPI");
+
+	fclose(fp);
+
+	return 0;
+}
+/*
+ * snapshot_sys_lpi()
+ *
+ * record snapshot of
+ * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_sys_lpi_us(void)
+{
+	FILE *fp;
+	int retval;
+
+	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
+
+	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
+	if (retval != 1)
+		err(1, "SYS LPI");
+
+	fclose(fp);
+
+	return 0;
+}
+/*
  * snapshot /proc and /sys files
  *
  * return 1 if configuration restart needed, else return 0
@@ -2558,19 +2878,91 @@ int snapshot_proc_sysfs_files(void)
 	if (DO_BIC(BIC_GFXMHz))
 		snapshot_gfx_mhz();
 
+	if (DO_BIC(BIC_CPU_LPI))
+		snapshot_cpu_lpi_us();
+
+	if (DO_BIC(BIC_SYS_LPI))
+		snapshot_sys_lpi_us();
+
 	return 0;
 }
 
+int exit_requested;
+
+static void signal_handler (int signal)
+{
+	switch (signal) {
+	case SIGINT:
+		exit_requested = 1;
+		if (debug)
+			fprintf(stderr, " SIGINT\n");
+		break;
+	case SIGUSR1:
+		if (debug > 1)
+			fprintf(stderr, "SIGUSR1\n");
+		break;
+	}
+	/* make sure this manually-invoked interval is at least 1ms long */
+	nanosleep(&one_msec, NULL);
+}
+
+void setup_signal_handler(void)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+
+	sa.sa_handler = &signal_handler;
+
+	if (sigaction(SIGINT, &sa, NULL) < 0)
+		err(1, "sigaction SIGINT");
+	if (sigaction(SIGUSR1, &sa, NULL) < 0)
+		err(1, "sigaction SIGUSR1");
+}
+
+void do_sleep(void)
+{
+	struct timeval select_timeout;
+	fd_set readfds;
+	int retval;
+
+	FD_ZERO(&readfds);
+	FD_SET(0, &readfds);
+
+	if (!isatty(fileno(stdin))) {
+		nanosleep(&interval_ts, NULL);
+		return;
+	}
+
+	select_timeout = interval_tv;
+	retval = select(1, &readfds, NULL, NULL, &select_timeout);
+
+	if (retval == 1) {
+		switch (getc(stdin)) {
+		case 'q':
+			exit_requested = 1;
+			break;
+		}
+		/* make sure this manually-invoked interval is at least 1ms long */
+		nanosleep(&one_msec, NULL);
+	}
+}
+
+
 void turbostat_loop()
 {
 	int retval;
 	int restarted = 0;
+	int done_iters = 0;
+
+	setup_signal_handler();
 
 restart:
 	restarted++;
 
 	snapshot_proc_sysfs_files();
 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
+	first_counter_read = 0;
 	if (retval < -1) {
 		exit(retval);
 	} else if (retval == -1) {
@@ -2581,6 +2973,7 @@ restart:
 		goto restart;
 	}
 	restarted = 0;
+	done_iters = 0;
 	gettimeofday(&tv_even, (struct timezone *)NULL);
 
 	while (1) {
@@ -2588,7 +2981,7 @@ restart:
 			re_initialize();
 			goto restart;
 		}
-		nanosleep(&interval_ts, NULL);
+		do_sleep();
 		if (snapshot_proc_sysfs_files())
 			goto restart;
 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
@@ -2607,7 +3000,11 @@ restart:
 		compute_average(EVEN_COUNTERS);
 		format_all_counters(EVEN_COUNTERS);
 		flush_output_stdout();
-		nanosleep(&interval_ts, NULL);
+		if (exit_requested)
+			break;
+		if (num_iterations && ++done_iters >= num_iterations)
+			break;
+		do_sleep();
 		if (snapshot_proc_sysfs_files())
 			goto restart;
 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
@@ -2626,6 +3023,10 @@ restart:
 		compute_average(ODD_COUNTERS);
 		format_all_counters(ODD_COUNTERS);
 		flush_output_stdout();
+		if (exit_requested)
+			break;
+		if (num_iterations && ++done_iters >= num_iterations)
+			break;
 	}
 }
 
@@ -2740,6 +3141,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		pkg_cstate_limits = hsw_pkg_cstate_limits;
 		has_misc_feature_control = 1;
 		break;
@@ -2747,9 +3149,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 		pkg_cstate_limits = skx_pkg_cstate_limits;
 		has_misc_feature_control = 1;
 		break;
-	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
+	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
 		no_MSR_MISC_PWR_MGMT = 1;
-	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
+	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
 		pkg_cstate_limits = slv_pkg_cstate_limits;
 		break;
 	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
@@ -2761,8 +3163,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 		pkg_cstate_limits = phi_pkg_cstate_limits;
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
-	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 		pkg_cstate_limits = bxt_pkg_cstate_limits;
 		break;
 	default:
@@ -2791,9 +3193,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
 		return 0;
 
 	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT1:
-	case INTEL_FAM6_ATOM_MERRIFIELD:
-	case INTEL_FAM6_ATOM_MOOREFIELD:
+	case INTEL_FAM6_ATOM_SILVERMONT:
+	case INTEL_FAM6_ATOM_SILVERMONT_MID:
+	case INTEL_FAM6_ATOM_AIRMONT_MID:
 		return 1;
 	}
 	return 0;
@@ -2805,7 +3207,7 @@ int is_dnv(unsigned int family, unsigned int model)
 		return 0;
 
 	switch (model) {
-	case INTEL_FAM6_ATOM_DENVERTON:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:
 		return 1;
 	}
 	return 0;
@@ -2945,6 +3347,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 
 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
@@ -3321,8 +3724,8 @@ double get_tdp(unsigned int model)
 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
 
 	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT1:
-	case INTEL_FAM6_ATOM_SILVERMONT2:
+	case INTEL_FAM6_ATOM_SILVERMONT:
+	case INTEL_FAM6_ATOM_SILVERMONT_X:
 		return 30.0;
 	default:
 		return 135.0;
@@ -3388,7 +3791,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 		}
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
 		if (rapl_joules)
 			BIC_PRESENT(BIC_Pkg_J);
@@ -3399,6 +3802,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
 		BIC_PRESENT(BIC_PKG__);
 		BIC_PRESENT(BIC_RAM__);
@@ -3446,8 +3850,8 @@ void rapl_probe(unsigned int family, unsigned int model)
 			BIC_PRESENT(BIC_RAMWatt);
 		}
 		break;
-	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
-	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
+	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
+	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
 		do_rapl = RAPL_PKG | RAPL_CORES;
 		if (rapl_joules) {
 			BIC_PRESENT(BIC_Pkg_J);
@@ -3457,7 +3861,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 			BIC_PRESENT(BIC_CorWatt);
 		}
 		break;
-	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
 		BIC_PRESENT(BIC_PKG__);
 		BIC_PRESENT(BIC_RAM__);
@@ -3480,7 +3884,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 		return;
 
 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
-	if (model == INTEL_FAM6_ATOM_SILVERMONT1)
+	if (model == INTEL_FAM6_ATOM_SILVERMONT)
 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
 	else
 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
@@ -3523,6 +3927,12 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 	}
 }
 
+void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
+{
+	if (is_skx(family, model) || is_bdx(family, model))
+		has_automatic_cstate_conversion = 1;
+}
+
 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
 	unsigned long long msr;
@@ -3728,10 +4138,11 @@ int has_snb_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
-	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 		return 1;
 	}
 	return 0;
@@ -3761,8 +4172,9 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 		return 1;
 	}
 	return 0;
@@ -3786,6 +4198,7 @@ int has_skl_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		return 1;
 	}
 	return 0;
@@ -3796,8 +4209,8 @@ int is_slm(unsigned int family, unsigned int model)
 	if (!genuine_intel)
 		return 0;
 	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
-	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
+	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
+	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
 		return 1;
 	}
 	return 0;
@@ -3815,6 +4228,19 @@ int is_knl(unsigned int family, unsigned int model)
 	return 0;
 }
 
+int is_cnl(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+
+	switch (model) {
+	case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+		return 1;
+	}
+
+	return 0;
+}
+
 unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
 {
 	if (is_knl(family, model))
@@ -3947,7 +4373,7 @@ void decode_misc_enable_msr(void)
 			base_cpu, msr,
 			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
 			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
-			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "",
+			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
 			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
 			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
 }
@@ -4031,13 +4457,15 @@ void process_cpuid()
 	family = (fms >> 8) & 0xf;
 	model = (fms >> 4) & 0xf;
 	stepping = fms & 0xf;
-	if (family == 6 || family == 0xf)
+	if (family == 0xf)
+		family += (fms >> 20) & 0xff;
+	if (family >= 6)
 		model += ((fms >> 16) & 0xf) << 4;
 
 	if (!quiet) {
 		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
 			max_level, family, model, stepping, family, model, stepping);
-		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
+		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
 			ecx & (1 << 0) ? "SSE3" : "-",
 			ecx & (1 << 3) ? "MONITOR" : "-",
 			ecx & (1 << 6) ? "SMX" : "-",
@@ -4046,6 +4474,7 @@ void process_cpuid()
 			edx & (1 << 4) ? "TSC" : "-",
 			edx & (1 << 5) ? "MSR" : "-",
 			edx & (1 << 22) ? "ACPI-TM" : "-",
+			edx & (1 << 28) ? "HT" : "-",
 			edx & (1 << 29) ? "TM" : "-");
 	}
 
@@ -4152,12 +4581,11 @@ void process_cpuid()
 				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 					crystal_hz = 24000000;	/* 24.0 MHz */
 					break;
-				case INTEL_FAM6_SKYLAKE_X:	/* SKX */
-				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+				case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 					crystal_hz = 25000000;	/* 25.0 MHz */
 					break;
 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-				case INTEL_FAM6_ATOM_GEMINI_LAKE:
+				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 					crystal_hz = 19200000;	/* 19.2 MHz */
 					break;
 				default:
@@ -4253,6 +4681,7 @@ void process_cpuid()
 	}
 	do_slm_cstates = is_slm(family, model);
 	do_knl_cstates  = is_knl(family, model);
+	do_cnl_cstates = is_cnl(family, model);
 
 	if (!quiet)
 		decode_misc_pwr_mgmt_msr();
@@ -4262,6 +4691,7 @@ void process_cpuid()
 
 	rapl_probe(family, model);
 	perf_limit_reasons_probe(family, model);
+	automatic_cstate_conversion_probe(family, model);
 
 	if (!quiet)
 		dump_cstate_pstate_config_info(family, model);
@@ -4280,13 +4710,22 @@ void process_cpuid()
 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
 		BIC_PRESENT(BIC_GFXMHz);
 
+	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+		BIC_PRESENT(BIC_CPU_LPI);
+	else
+		BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
+		BIC_PRESENT(BIC_SYS_LPI);
+	else
+		BIC_NOT_PRESENT(BIC_SYS_LPI);
+
 	if (!quiet)
 		decode_misc_feature_control();
 
 	return;
 }
 
-
 /*
  * in /dev/cpu/ return success for names that are numbers
  * ie. filter out ".", "..", "microcode".
@@ -4310,14 +4749,10 @@ void topology_probe()
 	int max_core_id = 0;
 	int max_package_id = 0;
 	int max_siblings = 0;
-	struct cpu_topology {
-		int core_id;
-		int physical_package_id;
-	} *cpus;
 
 	/* Initialize num_cpus, max_cpu_num */
+	set_max_cpu_num();
 	topo.num_cpus = 0;
-	topo.max_cpu_num = 0;
 	for_all_proc_cpus(count_cpus);
 	if (!summary_only && topo.num_cpus > 1)
 		BIC_PRESENT(BIC_CPU);
@@ -4357,6 +4792,7 @@ void topology_probe()
 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 
+	for_all_proc_cpus(init_thread_id);
 
 	/*
 	 * For online cpus
@@ -4370,26 +4806,37 @@ void topology_probe()
 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
 			continue;
 		}
-		cpus[i].core_id = get_core_id(i);
-		if (cpus[i].core_id > max_core_id)
-			max_core_id = cpus[i].core_id;
 
+		cpus[i].logical_cpu_id = i;
+
+		/* get package information */
 		cpus[i].physical_package_id = get_physical_package_id(i);
 		if (cpus[i].physical_package_id > max_package_id)
 			max_package_id = cpus[i].physical_package_id;
 
-		siblings = get_num_ht_siblings(i);
+		/* get numa node information */
+		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
+		if (cpus[i].physical_node_id > topo.max_node_num)
+			topo.max_node_num = cpus[i].physical_node_id;
+
+		/* get core information */
+		cpus[i].physical_core_id = get_core_id(i);
+		if (cpus[i].physical_core_id > max_core_id)
+			max_core_id = cpus[i].physical_core_id;
+
+		/* get thread information */
+		siblings = get_thread_siblings(&cpus[i]);
 		if (siblings > max_siblings)
 			max_siblings = siblings;
-		if (debug > 1)
-			fprintf(outf, "cpu %d pkg %d core %d\n",
-				i, cpus[i].physical_package_id, cpus[i].core_id);
+		if (cpus[i].thread_id == 0)
+			topo.num_cores++;
 	}
-	topo.num_cores_per_pkg = max_core_id + 1;
+
+	topo.cores_per_node = max_core_id + 1;
 	if (debug > 1)
 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
-			max_core_id, topo.num_cores_per_pkg);
-	if (!summary_only && topo.num_cores_per_pkg > 1)
+			max_core_id, topo.cores_per_node);
+	if (!summary_only && topo.cores_per_node > 1)
 		BIC_PRESENT(BIC_Core);
 
 	topo.num_packages = max_package_id + 1;
@@ -4399,33 +4846,52 @@ void topology_probe()
 	if (!summary_only && topo.num_packages > 1)
 		BIC_PRESENT(BIC_Package);
 
-	topo.num_threads_per_core = max_siblings;
+	set_node_data();
+	if (debug > 1)
+		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
+	if (!summary_only && topo.nodes_per_pkg > 1)
+		BIC_PRESENT(BIC_Node);
+
+	topo.threads_per_core = max_siblings;
 	if (debug > 1)
 		fprintf(outf, "max_siblings %d\n", max_siblings);
 
-	free(cpus);
+	if (debug < 1)
+		return;
+
+	for (i = 0; i <= topo.max_cpu_num; ++i) {
+		fprintf(outf,
+			"cpu %d pkg %d node %d lnode %d core %d thread %d\n",
+			i, cpus[i].physical_package_id,
+			cpus[i].physical_node_id,
+			cpus[i].logical_node_id,
+			cpus[i].physical_core_id,
+			cpus[i].thread_id);
+	}
+
 }
 
 void
-allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
+allocate_counters(struct thread_data **t, struct core_data **c,
+		  struct pkg_data **p)
 {
 	int i;
+	int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
+			topo.num_packages;
+	int num_threads = topo.threads_per_core * num_cores;
 
-	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
-		topo.num_packages, sizeof(struct thread_data));
+	*t = calloc(num_threads, sizeof(struct thread_data));
 	if (*t == NULL)
 		goto error;
 
-	for (i = 0; i < topo.num_threads_per_core *
-		topo.num_cores_per_pkg * topo.num_packages; i++)
+	for (i = 0; i < num_threads; i++)
 		(*t)[i].cpu_id = -1;
 
-	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
-		sizeof(struct core_data));
+	*c = calloc(num_cores, sizeof(struct core_data));
 	if (*c == NULL)
 		goto error;
 
-	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
+	for (i = 0; i < num_cores; i++)
 		(*c)[i].core_id = -1;
 
 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
@@ -4442,47 +4908,46 @@ error:
 /*
  * init_counter()
  *
- * set cpu_id, core_num, pkg_num
  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
- *
- * increment topo.num_cores when 1st core in pkg seen
  */
 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
-	struct pkg_data *pkg_base, int thread_num, int core_num,
-	int pkg_num, int cpu_id)
+	struct pkg_data *pkg_base, int cpu_id)
 {
+	int pkg_id = cpus[cpu_id].physical_package_id;
+	int node_id = cpus[cpu_id].logical_node_id;
+	int core_id = cpus[cpu_id].physical_core_id;
+	int thread_id = cpus[cpu_id].thread_id;
 	struct thread_data *t;
 	struct core_data *c;
 	struct pkg_data *p;
 
-	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
-	c = GET_CORE(core_base, core_num, pkg_num);
-	p = GET_PKG(pkg_base, pkg_num);
+
+	/* Workaround for systems where physical_node_id==-1
+	 * and logical_node_id==(-1 - topo.num_cpus)
+	 */
+	if (node_id < 0)
+		node_id = 0;
+
+	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
+	c = GET_CORE(core_base, core_id, node_id, pkg_id);
+	p = GET_PKG(pkg_base, pkg_id);
 
 	t->cpu_id = cpu_id;
-	if (thread_num == 0) {
+	if (thread_id == 0) {
 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
 		if (cpu_is_first_core_in_package(cpu_id))
 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
 	}
 
-	c->core_id = core_num;
-	p->package_id = pkg_num;
+	c->core_id = core_id;
+	p->package_id = pkg_id;
 }
 
 
 int initialize_counters(int cpu_id)
 {
-	int my_thread_id, my_core_id, my_package_id;
-
-	my_package_id = get_physical_package_id(cpu_id);
-	my_core_id = get_core_id(cpu_id);
-	my_thread_id = get_cpu_position_in_core(cpu_id);
-	if (!my_thread_id)
-		topo.num_cores++;
-
-	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
-	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
+	init_counter(EVEN_COUNTERS, cpu_id);
+	init_counter(ODD_COUNTERS, cpu_id);
 	return 0;
 }
 
@@ -4567,6 +5032,7 @@ int fork_it(char **argv)
 
 	snapshot_proc_sysfs_files();
 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
+	first_counter_read = 0;
 	if (status)
 		exit(status);
 	/* clear affinity side-effect of get_counters() */
@@ -4630,7 +5096,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 17.06.23"
+	fprintf(outf, "turbostat version 18.07.27"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -4661,7 +5127,7 @@ int add_counter(unsigned int msr_num, char *path, char *name,
 		msrp->next = sys.tp;
 		sys.tp = msrp;
 		sys.added_thread_counters++;
-		if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
+		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
 			fprintf(stderr, "exceeded max %d added thread counters\n",
 				MAX_ADDED_COUNTERS);
 			exit(-1);
@@ -4820,7 +5286,7 @@ void probe_sysfs(void)
 	if (!DO_BIC(BIC_sysfs))
 		return;
 
-	for (state = 10; state > 0; --state) {
+	for (state = 10; state >= 0; --state) {
 
 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
 			base_cpu, state);
@@ -4847,7 +5313,7 @@ void probe_sysfs(void)
 				FORMAT_PERCENT, SYSFS_PERCPU);
 	}
 
-	for (state = 10; state > 0; --state) {
+	for (state = 10; state >= 0; --state) {
 
 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
 			base_cpu, state);
@@ -4960,34 +5426,6 @@ error:
 	exit(-1);
 }
 
-int shown;
-/*
- * parse_show_hide() - process cmdline to set default counter action
- */
-void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
-{
-	/*
-	 * --show: show only those specified
-	 *  The 1st invocation will clear and replace the enabled mask
-	 *  subsequent invocations can add to it.
-	 */
-	if (new_mode == SHOW_LIST) {
-		if (shown == 0)
-			bic_enabled = bic_lookup(optarg, new_mode);
-		else
-			bic_enabled |= bic_lookup(optarg, new_mode);
-		shown = 1;
-
-		return;
-	}
-
-	/*
-	 * --hide: do not show those specified
-	 *  multiple invocations simply clear more bits in enabled mask
-	 */
-	bic_enabled &= ~bic_lookup(optarg, new_mode);
-
-}
 
 void cmdline(int argc, char **argv)
 {
@@ -4998,7 +5436,9 @@ void cmdline(int argc, char **argv)
 		{"cpu",		required_argument,	0, 'c'},
 		{"Dump",	no_argument,		0, 'D'},
 		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
+		{"enable",	required_argument,	0, 'e'},
 		{"interval",	required_argument,	0, 'i'},
+		{"num_iterations",	required_argument,	0, 'n'},
 		{"help",	no_argument,		0, 'h'},
 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
 		{"Joules",	no_argument,		0, 'J'},
@@ -5014,7 +5454,7 @@ void cmdline(int argc, char **argv)
 
 	progname = argv[0];
 
-	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
 				long_options, &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -5026,11 +5466,20 @@ void cmdline(int argc, char **argv)
 		case 'D':
 			dump_only++;
 			break;
+		case 'e':
+			/* --enable specified counter */
+			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
+			break;
 		case 'd':
 			debug++;
+			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
 			break;
 		case 'H':
-			parse_show_hide(optarg, HIDE_LIST);
+			/*
+			 * --hide: do not show those specified
+			 *  multiple invocations simply clear more bits in enabled mask
+			 */
+			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
 			break;
 		case 'h':
 		default:
@@ -5046,7 +5495,8 @@ void cmdline(int argc, char **argv)
 					exit(2);
 				}
 
-				interval_ts.tv_sec = interval;
+				interval_tv.tv_sec = interval_ts.tv_sec = interval;
+				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
 			}
 			break;
@@ -5054,6 +5504,7 @@ void cmdline(int argc, char **argv)
 			rapl_joules++;
 			break;
 		case 'l':
+			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
 			list_header_only++;
 			quiet++;
 			break;
@@ -5063,8 +5514,26 @@ void cmdline(int argc, char **argv)
 		case 'q':
 			quiet = 1;
 			break;
+		case 'n':
+			num_iterations = strtod(optarg, NULL);
+
+			if (num_iterations <= 0) {
+				fprintf(outf, "iterations %d should be positive number\n",
+					num_iterations);
+				exit(2);
+			}
+			break;
 		case 's':
-			parse_show_hide(optarg, SHOW_LIST);
+			/*
+			 * --show: show only those specified
+			 *  The 1st invocation will clear and replace the enabled mask
+			 *  subsequent invocations can add to it.
+			 */
+			if (shown == 0)
+				bic_enabled = bic_lookup(optarg, SHOW_LIST);
+			else
+				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+			shown = 1;
 			break;
 		case 'S':
 			summary_only++;
@@ -5083,7 +5552,6 @@ void cmdline(int argc, char **argv)
 int main(int argc, char **argv)
 {
 	outf = stderr;
-
 	cmdline(argc, argv);
 
 	if (!quiet)
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 2447b1bbaacf..f4534fb8b951 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -24,5 +24,5 @@ install : x86_energy_perf_policy
 	install -d  $(DESTDIR)$(PREFIX)/bin
 	install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
-	install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
 
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 8c590cd1171a..4c12e6aea5d5 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -73,12 +73,12 @@ static void hex_dump(const void *src, size_t length, size_t line_size,
 				while (i++ % line_size)
 					printf("__ ");
 			}
-			printf(" | ");  /* right close */
+			printf(" |");
 			while (line < address) {
 				c = *line++;
-				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+				printf("%c", (c < 32 || c > 126) ? '.' : c);
 			}
-			printf("\n");
+			printf("|\n");
 			if (length > 0)
 				printf("%s | ", prefix);
 		}
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 0392153a0009..778ceb651000 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -22,6 +22,7 @@ NVDIMM_SRC := $(DRIVERS)/nvdimm
 ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
 ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+ccflags-y += -I$(src)/$(ACPI_SRC)/
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
index 43521512e577..fec8fb1b7715 100644
--- a/tools/testing/nvdimm/acpi_nfit_test.c
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -4,5 +4,13 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include "watermark.h"
+#include <nfit.h>
 
 nfit_test_watermark(acpi_nfit);
+
+/* strong / override definition of nfit_intel_shutdown_status */
+void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
+{
+	set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
+	nfit_mem->dirty_shutdown = 42;
+}
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index b53596ad601b..2e7fd8227969 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -31,17 +31,21 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
 	if (get_nfit_res(pmem->phys_addr + offset)) {
 		struct page *page;
 
-		*kaddr = pmem->virt_addr + offset;
+		if (kaddr)
+			*kaddr = pmem->virt_addr + offset;
 		page = vmalloc_to_page(pmem->virt_addr + offset);
-		*pfn = page_to_pfn_t(page);
+		if (pfn)
+			*pfn = page_to_pfn_t(page);
 		pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
 				__func__, pmem, pgoff, page_to_pfn(page));
 
 		return 1;
 	}
 
-	*kaddr = pmem->virt_addr + offset;
-	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+	if (kaddr)
+		*kaddr = pmem->virt_addr + offset;
+	if (pfn)
+		*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
 	/*
 	 * If badblocks are present, limit known good range to the
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 4ea385be528f..6c16ac36d482 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -15,6 +15,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
+#include <linux/genalloc.h>
 #include <linux/vmalloc.h>
 #include <linux/device.h>
 #include <linux/module.h>
@@ -24,11 +25,14 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <nd-core.h>
+#include <intel.h>
 #include <nfit.h>
 #include <nd.h>
 #include "nfit_test.h"
 #include "../watermark.h"
 
+#include <asm/mcsafe_test.h>
+
 /*
  * Generate an NFIT table to describe the following topology:
  *
@@ -137,8 +141,31 @@ static u32 handle[] = {
 	[6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
 };
 
-static unsigned long dimm_fail_cmd_flags[NUM_DCR];
-static int dimm_fail_cmd_code[NUM_DCR];
+static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
+static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
+
+static const struct nd_intel_smart smart_def = {
+	.flags = ND_INTEL_SMART_HEALTH_VALID
+		| ND_INTEL_SMART_SPARES_VALID
+		| ND_INTEL_SMART_ALARM_VALID
+		| ND_INTEL_SMART_USED_VALID
+		| ND_INTEL_SMART_SHUTDOWN_VALID
+		| ND_INTEL_SMART_SHUTDOWN_COUNT_VALID
+		| ND_INTEL_SMART_MTEMP_VALID
+		| ND_INTEL_SMART_CTEMP_VALID,
+	.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+	.media_temperature = 23 * 16,
+	.ctrl_temperature = 25 * 16,
+	.pmic_temperature = 40 * 16,
+	.spares = 75,
+	.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+		| ND_INTEL_SMART_TEMP_TRIP,
+	.ait_status = 1,
+	.life_used = 5,
+	.shutdown_state = 0,
+	.shutdown_count = 42,
+	.vendor_size = 0,
+};
 
 struct nfit_test_fw {
 	enum intel_fw_update_state state;
@@ -179,7 +206,7 @@ struct nfit_test {
 		unsigned long deadline;
 		spinlock_t lock;
 	} ars_state;
-	struct device *dimm_dev[NUM_DCR];
+	struct device *dimm_dev[ARRAY_SIZE(handle)];
 	struct nd_intel_smart *smart;
 	struct nd_intel_smart_threshold *smart_threshold;
 	struct badrange badrange;
@@ -189,6 +216,8 @@ struct nfit_test {
 
 static struct workqueue_struct *nfit_wq;
 
+static struct gen_pool *nfit_pool;
+
 static struct nfit_test *to_nfit_test(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -750,15 +779,30 @@ static int nfit_test_cmd_smart_inject(
 	if (buf_len != sizeof(*inj))
 		return -EINVAL;
 
-	if (inj->mtemp_enable)
-		smart->media_temperature = inj->media_temperature;
-	if (inj->spare_enable)
-		smart->spares = inj->spares;
-	if (inj->fatal_enable)
-		smart->health = ND_INTEL_SMART_FATAL_HEALTH;
-	if (inj->unsafe_shutdown_enable) {
-		smart->shutdown_state = 1;
-		smart->shutdown_count++;
+	if (inj->flags & ND_INTEL_SMART_INJECT_MTEMP) {
+		if (inj->mtemp_enable)
+			smart->media_temperature = inj->media_temperature;
+		else
+			smart->media_temperature = smart_def.media_temperature;
+	}
+	if (inj->flags & ND_INTEL_SMART_INJECT_SPARE) {
+		if (inj->spare_enable)
+			smart->spares = inj->spares;
+		else
+			smart->spares = smart_def.spares;
+	}
+	if (inj->flags & ND_INTEL_SMART_INJECT_FATAL) {
+		if (inj->fatal_enable)
+			smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+		else
+			smart->health = ND_INTEL_SMART_NON_CRITICAL_HEALTH;
+	}
+	if (inj->flags & ND_INTEL_SMART_INJECT_SHUTDOWN) {
+		if (inj->unsafe_shutdown_enable) {
+			smart->shutdown_state = 1;
+			smart->shutdown_count++;
+		} else
+			smart->shutdown_state = 0;
 	}
 	inj->status = 0;
 	smart_notify(bus_dev, dimm_dev, smart, thresh);
@@ -882,6 +926,16 @@ static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
 	return 0;
 }
 
+static int override_return_code(int dimm, unsigned int func, int rc)
+{
+	if ((1 << func) & dimm_fail_cmd_flags[dimm]) {
+		if (dimm_fail_cmd_code[dimm])
+			return dimm_fail_cmd_code[dimm];
+		return -EIO;
+	}
+	return rc;
+}
+
 static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
 {
 	int i;
@@ -892,13 +946,6 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
 			break;
 	if (i >= ARRAY_SIZE(handle))
 		return -ENXIO;
-
-	if ((1 << func) & dimm_fail_cmd_flags[i]) {
-		if (dimm_fail_cmd_code[i])
-			return dimm_fail_cmd_code[i];
-		return -EIO;
-	}
-
 	return i;
 }
 
@@ -937,48 +984,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 
 			switch (func) {
 			case ND_INTEL_ENABLE_LSS_STATUS:
-				return nd_intel_test_cmd_set_lss_status(t,
+				rc = nd_intel_test_cmd_set_lss_status(t,
 						buf, buf_len);
+				break;
 			case ND_INTEL_FW_GET_INFO:
-				return nd_intel_test_get_fw_info(t, buf,
+				rc = nd_intel_test_get_fw_info(t, buf,
 						buf_len, i - t->dcr_idx);
+				break;
 			case ND_INTEL_FW_START_UPDATE:
-				return nd_intel_test_start_update(t, buf,
+				rc = nd_intel_test_start_update(t, buf,
 						buf_len, i - t->dcr_idx);
+				break;
 			case ND_INTEL_FW_SEND_DATA:
-				return nd_intel_test_send_data(t, buf,
+				rc = nd_intel_test_send_data(t, buf,
 						buf_len, i - t->dcr_idx);
+				break;
 			case ND_INTEL_FW_FINISH_UPDATE:
-				return nd_intel_test_finish_fw(t, buf,
+				rc = nd_intel_test_finish_fw(t, buf,
 						buf_len, i - t->dcr_idx);
+				break;
 			case ND_INTEL_FW_FINISH_QUERY:
-				return nd_intel_test_finish_query(t, buf,
+				rc = nd_intel_test_finish_query(t, buf,
 						buf_len, i - t->dcr_idx);
+				break;
 			case ND_INTEL_SMART:
-				return nfit_test_cmd_smart(buf, buf_len,
+				rc = nfit_test_cmd_smart(buf, buf_len,
 						&t->smart[i - t->dcr_idx]);
+				break;
 			case ND_INTEL_SMART_THRESHOLD:
-				return nfit_test_cmd_smart_threshold(buf,
+				rc = nfit_test_cmd_smart_threshold(buf,
 						buf_len,
 						&t->smart_threshold[i -
 							t->dcr_idx]);
+				break;
 			case ND_INTEL_SMART_SET_THRESHOLD:
-				return nfit_test_cmd_smart_set_threshold(buf,
+				rc = nfit_test_cmd_smart_set_threshold(buf,
 						buf_len,
 						&t->smart_threshold[i -
 							t->dcr_idx],
 						&t->smart[i - t->dcr_idx],
 						&t->pdev.dev, t->dimm_dev[i]);
+				break;
 			case ND_INTEL_SMART_INJECT:
-				return nfit_test_cmd_smart_inject(buf,
+				rc = nfit_test_cmd_smart_inject(buf,
 						buf_len,
 						&t->smart_threshold[i -
 							t->dcr_idx],
 						&t->smart[i - t->dcr_idx],
 						&t->pdev.dev, t->dimm_dev[i]);
+				break;
 			default:
 				return -ENOTTY;
 			}
+			return override_return_code(i, func, rc);
 		}
 
 		if (!test_bit(cmd, &cmd_mask)
@@ -1004,6 +1062,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		default:
 			return -ENOTTY;
 		}
+		return override_return_code(i, func, rc);
 	} else {
 		struct ars_state *ars_state = &t->ars_state;
 		struct nd_cmd_pkg *call_pkg = buf;
@@ -1076,6 +1135,9 @@ static void release_nfit_res(void *data)
 	list_del(&nfit_res->list);
 	spin_unlock(&nfit_test_lock);
 
+	if (resource_size(&nfit_res->res) >= DIMM_SIZE)
+		gen_pool_free(nfit_pool, nfit_res->res.start,
+				resource_size(&nfit_res->res));
 	vfree(nfit_res->buf);
 	kfree(nfit_res);
 }
@@ -1088,7 +1150,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 			GFP_KERNEL);
 	int rc;
 
-	if (!buf || !nfit_res)
+	if (!buf || !nfit_res || !*dma)
 		goto err;
 	rc = devm_add_action(dev, release_nfit_res, nfit_res);
 	if (rc)
@@ -1108,6 +1170,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 
 	return nfit_res->buf;
  err:
+	if (*dma && size >= DIMM_SIZE)
+		gen_pool_free(nfit_pool, *dma, size);
 	if (buf)
 		vfree(buf);
 	kfree(nfit_res);
@@ -1116,9 +1180,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 
 static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
 {
+	struct genpool_data_align data = {
+		.align = SZ_128M,
+	};
 	void *buf = vmalloc(size);
 
-	*dma = (unsigned long) buf;
+	if (size >= DIMM_SIZE)
+		*dma = gen_pool_alloc_algo(nfit_pool, size,
+				gen_pool_first_fit_align, &data);
+	else
+		*dma = (unsigned long) buf;
 	return __test_alloc(t, size, dma, buf);
 }
 
@@ -1300,29 +1371,9 @@ static void smart_init(struct nfit_test *t)
 		.ctrl_temperature = 30 * 16,
 		.spares = 5,
 	};
-	const struct nd_intel_smart smart_data = {
-		.flags = ND_INTEL_SMART_HEALTH_VALID
-			| ND_INTEL_SMART_SPARES_VALID
-			| ND_INTEL_SMART_ALARM_VALID
-			| ND_INTEL_SMART_USED_VALID
-			| ND_INTEL_SMART_SHUTDOWN_VALID
-			| ND_INTEL_SMART_MTEMP_VALID,
-		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
-		.media_temperature = 23 * 16,
-		.ctrl_temperature = 25 * 16,
-		.pmic_temperature = 40 * 16,
-		.spares = 75,
-		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
-			| ND_INTEL_SMART_TEMP_TRIP,
-		.ait_status = 1,
-		.life_used = 5,
-		.shutdown_state = 0,
-		.vendor_size = 0,
-		.shutdown_count = 100,
-	};
 
 	for (i = 0; i < t->num_dcr; i++) {
-		memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
+		memcpy(&t->smart[i], &smart_def, sizeof(smart_def));
 		memcpy(&t->smart_threshold[i], &smart_t_data,
 				sizeof(smart_t_data));
 	}
@@ -1989,8 +2040,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
 	pcap->header.length = sizeof(*pcap);
 	pcap->highest_capability = 1;
-	pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
-		ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+	pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH;
 	offset += pcap->header.length;
 
 	if (t->setup_hotplug) {
@@ -2645,7 +2695,7 @@ static int nfit_test_probe(struct platform_device *pdev)
 		u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
 		int i;
 
-		for (i = 0; i < NUM_DCR; i++)
+		for (i = 0; i < ARRAY_SIZE(handle); i++)
 			if (nfit_handle == handle[i])
 				dev_set_drvdata(nfit_test->dimm_dev[i],
 						nfit_mem);
@@ -2681,6 +2731,107 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
+static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+enum INJECT {
+	INJECT_NONE,
+	INJECT_SRC,
+	INJECT_DST,
+};
+
+static void mcsafe_test_init(char *dst, char *src, size_t size)
+{
+	size_t i;
+
+	memset(dst, 0xff, size);
+	for (i = 0; i < size; i++)
+		src[i] = (char) i;
+}
+
+static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+		size_t size, unsigned long rem)
+{
+	size_t i;
+
+	for (i = 0; i < size - rem; i++)
+		if (dst[i] != (unsigned char) i) {
+			pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
+					__func__, __LINE__, i, dst[i],
+					(unsigned char) i);
+			return false;
+		}
+	for (i = size - rem; i < size; i++)
+		if (dst[i] != 0xffU) {
+			pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
+					__func__, __LINE__, i, dst[i]);
+			return false;
+		}
+	return true;
+}
+
+void mcsafe_test(void)
+{
+	char *inject_desc[] = { "none", "source", "destination" };
+	enum INJECT inj;
+
+	if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+		pr_info("%s: run...\n", __func__);
+	} else {
+		pr_info("%s: disabled, skip.\n", __func__);
+		return;
+	}
+
+	for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
+		int i;
+
+		pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
+		for (i = 0; i < 512; i++) {
+			unsigned long expect, rem;
+			void *src, *dst;
+			bool valid;
+
+			switch (inj) {
+			case INJECT_NONE:
+				mcsafe_inject_src(NULL);
+				mcsafe_inject_dst(NULL);
+				dst = &mcsafe_buf[2048];
+				src = &mcsafe_buf[1024 - i];
+				expect = 0;
+				break;
+			case INJECT_SRC:
+				mcsafe_inject_src(&mcsafe_buf[1024]);
+				mcsafe_inject_dst(NULL);
+				dst = &mcsafe_buf[2048];
+				src = &mcsafe_buf[1024 - i];
+				expect = 512 - i;
+				break;
+			case INJECT_DST:
+				mcsafe_inject_src(NULL);
+				mcsafe_inject_dst(&mcsafe_buf[2048]);
+				dst = &mcsafe_buf[2048 - i];
+				src = &mcsafe_buf[1024];
+				expect = 512 - i;
+				break;
+			}
+
+			mcsafe_test_init(dst, src, 512);
+			rem = __memcpy_mcsafe(dst, src, 512);
+			valid = mcsafe_test_validate(dst, src, 512, expect);
+			if (rem == expect && valid)
+				continue;
+			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
+					__func__,
+					((unsigned long) dst) & ~PAGE_MASK,
+					((unsigned long ) src) & ~PAGE_MASK,
+					512, i, rem, valid ? "valid" : "bad",
+					expect);
+		}
+	}
+
+	mcsafe_inject_src(NULL);
+	mcsafe_inject_dst(NULL);
+}
+
 static __init int nfit_test_init(void)
 {
 	int rc, i;
@@ -2689,6 +2840,7 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
+	mcsafe_test();
 
 	nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
 
@@ -2702,6 +2854,17 @@ static __init int nfit_test_init(void)
 		goto err_register;
 	}
 
+	nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
+	if (!nfit_pool) {
+		rc = -ENOMEM;
+		goto err_register;
+	}
+
+	if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) {
+		rc = -ENOMEM;
+		goto err_register;
+	}
+
 	for (i = 0; i < NUM_NFITS; i++) {
 		struct nfit_test *nfit_test;
 		struct platform_device *pdev;
@@ -2757,6 +2920,9 @@ static __init int nfit_test_init(void)
 	return 0;
 
  err_register:
+	if (nfit_pool)
+		gen_pool_destroy(nfit_pool);
+
 	destroy_workqueue(nfit_wq);
 	for (i = 0; i < NUM_NFITS; i++)
 		if (instances[i])
@@ -2780,6 +2946,8 @@ static __exit void nfit_test_exit(void)
 	platform_driver_unregister(&nfit_test_driver);
 	nfit_test_teardown();
 
+	gen_pool_destroy(nfit_pool);
+
 	for (i = 0; i < NUM_NFITS; i++)
 		put_device(&instances[i]->pdev.dev);
 	class_destroy(nfit_test_dimm);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 33752e06ff8d..ade14fe3837e 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -117,30 +117,6 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
 #define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
 
-struct nd_intel_smart {
-	__u32 status;
-	union {
-		struct {
-			__u32 flags;
-			__u8 reserved0[4];
-			__u8 health;
-			__u8 spares;
-			__u8 life_used;
-			__u8 alarm_flags;
-			__u16 media_temperature;
-			__u16 ctrl_temperature;
-			__u32 shutdown_count;
-			__u8 ait_status;
-			__u16 pmic_temperature;
-			__u8 reserved1[8];
-			__u8 shutdown_state;
-			__u32 vendor_size;
-			__u8 vendor_data[92];
-		} __packed;
-		__u8 data[128];
-	};
-} __packed;
-
 struct nd_intel_smart_threshold {
 	__u32 status;
 	union {
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index d4706c0ffceb..3834899b6693 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -4,3 +4,4 @@ idr-test
 main
 multiorder
 radix-tree.c
+xarray
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index db66f8a0d4be..397d6b612502 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
+CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
+	  -fsanitize=undefined
+LDFLAGS += -fsanitize=address -fsanitize=undefined
 LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
-CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+TARGETS = main idr-test multiorder xarray
+CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+	 regression4.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
 ifndef SHIFT
@@ -21,8 +23,11 @@ targets: generated/map-shift.h $(TARGETS)
 
 main:	$(OFILES)
 
+idr-test.o: ../../../lib/test_ida.c
 idr-test: idr-test.o $(CORE_OFILES)
 
+xarray: $(CORE_OFILES)
+
 multiorder: multiorder.o $(CORE_OFILES)
 
 clean:
@@ -33,6 +38,7 @@ vpath %.c ../../lib
 $(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
+	../../../include/linux/xarray.h \
 	../../../include/linux/radix-tree.h \
 	../../../include/linux/idr.h
 
@@ -42,8 +48,10 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
+xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c
+
 generated/map-shift.h:
 	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
-		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
+		echo "#define XA_CHUNK_SHIFT $(SHIFT)" >		\
 				generated/map-shift.h;			\
 	fi
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 99c40f3ed133..7e195ed8e92d 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -17,9 +17,6 @@
 #include <time.h>
 #include "test.h"
 
-#define for_each_index(i, base, order) \
-	        for (i = base; i < base + (1 << order); i++)
-
 #define NSEC_PER_SEC	1000000000L
 
 static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
@@ -61,7 +58,7 @@ again:
 }
 
 static void benchmark_insert(struct radix_tree_root *root,
-			     unsigned long size, unsigned long step, int order)
+			     unsigned long size, unsigned long step)
 {
 	struct timespec start, finish;
 	unsigned long index;
@@ -70,19 +67,19 @@ static void benchmark_insert(struct radix_tree_root *root,
 	clock_gettime(CLOCK_MONOTONIC, &start);
 
 	for (index = 0 ; index < size ; index += step)
-		item_insert_order(root, index, order);
+		item_insert(root, index);
 
 	clock_gettime(CLOCK_MONOTONIC, &finish);
 
 	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
 	       (finish.tv_nsec - start.tv_nsec);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
-		size, step, order, nsec);
+	printv(2, "Size: %8ld, step: %8ld, insertion: %15lld ns\n",
+		size, step, nsec);
 }
 
 static void benchmark_tagging(struct radix_tree_root *root,
-			     unsigned long size, unsigned long step, int order)
+			     unsigned long size, unsigned long step)
 {
 	struct timespec start, finish;
 	unsigned long index;
@@ -98,138 +95,53 @@ static void benchmark_tagging(struct radix_tree_root *root,
 	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
 	       (finish.tv_nsec - start.tv_nsec);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
-		size, step, order, nsec);
+	printv(2, "Size: %8ld, step: %8ld, tagging: %17lld ns\n",
+		size, step, nsec);
 }
 
 static void benchmark_delete(struct radix_tree_root *root,
-			     unsigned long size, unsigned long step, int order)
+			     unsigned long size, unsigned long step)
 {
 	struct timespec start, finish;
-	unsigned long index, i;
+	unsigned long index;
 	long long nsec;
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 
 	for (index = 0 ; index < size ; index += step)
-		for_each_index(i, index, order)
-			item_delete(root, i);
+		item_delete(root, index);
 
 	clock_gettime(CLOCK_MONOTONIC, &finish);
 
 	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
 	       (finish.tv_nsec - start.tv_nsec);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
-		size, step, order, nsec);
+	printv(2, "Size: %8ld, step: %8ld, deletion: %16lld ns\n",
+		size, step, nsec);
 }
 
-static void benchmark_size(unsigned long size, unsigned long step, int order)
+static void benchmark_size(unsigned long size, unsigned long step)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
 	long long normal, tagged;
 
-	benchmark_insert(&tree, size, step, order);
-	benchmark_tagging(&tree, size, step, order);
+	benchmark_insert(&tree, size, step);
+	benchmark_tagging(&tree, size, step);
 
 	tagged = benchmark_iter(&tree, true);
 	normal = benchmark_iter(&tree, false);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
-		size, step, order, tagged);
-	printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
-		size, step, order, normal);
+	printv(2, "Size: %8ld, step: %8ld, tagged iteration: %8lld ns\n",
+		size, step, tagged);
+	printv(2, "Size: %8ld, step: %8ld, normal iteration: %8lld ns\n",
+		size, step, normal);
 
-	benchmark_delete(&tree, size, step, order);
+	benchmark_delete(&tree, size, step);
 
 	item_kill_tree(&tree);
 	rcu_barrier();
 }
 
-static long long  __benchmark_split(unsigned long index,
-				    int old_order, int new_order)
-{
-	struct timespec start, finish;
-	long long nsec;
-	RADIX_TREE(tree, GFP_ATOMIC);
-
-	item_insert_order(&tree, index, old_order);
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	radix_tree_split(&tree, index, new_order);
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
-	       (finish.tv_nsec - start.tv_nsec);
-
-	item_kill_tree(&tree);
-
-	return nsec;
-
-}
-
-static void benchmark_split(unsigned long size, unsigned long step)
-{
-	int i, j, idx;
-	long long nsec = 0;
-
-
-	for (idx = 0; idx < size; idx += step) {
-		for (i = 3; i < 11; i++) {
-			for (j = 0; j < i; j++) {
-				nsec += __benchmark_split(idx, i, j);
-			}
-		}
-	}
-
-	printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
-			size, step, nsec);
-
-}
-
-static long long  __benchmark_join(unsigned long index,
-			     unsigned order1, unsigned order2)
-{
-	unsigned long loc;
-	struct timespec start, finish;
-	long long nsec;
-	void *item, *item2 = item_create(index + 1, order1);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert_order(&tree, index, order2);
-	item = radix_tree_lookup(&tree, index);
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	radix_tree_join(&tree, index + 1, order1, item2);
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
-		(finish.tv_nsec - start.tv_nsec);
-
-	loc = find_item(&tree, item);
-	if (loc == -1)
-		free(item);
-
-	item_kill_tree(&tree);
-
-	return nsec;
-}
-
-static void benchmark_join(unsigned long step)
-{
-	int i, j, idx;
-	long long nsec = 0;
-
-	for (idx = 0; idx < 1 << 10; idx += step) {
-		for (i = 1; i < 15; i++) {
-			for (j = 0; j < i; j++) {
-				nsec += __benchmark_join(idx, i, j);
-			}
-		}
-	}
-
-	printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
-			1 << 10, step, nsec);
-}
-
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -242,16 +154,5 @@ void benchmark(void)
 
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
-			benchmark_size(size[c], step[s], 0);
-
-	for (c = 0; size[c]; c++)
-		for (s = 0; step[s]; s++)
-			benchmark_size(size[c], step[s] << 9, 9);
-
-	for (c = 0; size[c]; c++)
-		for (s = 0; step[s]; s++)
-			benchmark_split(size[c], step[s]);
-
-	for (s = 0; step[s]; s++)
-		benchmark_join(step[s]);
+			benchmark_size(size[c], step[s]);
 }
diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c
new file mode 100644
index 000000000000..66ec4a24a203
--- /dev/null
+++ b/tools/testing/radix-tree/bitmap.c
@@ -0,0 +1,23 @@
+/* lib/bitmap.c pulls in at least two other files. */
+
+#include <linux/bitmap.h>
+
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index cf88dc5b8832..2218b3cc184e 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1 +1 @@
-#define CONFIG_RADIX_TREE_MULTIORDER 1
+#define CONFIG_XARRAY_MULTI 1
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index ee820fcc29b0..1b63bdb7688f 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -19,7 +19,7 @@
 
 #include "test.h"
 
-#define DUMMY_PTR	((void *)0x12)
+#define DUMMY_PTR	((void *)0x10)
 
 int item_idr_free(int id, void *p, void *data)
 {
@@ -227,6 +227,66 @@ void idr_u32_test(int base)
 	idr_u32_test1(&idr, 0xffffffff);
 }
 
+static void idr_align_test(struct idr *idr)
+{
+	char name[] = "Motorola 68000";
+	int i, id;
+	void *entry;
+
+	for (i = 0; i < 9; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 1; i < 10; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 1);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 2; i < 11; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 2);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 3; i < 12; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 3);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 0; i < 8; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != 0);
+		BUG_ON(idr_alloc(idr, &name[i + 1], 0, 0, GFP_KERNEL) != 1);
+		idr_for_each_entry(idr, entry, id);
+		idr_remove(idr, 1);
+		idr_for_each_entry(idr, entry, id);
+		idr_remove(idr, 0);
+		BUG_ON(!idr_is_empty(idr));
+	}
+
+	for (i = 0; i < 8; i++) {
+		BUG_ON(idr_alloc(idr, NULL, 0, 0, GFP_KERNEL) != 0);
+		idr_for_each_entry(idr, entry, id);
+		idr_replace(idr, &name[i], 0);
+		idr_for_each_entry(idr, entry, id);
+		BUG_ON(idr_find(idr, 0) != &name[i]);
+		idr_remove(idr, 0);
+	}
+
+	for (i = 0; i < 8; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != 0);
+		BUG_ON(idr_alloc(idr, NULL, 0, 0, GFP_KERNEL) != 1);
+		idr_remove(idr, 1);
+		idr_for_each_entry(idr, entry, id);
+		idr_replace(idr, &name[i + 1], 0);
+		idr_for_each_entry(idr, entry, id);
+		idr_remove(idr, 0);
+	}
+}
+
 void idr_checks(void)
 {
 	unsigned long i;
@@ -307,143 +367,64 @@ void idr_checks(void)
 	idr_u32_test(4);
 	idr_u32_test(1);
 	idr_u32_test(0);
+	idr_align_test(&idr);
 }
 
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack()    assert(0)
+void ida_dump(struct ida *);
+
+#include "../../../lib/test_ida.c"
+
 /*
  * Check that we get the correct error when we run out of memory doing
- * allocations.  To ensure we run out of memory, just "forget" to preload.
+ * allocations.  In userspace, GFP_NOWAIT will always fail an allocation.
  * The first test is for not having a bitmap available, and the second test
  * is for not being able to allocate a level of the radix tree.
  */
 void ida_check_nomem(void)
 {
 	DEFINE_IDA(ida);
-	int id, err;
-
-	err = ida_get_new_above(&ida, 256, &id);
-	assert(err == -EAGAIN);
-	err = ida_get_new_above(&ida, 1UL << 30, &id);
-	assert(err == -EAGAIN);
-}
-
-/*
- * Check what happens when we fill a leaf and then delete it.  This may
- * discover mishandling of IDR_FREE.
- */
-void ida_check_leaf(void)
-{
-	DEFINE_IDA(ida);
 	int id;
-	unsigned long i;
-
-	for (i = 0; i < IDA_BITMAP_BITS; i++) {
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		assert(!ida_get_new(&ida, &id));
-		assert(id == i);
-	}
 
-	ida_destroy(&ida);
-	assert(ida_is_empty(&ida));
-
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new(&ida, &id));
-	assert(id == 0);
-	ida_destroy(&ida);
-	assert(ida_is_empty(&ida));
+	id = ida_alloc_min(&ida, 256, GFP_NOWAIT);
+	IDA_BUG_ON(&ida, id != -ENOMEM);
+	id = ida_alloc_min(&ida, 1UL << 30, GFP_NOWAIT);
+	IDA_BUG_ON(&ida, id != -ENOMEM);
+	IDA_BUG_ON(&ida, !ida_is_empty(&ida));
 }
 
 /*
  * Check handling of conversions between exceptional entries and full bitmaps.
  */
-void ida_check_conv(void)
+void ida_check_conv_user(void)
 {
 	DEFINE_IDA(ida);
-	int id;
 	unsigned long i;
 
-	for (i = 0; i < IDA_BITMAP_BITS * 2; i += IDA_BITMAP_BITS) {
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		assert(!ida_get_new_above(&ida, i + 1, &id));
-		assert(id == i + 1);
-		assert(!ida_get_new_above(&ida, i + BITS_PER_LONG, &id));
-		assert(id == i + BITS_PER_LONG);
-		ida_remove(&ida, i + 1);
-		ida_remove(&ida, i + BITS_PER_LONG);
-		assert(ida_is_empty(&ida));
-	}
-
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-
-	for (i = 0; i < IDA_BITMAP_BITS * 2; i++) {
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		assert(!ida_get_new(&ida, &id));
-		assert(id == i);
-	}
-
-	for (i = IDA_BITMAP_BITS * 2; i > 0; i--) {
-		ida_remove(&ida, i - 1);
-	}
-	assert(ida_is_empty(&ida));
-
-	for (i = 0; i < IDA_BITMAP_BITS + BITS_PER_LONG - 4; i++) {
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		assert(!ida_get_new(&ida, &id));
-		assert(id == i);
-	}
-
-	for (i = IDA_BITMAP_BITS + BITS_PER_LONG - 4; i > 0; i--) {
-		ida_remove(&ida, i - 1);
-	}
-	assert(ida_is_empty(&ida));
-
-	radix_tree_cpu_dead(1);
 	for (i = 0; i < 1000000; i++) {
-		int err = ida_get_new(&ida, &id);
-		if (err == -EAGAIN) {
-			assert((i % IDA_BITMAP_BITS) == (BITS_PER_LONG - 2));
-			assert(ida_pre_get(&ida, GFP_KERNEL));
-			err = ida_get_new(&ida, &id);
+		int id = ida_alloc(&ida, GFP_NOWAIT);
+		if (id == -ENOMEM) {
+			IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) !=
+					  BITS_PER_XA_VALUE) &&
+					 ((i % IDA_BITMAP_BITS) != 0));
+			id = ida_alloc(&ida, GFP_KERNEL);
 		} else {
-			assert((i % IDA_BITMAP_BITS) != (BITS_PER_LONG - 2));
+			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
+					BITS_PER_XA_VALUE);
 		}
-		assert(!err);
-		assert(id == i);
+		IDA_BUG_ON(&ida, id != i);
 	}
 	ida_destroy(&ida);
 }
 
-/*
- * Check allocations up to and slightly above the maximum allowed (2^31-1) ID.
- * Allocating up to 2^31-1 should succeed, and then allocating the next one
- * should fail.
- */
-void ida_check_max(void)
-{
-	DEFINE_IDA(ida);
-	int id, err;
-	unsigned long i, j;
-
-	for (j = 1; j < 65537; j *= 2) {
-		unsigned long base = (1UL << 31) - j;
-		for (i = 0; i < j; i++) {
-			assert(ida_pre_get(&ida, GFP_KERNEL));
-			assert(!ida_get_new_above(&ida, base, &id));
-			assert(id == base + i);
-		}
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		err = ida_get_new_above(&ida, base, &id);
-		assert(err == -ENOSPC);
-		ida_destroy(&ida);
-		assert(ida_is_empty(&ida));
-		rcu_barrier();
-	}
-}
-
 void ida_check_random(void)
 {
 	DEFINE_IDA(ida);
 	DECLARE_BITMAP(bitmap, 2048);
-	int id, err;
 	unsigned int i;
 	time_t s = time(NULL);
 
@@ -454,15 +435,11 @@ void ida_check_random(void)
 		int bit = i & 2047;
 		if (test_bit(bit, bitmap)) {
 			__clear_bit(bit, bitmap);
-			ida_remove(&ida, bit);
+			ida_free(&ida, bit);
 		} else {
 			__set_bit(bit, bitmap);
-			do {
-				ida_pre_get(&ida, GFP_KERNEL);
-				err = ida_get_new_above(&ida, bit, &id);
-			} while (err == -EAGAIN);
-			assert(!err);
-			assert(id == bit);
+			IDA_BUG_ON(&ida, ida_alloc_min(&ida, bit, GFP_KERNEL)
+					!= bit);
 		}
 	}
 	ida_destroy(&ida);
@@ -488,71 +465,12 @@ void ida_simple_get_remove_test(void)
 	ida_destroy(&ida);
 }
 
-void ida_checks(void)
+void user_ida_checks(void)
 {
-	DEFINE_IDA(ida);
-	int id;
-	unsigned long i;
-
 	radix_tree_cpu_dead(1);
-	ida_check_nomem();
-
-	for (i = 0; i < 10000; i++) {
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		assert(!ida_get_new(&ida, &id));
-		assert(id == i);
-	}
-
-	ida_remove(&ida, 20);
-	ida_remove(&ida, 21);
-	for (i = 0; i < 3; i++) {
-		assert(ida_pre_get(&ida, GFP_KERNEL));
-		assert(!ida_get_new(&ida, &id));
-		if (i == 2)
-			assert(id == 10000);
-	}
-
-	for (i = 0; i < 5000; i++)
-		ida_remove(&ida, i);
-
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new_above(&ida, 5000, &id));
-	assert(id == 10001);
 
-	ida_destroy(&ida);
-
-	assert(ida_is_empty(&ida));
-
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new_above(&ida, 1, &id));
-	assert(id == 1);
-
-	ida_remove(&ida, id);
-	assert(ida_is_empty(&ida));
-	ida_destroy(&ida);
-	assert(ida_is_empty(&ida));
-
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new_above(&ida, 1, &id));
-	ida_destroy(&ida);
-	assert(ida_is_empty(&ida));
-
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new_above(&ida, 1, &id));
-	assert(id == 1);
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new_above(&ida, 1025, &id));
-	assert(id == 1025);
-	assert(ida_pre_get(&ida, GFP_KERNEL));
-	assert(!ida_get_new_above(&ida, 10000, &id));
-	assert(id == 10000);
-	ida_remove(&ida, 1025);
-	ida_destroy(&ida);
-	assert(ida_is_empty(&ida));
-
-	ida_check_leaf();
-	ida_check_max();
-	ida_check_conv();
+	ida_check_nomem();
+	ida_check_conv_user();
 	ida_check_random();
 	ida_simple_get_remove_test();
 
@@ -582,12 +500,19 @@ void ida_thread_tests(void)
 		pthread_join(threads[i], NULL);
 }
 
+void ida_tests(void)
+{
+	user_ida_checks();
+	ida_checks();
+	ida_exit();
+	ida_thread_tests();
+}
+
 int __weak main(void)
 {
 	radix_tree_init();
 	idr_checks();
-	ida_checks();
-	ida_thread_tests();
+	ida_tests();
 	radix_tree_cpu_dead(1);
 	rcu_barrier();
 	if (nr_allocated)
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
index a92bab513701..238db187aa15 100644
--- a/tools/testing/radix-tree/iteration_check.c
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -1,5 +1,5 @@
 /*
- * iteration_check.c: test races having to do with radix tree iteration
+ * iteration_check.c: test races having to do with xarray iteration
  * Copyright (c) 2016 Intel Corporation
  * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
  *
@@ -12,41 +12,54 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  */
-#include <linux/radix-tree.h>
 #include <pthread.h>
 #include "test.h"
 
 #define NUM_THREADS	5
 #define MAX_IDX		100
-#define TAG		0
-#define NEW_TAG		1
+#define TAG		XA_MARK_0
+#define NEW_TAG		XA_MARK_1
 
-static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t threads[NUM_THREADS];
 static unsigned int seeds[3];
-static RADIX_TREE(tree, GFP_KERNEL);
+static DEFINE_XARRAY(array);
 static bool test_complete;
 static int max_order;
 
-/* relentlessly fill the tree with tagged entries */
+void my_item_insert(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	struct item *item = item_create(index, 0);
+	int order;
+
+retry:
+	xas_lock(&xas);
+	for (order = max_order; order >= 0; order--) {
+		xas_set_order(&xas, index, order);
+		item->order = order;
+		if (xas_find_conflict(&xas))
+			continue;
+		xas_store(&xas, item);
+		xas_set_mark(&xas, TAG);
+		break;
+	}
+	xas_unlock(&xas);
+	if (xas_nomem(&xas, GFP_KERNEL))
+		goto retry;
+	if (order < 0)
+		free(item);
+}
+
+/* relentlessly fill the array with tagged entries */
 static void *add_entries_fn(void *arg)
 {
 	rcu_register_thread();
 
 	while (!test_complete) {
 		unsigned long pgoff;
-		int order;
 
 		for (pgoff = 0; pgoff < MAX_IDX; pgoff++) {
-			pthread_mutex_lock(&tree_lock);
-			for (order = max_order; order >= 0; order--) {
-				if (item_insert_order(&tree, pgoff, order)
-						== 0) {
-					item_tag_set(&tree, pgoff, TAG);
-					break;
-				}
-			}
-			pthread_mutex_unlock(&tree_lock);
+			my_item_insert(&array, pgoff);
 		}
 	}
 
@@ -56,33 +69,25 @@ static void *add_entries_fn(void *arg)
 }
 
 /*
- * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
- * things that have been removed and randomly resetting our iteration to the
- * next chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
- * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
- * NULL 'slot' variable.
+ * Iterate over tagged entries, retrying when we find ourselves in a deleted
+ * node and randomly pausing the iteration.
  */
 static void *tagged_iteration_fn(void *arg)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &array, 0);
+	void *entry;
 
 	rcu_register_thread();
 
 	while (!test_complete) {
+		xas_set(&xas, 0);
 		rcu_read_lock();
-		radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
-			void *entry = radix_tree_deref_slot(slot);
-			if (unlikely(!entry))
+		xas_for_each_marked(&xas, entry, ULONG_MAX, TAG) {
+			if (xas_retry(&xas, entry))
 				continue;
 
-			if (radix_tree_deref_retry(entry)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
 			if (rand_r(&seeds[0]) % 50 == 0) {
-				slot = radix_tree_iter_resume(slot, &iter);
+				xas_pause(&xas);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
@@ -97,33 +102,25 @@ static void *tagged_iteration_fn(void *arg)
 }
 
 /*
- * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
- * that have been removed and randomly resetting our iteration to the next
- * chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
- * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
- * NULL 'slot' variable.
+ * Iterate over the entries, retrying when we find ourselves in a deleted
+ * node and randomly pausing the iteration.
  */
 static void *untagged_iteration_fn(void *arg)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &array, 0);
+	void *entry;
 
 	rcu_register_thread();
 
 	while (!test_complete) {
+		xas_set(&xas, 0);
 		rcu_read_lock();
-		radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-			void *entry = radix_tree_deref_slot(slot);
-			if (unlikely(!entry))
+		xas_for_each(&xas, entry, ULONG_MAX) {
+			if (xas_retry(&xas, entry))
 				continue;
 
-			if (radix_tree_deref_retry(entry)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
 			if (rand_r(&seeds[1]) % 50 == 0) {
-				slot = radix_tree_iter_resume(slot, &iter);
+				xas_pause(&xas);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
@@ -138,7 +135,7 @@ static void *untagged_iteration_fn(void *arg)
 }
 
 /*
- * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * Randomly remove entries to help induce retries in the
  * two iteration functions.
  */
 static void *remove_entries_fn(void *arg)
@@ -147,12 +144,13 @@ static void *remove_entries_fn(void *arg)
 
 	while (!test_complete) {
 		int pgoff;
+		struct item *item;
 
 		pgoff = rand_r(&seeds[2]) % MAX_IDX;
 
-		pthread_mutex_lock(&tree_lock);
-		item_delete(&tree, pgoff);
-		pthread_mutex_unlock(&tree_lock);
+		item = xa_erase(&array, pgoff);
+		if (item)
+			item_free(item, pgoff);
 	}
 
 	rcu_unregister_thread();
@@ -165,8 +163,7 @@ static void *tag_entries_fn(void *arg)
 	rcu_register_thread();
 
 	while (!test_complete) {
-		tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG,
-					NEW_TAG);
+		tag_tagged_items(&array, 0, MAX_IDX, 10, TAG, NEW_TAG);
 	}
 	rcu_unregister_thread();
 	return NULL;
@@ -217,5 +214,5 @@ void iteration_test(unsigned order, unsigned test_duration)
 		}
 	}
 
-	item_kill_tree(&tree);
+	item_kill_tree(&array);
 }
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
index 23b8ed52f8c8..03dc8a57eb99 100644
--- a/tools/testing/radix-tree/linux/bug.h
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -1 +1,2 @@
+#include <stdio.h>
 #include "asm/bug.h"
diff --git a/tools/testing/radix-tree/linux/kconfig.h b/tools/testing/radix-tree/linux/kconfig.h
new file mode 100644
index 000000000000..6c8675859913
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kconfig.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/kconfig.h"
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 426f32f28547..4568248222ae 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -14,7 +14,12 @@
 #include "../../../include/linux/kconfig.h"
 
 #define printk printf
+#define pr_info printk
 #define pr_debug printk
 #define pr_cont printk
 
+#define __acquires(x)
+#define __releases(x)
+#define __must_hold(x)
+
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
new file mode 100644
index 000000000000..565fccdfe6e9
--- /dev/null
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_LOCKDEP_H
+#define _LINUX_LOCKDEP_H
+struct lock_class_key {
+	unsigned int a;
+};
+
+static inline void lockdep_set_class(spinlock_t *lock,
+					struct lock_class_key *key)
+{
+}
+#endif /* _LINUX_LOCKDEP_H */
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index 24f13d27a8da..d1635a5bef02 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -2,7 +2,6 @@
 #ifndef _TEST_RADIX_TREE_H
 #define _TEST_RADIX_TREE_H
 
-#include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
 
 extern int kmalloc_verbose;
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
index 73ed33658203..fd280b070fdb 100644
--- a/tools/testing/radix-tree/linux/rcupdate.h
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -6,5 +6,7 @@
 
 #define rcu_dereference_raw(p) rcu_dereference(p)
 #define rcu_dereference_protected(p, cond) rcu_dereference(p)
+#define rcu_dereference_check(p, cond) rcu_dereference(p)
+#define RCU_INIT_POINTER(p, v)	(p) = (v)
 
 #endif
diff --git a/tools/testing/radix-tree/linux/xarray.h b/tools/testing/radix-tree/linux/xarray.h
new file mode 100644
index 000000000000..df3812cda376
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xarray.h
@@ -0,0 +1,2 @@
+#include "generated/map-shift.h"
+#include "../../../../include/linux/xarray.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 257f3f8aacaa..7a22d6e3732e 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -27,20 +27,22 @@ void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
 		item_check_present(&tree, middle + idx);
 	item_check_absent(&tree, middle + up);
 
-	item_gang_check_present(&tree, middle - down,
-			up + down, chunk, hop);
-	item_full_scan(&tree, middle - down, down + up, chunk);
+	if (chunk > 0) {
+		item_gang_check_present(&tree, middle - down, up + down,
+				chunk, hop);
+		item_full_scan(&tree, middle - down, down + up, chunk);
+	}
 	item_kill_tree(&tree);
 }
 
 void gang_check(void)
 {
-	__gang_check(1 << 30, 128, 128, 35, 2);
-	__gang_check(1 << 31, 128, 128, 32, 32);
-	__gang_check(1 << 31, 128, 128, 32, 100);
-	__gang_check(1 << 31, 128, 128, 17, 7);
-	__gang_check(0xffff0000, 0, 65536, 17, 7);
-	__gang_check(0xfffffffe, 1, 1, 17, 7);
+	__gang_check(1UL << 30, 128, 128, 35, 2);
+	__gang_check(1UL << 31, 128, 128, 32, 32);
+	__gang_check(1UL << 31, 128, 128, 32, 100);
+	__gang_check(1UL << 31, 128, 128, 17, 7);
+	__gang_check(0xffff0000UL, 0, 65536, 17, 7);
+	__gang_check(0xfffffffeUL, 1, 1, 17, 7);
 }
 
 void __big_gang_check(void)
@@ -212,7 +214,7 @@ void copy_tag_check(void)
 	}
 
 //	printf("\ncopying tags...\n");
-	tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+	tagged = tag_tagged_items(&tree, start, end, ITEMS, XA_MARK_0, XA_MARK_1);
 
 //	printf("checking copied tags\n");
 	assert(tagged == count);
@@ -221,7 +223,7 @@ void copy_tag_check(void)
 	/* Copy tags in several rounds */
 //	printf("\ncopying tags...\n");
 	tmp = rand() % (count / 10 + 2);
-	tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+	tagged = tag_tagged_items(&tree, start, end, tmp, XA_MARK_0, XA_MARK_2);
 	assert(tagged == count);
 
 //	printf("%lu %lu %lu\n", tagged, tmp, count);
@@ -234,63 +236,6 @@ void copy_tag_check(void)
 	item_kill_tree(&tree);
 }
 
-static void __locate_check(struct radix_tree_root *tree, unsigned long index,
-			unsigned order)
-{
-	struct item *item;
-	unsigned long index2;
-
-	item_insert_order(tree, index, order);
-	item = item_lookup(tree, index);
-	index2 = find_item(tree, item);
-	if (index != index2) {
-		printv(2, "index %ld order %d inserted; found %ld\n",
-			index, order, index2);
-		abort();
-	}
-}
-
-static void __order_0_locate_check(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	int i;
-
-	for (i = 0; i < 50; i++)
-		__locate_check(&tree, rand() % INT_MAX, 0);
-
-	item_kill_tree(&tree);
-}
-
-static void locate_check(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	unsigned order;
-	unsigned long offset, index;
-
-	__order_0_locate_check();
-
-	for (order = 0; order < 20; order++) {
-		for (offset = 0; offset < (1 << (order + 3));
-		     offset += (1UL << order)) {
-			for (index = 0; index < (1UL << (order + 5));
-			     index += (1UL << order)) {
-				__locate_check(&tree, index + offset, order);
-			}
-			if (find_item(&tree, &tree) != -1)
-				abort();
-
-			item_kill_tree(&tree);
-		}
-	}
-
-	if (find_item(&tree, &tree) != -1)
-		abort();
-	__locate_check(&tree, -1, 0);
-	if (find_item(&tree, &tree) != -1)
-		abort();
-	item_kill_tree(&tree);
-}
-
 static void single_thread_tests(bool long_run)
 {
 	int i;
@@ -301,10 +246,6 @@ static void single_thread_tests(bool long_run)
 	rcu_barrier();
 	printv(2, "after multiorder_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
-	locate_check();
-	rcu_barrier();
-	printv(2, "after locate_check: %d allocated, preempt %d\n",
-		nr_allocated, preempt_count);
 	tag_check();
 	rcu_barrier();
 	printv(2, "after tag_check: %d allocated, preempt %d\n",
@@ -322,7 +263,7 @@ static void single_thread_tests(bool long_run)
 	printv(2, "after dynamic_height_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	idr_checks();
-	ida_checks();
+	ida_tests();
 	rcu_barrier();
 	printv(2, "after idr_checks: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
@@ -363,13 +304,14 @@ int main(int argc, char **argv)
 	rcu_register_thread();
 	radix_tree_init();
 
+	xarray_tests();
 	regression1_test();
 	regression2_test();
 	regression3_test();
+	regression4_test();
 	iteration_test(0, 10 + 90 * long_run);
 	iteration_test(7, 10 + 90 * long_run);
 	single_thread_tests(long_run);
-	ida_thread_tests();
 
 	/* Free any remaining preallocated nodes */
 	radix_tree_cpu_dead(0);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 7bf405638b0b..ff27a74d9762 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -20,230 +20,39 @@
 
 #include "test.h"
 
-#define for_each_index(i, base, order) \
-	for (i = base; i < base + (1 << order); i++)
-
-static void __multiorder_tag_test(int index, int order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	int base, err, i;
-
-	/* our canonical entry */
-	base = index & ~((1 << order) - 1);
-
-	printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
-			index, base);
-
-	err = item_insert_order(&tree, index, order);
-	assert(!err);
-
-	/*
-	 * Verify we get collisions for covered indices.  We try and fail to
-	 * insert an exceptional entry so we don't leak memory via
-	 * item_insert_order().
-	 */
-	for_each_index(i, base, order) {
-		err = __radix_tree_insert(&tree, i, order,
-				(void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
-		assert(err == -EEXIST);
-	}
-
-	for_each_index(i, base, order) {
-		assert(!radix_tree_tag_get(&tree, i, 0));
-		assert(!radix_tree_tag_get(&tree, i, 1));
-	}
-
-	assert(radix_tree_tag_set(&tree, index, 0));
-
-	for_each_index(i, base, order) {
-		assert(radix_tree_tag_get(&tree, i, 0));
-		assert(!radix_tree_tag_get(&tree, i, 1));
-	}
-
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
-	assert(radix_tree_tag_clear(&tree, index, 0));
-
-	for_each_index(i, base, order) {
-		assert(!radix_tree_tag_get(&tree, i, 0));
-		assert(radix_tree_tag_get(&tree, i, 1));
-	}
-
-	assert(radix_tree_tag_clear(&tree, index, 1));
-
-	assert(!radix_tree_tagged(&tree, 0));
-	assert(!radix_tree_tagged(&tree, 1));
-
-	item_kill_tree(&tree);
-}
-
-static void __multiorder_tag_test2(unsigned order, unsigned long index2)
+static int item_insert_order(struct xarray *xa, unsigned long index,
+			unsigned order)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
-	unsigned long index = (1 << order);
-	index2 += index;
-
-	assert(item_insert_order(&tree, 0, order) == 0);
-	assert(item_insert(&tree, index2) == 0);
-
-	assert(radix_tree_tag_set(&tree, 0, 0));
-	assert(radix_tree_tag_set(&tree, index2, 0));
-
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_tag_tests(void)
-{
-	int i, j;
-
-	/* test multi-order entry for indices 0-7 with no sibling pointers */
-	__multiorder_tag_test(0, 3);
-	__multiorder_tag_test(5, 3);
-
-	/* test multi-order entry for indices 8-15 with no sibling pointers */
-	__multiorder_tag_test(8, 3);
-	__multiorder_tag_test(15, 3);
-
-	/*
-	 * Our order 5 entry covers indices 0-31 in a tree with height=2.
-	 * This is broken up as follows:
-	 * 0-7:		canonical entry
-	 * 8-15:	sibling 1
-	 * 16-23:	sibling 2
-	 * 24-31:	sibling 3
-	 */
-	__multiorder_tag_test(0, 5);
-	__multiorder_tag_test(29, 5);
-
-	/* same test, but with indices 32-63 */
-	__multiorder_tag_test(32, 5);
-	__multiorder_tag_test(44, 5);
-
-	/*
-	 * Our order 8 entry covers indices 0-255 in a tree with height=3.
-	 * This is broken up as follows:
-	 * 0-63:	canonical entry
-	 * 64-127:	sibling 1
-	 * 128-191:	sibling 2
-	 * 192-255:	sibling 3
-	 */
-	__multiorder_tag_test(0, 8);
-	__multiorder_tag_test(190, 8);
-
-	/* same test, but with indices 256-511 */
-	__multiorder_tag_test(256, 8);
-	__multiorder_tag_test(300, 8);
-
-	__multiorder_tag_test(0x12345678UL, 8);
-
-	for (i = 1; i < 10; i++)
-		for (j = 0; j < (10 << i); j++)
-			__multiorder_tag_test2(i, j);
-}
-
-static void multiorder_check(unsigned long index, int order)
-{
-	unsigned long i;
-	unsigned long min = index & ~((1UL << order) - 1);
-	unsigned long max = min + (1UL << order);
-	void **slot;
-	struct item *item2 = item_create(min, order);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	printv(2, "Multiorder index %ld, order %d\n", index, order);
-
-	assert(item_insert_order(&tree, index, order) == 0);
-
-	for (i = min; i < max; i++) {
-		struct item *item = item_lookup(&tree, i);
-		assert(item != 0);
-		assert(item->index == index);
-	}
-	for (i = 0; i < min; i++)
-		item_check_absent(&tree, i);
-	for (i = max; i < 2*max; i++)
-		item_check_absent(&tree, i);
-	for (i = min; i < max; i++)
-		assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
-
-	slot = radix_tree_lookup_slot(&tree, index);
-	free(*slot);
-	radix_tree_replace_slot(&tree, slot, item2);
-	for (i = min; i < max; i++) {
-		struct item *item = item_lookup(&tree, i);
-		assert(item != 0);
-		assert(item->index == min);
-	}
-
-	assert(item_delete(&tree, min) != 0);
-
-	for (i = 0; i < 2*max; i++)
-		item_check_absent(&tree, i);
-}
-
-static void multiorder_shrink(unsigned long index, int order)
-{
-	unsigned long i;
-	unsigned long max = 1 << order;
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-
-	printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
+	XA_STATE_ORDER(xas, xa, index, order);
+	struct item *item = item_create(index, order);
 
-	assert(item_insert_order(&tree, 0, order) == 0);
-
-	node = tree.rnode;
-
-	assert(item_insert(&tree, index) == 0);
-	assert(node != tree.rnode);
-
-	assert(item_delete(&tree, index) != 0);
-	assert(node == tree.rnode);
-
-	for (i = 0; i < max; i++) {
-		struct item *item = item_lookup(&tree, i);
-		assert(item != 0);
-		assert(item->index == 0);
-	}
-	for (i = max; i < 2*max; i++)
-		item_check_absent(&tree, i);
-
-	if (!item_delete(&tree, 0)) {
-		printv(2, "failed to delete index %ld (order %d)\n", index, order);
-		abort();
-	}
-
-	for (i = 0; i < 2*max; i++)
-		item_check_absent(&tree, i);
-}
-
-static void multiorder_insert_bug(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
+	do {
+		xas_lock(&xas);
+		xas_store(&xas, item);
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
 
-	item_insert(&tree, 0);
-	radix_tree_tag_set(&tree, 0, 0);
-	item_insert_order(&tree, 3 << 6, 6);
+	if (!xas_error(&xas))
+		return 0;
 
-	item_kill_tree(&tree);
+	free(item);
+	return xas_error(&xas);
 }
 
-void multiorder_iteration(void)
+void multiorder_iteration(struct xarray *xa)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, 0);
+	struct item *item;
 	int i, j, err;
 
-	printv(1, "Multiorder iteration test\n");
-
 #define NUM_ENTRIES 11
 	int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
 	int order[NUM_ENTRIES] = {1, 1, 2, 3,  4,  1,  0,  1,  3,  0, 7};
 
+	printv(1, "Multiorder iteration test\n");
+
 	for (i = 0; i < NUM_ENTRIES; i++) {
-		err = item_insert_order(&tree, index[i], order[i]);
+		err = item_insert_order(xa, index[i], order[i]);
 		assert(!err);
 	}
 
@@ -252,14 +61,14 @@ void multiorder_iteration(void)
 			if (j <= (index[i] | ((1 << order[i]) - 1)))
 				break;
 
-		radix_tree_for_each_slot(slot, &tree, &iter, j) {
-			int height = order[i] / RADIX_TREE_MAP_SHIFT;
-			int shift = height * RADIX_TREE_MAP_SHIFT;
+		xas_set(&xas, j);
+		xas_for_each(&xas, item, ULONG_MAX) {
+			int height = order[i] / XA_CHUNK_SHIFT;
+			int shift = height * XA_CHUNK_SHIFT;
 			unsigned long mask = (1UL << order[i]) - 1;
-			struct item *item = *slot;
 
-			assert((iter.index | mask) == (index[i] | mask));
-			assert(iter.shift == shift);
+			assert((xas.xa_index | mask) == (index[i] | mask));
+			assert(xas.xa_node->shift == shift);
 			assert(!radix_tree_is_internal_node(item));
 			assert((item->index | mask) == (index[i] | mask));
 			assert(item->order == order[i]);
@@ -267,18 +76,15 @@ void multiorder_iteration(void)
 		}
 	}
 
-	item_kill_tree(&tree);
+	item_kill_tree(xa);
 }
 
-void multiorder_tagged_iteration(void)
+void multiorder_tagged_iteration(struct xarray *xa)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, 0);
+	struct item *item;
 	int i, j;
 
-	printv(1, "Multiorder tagged iteration test\n");
-
 #define MT_NUM_ENTRIES 9
 	int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
 	int order[MT_NUM_ENTRIES] = {1, 0, 2, 4,  3,  1,  3,  0,   7};
@@ -286,13 +92,15 @@ void multiorder_tagged_iteration(void)
 #define TAG_ENTRIES 7
 	int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128};
 
+	printv(1, "Multiorder tagged iteration test\n");
+
 	for (i = 0; i < MT_NUM_ENTRIES; i++)
-		assert(!item_insert_order(&tree, index[i], order[i]));
+		assert(!item_insert_order(xa, index[i], order[i]));
 
-	assert(!radix_tree_tagged(&tree, 1));
+	assert(!xa_marked(xa, XA_MARK_1));
 
 	for (i = 0; i < TAG_ENTRIES; i++)
-		assert(radix_tree_tag_set(&tree, tag_index[i], 1));
+		xa_set_mark(xa, tag_index[i], XA_MARK_1);
 
 	for (j = 0; j < 256; j++) {
 		int k;
@@ -304,23 +112,23 @@ void multiorder_tagged_iteration(void)
 				break;
 		}
 
-		radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+		xas_set(&xas, j);
+		xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_1) {
 			unsigned long mask;
-			struct item *item = *slot;
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
 			mask = (1UL << order[k]) - 1;
 
-			assert((iter.index | mask) == (tag_index[i] | mask));
-			assert(!radix_tree_is_internal_node(item));
+			assert((xas.xa_index | mask) == (tag_index[i] | mask));
+			assert(!xa_is_internal(item));
 			assert((item->index | mask) == (tag_index[i] | mask));
 			assert(item->order == order[k]);
 			i++;
 		}
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
-				TAG_ENTRIES);
+	assert(tag_tagged_items(xa, 0, ULONG_MAX, TAG_ENTRIES, XA_MARK_1,
+				XA_MARK_2) == TAG_ENTRIES);
 
 	for (j = 0; j < 256; j++) {
 		int mask, k;
@@ -332,297 +140,31 @@ void multiorder_tagged_iteration(void)
 				break;
 		}
 
-		radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
-			struct item *item = *slot;
+		xas_set(&xas, j);
+		xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_2) {
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
 			mask = (1 << order[k]) - 1;
 
-			assert((iter.index | mask) == (tag_index[i] | mask));
-			assert(!radix_tree_is_internal_node(item));
+			assert((xas.xa_index | mask) == (tag_index[i] | mask));
+			assert(!xa_is_internal(item));
 			assert((item->index | mask) == (tag_index[i] | mask));
 			assert(item->order == order[k]);
 			i++;
 		}
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
-			== TAG_ENTRIES);
+	assert(tag_tagged_items(xa, 1, ULONG_MAX, MT_NUM_ENTRIES * 2, XA_MARK_1,
+				XA_MARK_0) == TAG_ENTRIES);
 	i = 0;
-	radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
-		assert(iter.index == tag_index[i]);
+	xas_set(&xas, 0);
+	xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_0) {
+		assert(xas.xa_index == tag_index[i]);
 		i++;
 	}
+	assert(i == TAG_ENTRIES);
 
-	item_kill_tree(&tree);
-}
-
-/*
- * Basic join checks: make sure we can't find an entry in the tree after
- * a larger entry has replaced it
- */
-static void multiorder_join1(unsigned long index,
-				unsigned order1, unsigned order2)
-{
-	unsigned long loc;
-	void *item, *item2 = item_create(index + 1, order1);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert_order(&tree, index, order2);
-	item = radix_tree_lookup(&tree, index);
-	radix_tree_join(&tree, index + 1, order1, item2);
-	loc = find_item(&tree, item);
-	if (loc == -1)
-		free(item);
-	item = radix_tree_lookup(&tree, index + 1);
-	assert(item == item2);
-	item_kill_tree(&tree);
-}
-
-/*
- * Check that the accounting of exceptional entries is handled correctly
- * by joining an exceptional entry to a normal pointer.
- */
-static void multiorder_join2(unsigned order1, unsigned order2)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void *item1 = item_create(0, order1);
-	void *item2;
-
-	item_insert_order(&tree, 0, order2);
-	radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
-	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == (void *)0x12UL);
-	assert(node->exceptional == 1);
-
-	item2 = radix_tree_lookup(&tree, 0);
-	free(item2);
-
-	radix_tree_join(&tree, 0, order1, item1);
-	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == item1);
-	assert(node->exceptional == 0);
-	item_kill_tree(&tree);
-}
-
-/*
- * This test revealed an accounting bug for exceptional entries at one point.
- * Nodes were being freed back into the pool with an elevated exception count
- * by radix_tree_join() and then radix_tree_split() was failing to zero the
- * count of exceptional entries.
- */
-static void multiorder_join3(unsigned int order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void **slot;
-	struct radix_tree_iter iter;
-	unsigned long i;
-
-	for (i = 0; i < (1 << order); i++) {
-		radix_tree_insert(&tree, i, (void *)0x12UL);
-	}
-
-	radix_tree_join(&tree, 0, order, (void *)0x16UL);
-	rcu_barrier();
-
-	radix_tree_split(&tree, 0, 0);
-
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL);
-	}
-
-	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->exceptional == node->count);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_join(void)
-{
-	int i, j, idx;
-
-	for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
-		for (i = 1; i < 15; i++) {
-			for (j = 0; j < i; j++) {
-				multiorder_join1(idx, i, j);
-			}
-		}
-	}
-
-	for (i = 1; i < 15; i++) {
-		for (j = 0; j < i; j++) {
-			multiorder_join2(i, j);
-		}
-	}
-
-	for (i = 3; i < 10; i++) {
-		multiorder_join3(i);
-	}
-}
-
-static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
-{
-	struct radix_tree_preload *rtp = &radix_tree_preloads;
-	if (rtp->nr != 0)
-		printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
-							rtp->nr);
-	/*
-	 * Can't check for equality here as some nodes may have been
-	 * RCU-freed while we ran.  But we should never finish with more
-	 * nodes allocated since they should have all been preloaded.
-	 */
-	if (nr_allocated > alloc)
-		printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
-							alloc, nr_allocated);
-}
-
-static void __multiorder_split(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_ATOMIC);
-	void **slot;
-	struct radix_tree_iter iter;
-	unsigned alloc;
-	struct item *item;
-
-	radix_tree_preload(GFP_KERNEL);
-	assert(item_insert_order(&tree, 0, old_order) == 0);
-	radix_tree_preload_end();
-
-	/* Wipe out the preloaded cache or it'll confuse check_mem() */
-	radix_tree_cpu_dead(0);
-
-	item = radix_tree_tag_set(&tree, 0, 2);
-
-	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
-	alloc = nr_allocated;
-	radix_tree_split(&tree, 0, new_order);
-	check_mem(old_order, new_order, alloc);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot,
-					item_create(iter.index, new_order));
-	}
-	radix_tree_preload_end();
-
-	item_kill_tree(&tree);
-	free(item);
-}
-
-static void __multiorder_split2(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	void **slot;
-	struct radix_tree_iter iter;
-	struct radix_tree_node *node;
-	void *item;
-
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
-	assert(node->exceptional > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot,
-					item_create(iter.index, new_order));
-	}
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item != (void *)0x12);
-	assert(node->exceptional == 0);
-
-	item_kill_tree(&tree);
-}
-
-static void __multiorder_split3(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	void **slot;
-	struct radix_tree_iter iter;
-	struct radix_tree_node *node;
-	void *item;
-
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
-	assert(node->exceptional > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
-	}
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x16);
-	assert(node->exceptional > 0);
-
-	item_kill_tree(&tree);
-
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
-	assert(node->exceptional > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		if (iter.index == (1 << new_order))
-			radix_tree_iter_replace(&tree, &iter, slot,
-						(void *)0x16);
-		else
-			radix_tree_iter_replace(&tree, &iter, slot, NULL);
-	}
-
-	item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
-	assert(item == (void *)0x16);
-	assert(node->count == node->exceptional);
-	do {
-		node = node->parent;
-		if (!node)
-			break;
-		assert(node->count == 1);
-		assert(node->exceptional == 0);
-	} while (1);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_split(void)
-{
-	int i, j;
-
-	for (i = 3; i < 11; i++)
-		for (j = 0; j < i; j++) {
-			__multiorder_split(i, j);
-			__multiorder_split2(i, j);
-			__multiorder_split3(i, j);
-		}
-}
-
-static void multiorder_account(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void **slot;
-
-	item_insert_order(&tree, 0, 5);
-
-	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
-	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->count == node->exceptional * 2);
-	radix_tree_delete(&tree, 1 << 5);
-	assert(node->exceptional == 0);
-
-	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
-	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
-	assert(node->count == node->exceptional * 2);
-	__radix_tree_replace(&tree, node, slot, NULL, NULL);
-	assert(node->exceptional == 0);
-
-	item_kill_tree(&tree);
+	item_kill_tree(xa);
 }
 
 bool stop_iteration = false;
@@ -645,68 +187,45 @@ static void *creator_func(void *ptr)
 
 static void *iterator_func(void *ptr)
 {
-	struct radix_tree_root *tree = ptr;
-	struct radix_tree_iter iter;
+	XA_STATE(xas, ptr, 0);
 	struct item *item;
-	void **slot;
 
 	while (!stop_iteration) {
 		rcu_read_lock();
-		radix_tree_for_each_slot(slot, tree, &iter, 0) {
-			item = radix_tree_deref_slot(slot);
-
-			if (!item)
+		xas_for_each(&xas, item, ULONG_MAX) {
+			if (xas_retry(&xas, item))
 				continue;
-			if (radix_tree_deref_retry(item)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
 
-			item_sanity(item, iter.index);
+			item_sanity(item, xas.xa_index);
 		}
 		rcu_read_unlock();
 	}
 	return NULL;
 }
 
-static void multiorder_iteration_race(void)
+static void multiorder_iteration_race(struct xarray *xa)
 {
 	const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
 	pthread_t worker_thread[num_threads];
-	RADIX_TREE(tree, GFP_KERNEL);
 	int i;
 
-	pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+	pthread_create(&worker_thread[0], NULL, &creator_func, xa);
 	for (i = 1; i < num_threads; i++)
-		pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+		pthread_create(&worker_thread[i], NULL, &iterator_func, xa);
 
 	for (i = 0; i < num_threads; i++)
 		pthread_join(worker_thread[i], NULL);
 
-	item_kill_tree(&tree);
+	item_kill_tree(xa);
 }
 
+static DEFINE_XARRAY(array);
+
 void multiorder_checks(void)
 {
-	int i;
-
-	for (i = 0; i < 20; i++) {
-		multiorder_check(200, i);
-		multiorder_check(0, i);
-		multiorder_check((1UL << i) + 1, i);
-	}
-
-	for (i = 0; i < 15; i++)
-		multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i);
-
-	multiorder_insert_bug();
-	multiorder_tag_tests();
-	multiorder_iteration();
-	multiorder_tagged_iteration();
-	multiorder_join();
-	multiorder_split();
-	multiorder_account();
-	multiorder_iteration_race();
+	multiorder_iteration(&array);
+	multiorder_tagged_iteration(&array);
+	multiorder_iteration_race(&array);
 
 	radix_tree_cpu_dead(0);
 }
diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h
index 3c8a1584e9ee..135145af18b7 100644
--- a/tools/testing/radix-tree/regression.h
+++ b/tools/testing/radix-tree/regression.h
@@ -5,5 +5,6 @@
 void regression1_test(void);
 void regression2_test(void);
 void regression3_test(void);
+void regression4_test(void);
 
 #endif
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index 0aece092f40e..a61c7bcbc72d 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -44,7 +44,6 @@
 #include "regression.h"
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
-static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
 
 struct page {
 	pthread_mutex_t lock;
@@ -53,12 +52,12 @@ struct page {
 	unsigned long index;
 };
 
-static struct page *page_alloc(void)
+static struct page *page_alloc(int index)
 {
 	struct page *p;
 	p = malloc(sizeof(struct page));
 	p->count = 1;
-	p->index = 1;
+	p->index = index;
 	pthread_mutex_init(&p->lock, NULL);
 
 	return p;
@@ -80,53 +79,33 @@ static void page_free(struct page *p)
 static unsigned find_get_pages(unsigned long start,
 			    unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
-	unsigned int ret;
-	unsigned int nr_found;
+	XA_STATE(xas, &mt_tree, start);
+	struct page *page;
+	unsigned int ret = 0;
 
 	rcu_read_lock();
-restart:
-	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
-				(void ***)pages, NULL, start, nr_pages);
-	ret = 0;
-	for (i = 0; i < nr_found; i++) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot((void **)pages[i]);
-		if (unlikely(!page))
+	xas_for_each(&xas, page, ULONG_MAX) {
+		if (xas_retry(&xas, page))
 			continue;
 
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				/*
-				 * Transient condition which can only trigger
-				 * when entry at index 0 moves out of or back
-				 * to root: none yet gotten, safe to restart.
-				 */
-				assert((start | i) == 0);
-				goto restart;
-			}
-			/*
-			 * No exceptional entries are inserted in this test.
-			 */
-			assert(0);
-		}
-
 		pthread_mutex_lock(&page->lock);
-		if (!page->count) {
-			pthread_mutex_unlock(&page->lock);
-			goto repeat;
-		}
+		if (!page->count)
+			goto unlock;
+
 		/* don't actually update page refcount */
 		pthread_mutex_unlock(&page->lock);
 
 		/* Has the page moved? */
-		if (unlikely(page != *((void **)pages[i]))) {
-			goto repeat;
-		}
+		if (unlikely(page != xas_reload(&xas)))
+			goto put_page;
 
 		pages[ret] = page;
 		ret++;
+		continue;
+unlock:
+		pthread_mutex_unlock(&page->lock);
+put_page:
+		xas_reset(&xas);
 	}
 	rcu_read_unlock();
 	return ret;
@@ -145,30 +124,30 @@ static void *regression1_fn(void *arg)
 		for (j = 0; j < 1000000; j++) {
 			struct page *p;
 
-			p = page_alloc();
-			pthread_mutex_lock(&mt_lock);
+			p = page_alloc(0);
+			xa_lock(&mt_tree);
 			radix_tree_insert(&mt_tree, 0, p);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 
-			p = page_alloc();
-			pthread_mutex_lock(&mt_lock);
+			p = page_alloc(1);
+			xa_lock(&mt_tree);
 			radix_tree_insert(&mt_tree, 1, p);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			p = radix_tree_delete(&mt_tree, 1);
 			pthread_mutex_lock(&p->lock);
 			p->count--;
 			pthread_mutex_unlock(&p->lock);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 			page_free(p);
 
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			p = radix_tree_delete(&mt_tree, 0);
 			pthread_mutex_lock(&p->lock);
 			p->count--;
 			pthread_mutex_unlock(&p->lock);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 			page_free(p);
 		}
 	} else {
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
index 424b91c77831..f2c7e640a919 100644
--- a/tools/testing/radix-tree/regression2.c
+++ b/tools/testing/radix-tree/regression2.c
@@ -53,9 +53,9 @@
 #include "regression.h"
 #include "test.h"
 
-#define PAGECACHE_TAG_DIRTY     0
-#define PAGECACHE_TAG_WRITEBACK 1
-#define PAGECACHE_TAG_TOWRITE   2
+#define PAGECACHE_TAG_DIRTY     XA_MARK_0
+#define PAGECACHE_TAG_WRITEBACK XA_MARK_1
+#define PAGECACHE_TAG_TOWRITE   XA_MARK_2
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
 unsigned long page_count = 0;
@@ -92,7 +92,7 @@ void regression2_test(void)
 	/* 1. */
 	start = 0;
 	end = max_slots - 2;
-	tag_tagged_items(&mt_tree, NULL, start, end, 1,
+	tag_tagged_items(&mt_tree, start, end, 1,
 				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 
 	/* 2. */
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
index ace2543c3eda..9f9a3b280f56 100644
--- a/tools/testing/radix-tree/regression3.c
+++ b/tools/testing/radix-tree/regression3.c
@@ -69,21 +69,6 @@ void regression3_test(void)
 			continue;
 		}
 	}
-	radix_tree_delete(&root, 1);
-
-	first = true;
-	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printv(2, "contig %ld %p\n", iter.index, *slot);
-		if (first) {
-			radix_tree_insert(&root, 1, ptr);
-			first = false;
-		}
-		if (radix_tree_deref_retry(*slot)) {
-			printv(2, "retry at %ld\n", iter.index);
-			slot = radix_tree_iter_retry(&iter);
-			continue;
-		}
-	}
 
 	radix_tree_for_each_slot(slot, &root, &iter, 0) {
 		printv(2, "slot %ld %p\n", iter.index, *slot);
@@ -93,14 +78,6 @@ void regression3_test(void)
 		}
 	}
 
-	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printv(2, "contig %ld %p\n", iter.index, *slot);
-		if (!iter.index) {
-			printv(2, "next at %ld\n", iter.index);
-			slot = radix_tree_iter_resume(slot, &iter);
-		}
-	}
-
 	radix_tree_tag_set(&root, 0, 0);
 	radix_tree_tag_set(&root, 1, 0);
 	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
diff --git a/tools/testing/radix-tree/regression4.c b/tools/testing/radix-tree/regression4.c
new file mode 100644
index 000000000000..cf4e5aba6b08
--- /dev/null
+++ b/tools/testing/radix-tree/regression4.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "regression.h"
+
+static pthread_barrier_t worker_barrier;
+static int obj0, obj1;
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+
+static void *reader_fn(void *arg)
+{
+	int i;
+	void *entry;
+
+	rcu_register_thread();
+	pthread_barrier_wait(&worker_barrier);
+
+	for (i = 0; i < 1000000; i++) {
+		rcu_read_lock();
+		entry = radix_tree_lookup(&mt_tree, 0);
+		rcu_read_unlock();
+		if (entry != &obj0) {
+			printf("iteration %d bad entry = %p\n", i, entry);
+			abort();
+		}
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+static void *writer_fn(void *arg)
+{
+	int i;
+
+	rcu_register_thread();
+	pthread_barrier_wait(&worker_barrier);
+
+	for (i = 0; i < 1000000; i++) {
+		radix_tree_insert(&mt_tree, 1, &obj1);
+		radix_tree_delete(&mt_tree, 1);
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+void regression4_test(void)
+{
+	pthread_t reader, writer;
+
+	printv(1, "regression test 4 starting\n");
+
+	radix_tree_insert(&mt_tree, 0, &obj0);
+	pthread_barrier_init(&worker_barrier, NULL, 2);
+
+	if (pthread_create(&reader, NULL, reader_fn, NULL) ||
+	    pthread_create(&writer, NULL, writer_fn, NULL)) {
+		perror("pthread_create");
+		exit(1);
+	}
+
+	if (pthread_join(reader, NULL) || pthread_join(writer, NULL)) {
+		perror("pthread_join");
+		exit(1);
+	}
+
+	printv(1, "regression test 4 passed\n");
+}
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index 543181e4847b..f898957b1a19 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -24,7 +24,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
 	item_tag_set(tree, index, tag);
 	ret = item_tag_get(tree, index, tag);
 	assert(ret != 0);
-	ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
+	ret = tag_tagged_items(tree, first, ~0UL, 10, tag, !tag);
 	assert(ret == 1);
 	ret = item_tag_get(tree, index, !tag);
 	assert(ret != 0);
@@ -321,7 +321,7 @@ static void single_check(void)
 	assert(ret == 0);
 	verify_tag_consistency(&tree, 0);
 	verify_tag_consistency(&tree, 1);
-	ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
+	ret = tag_tagged_items(&tree, first, 10, 10, XA_MARK_0, XA_MARK_1);
 	assert(ret == 1);
 	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
 	assert(ret == 1);
@@ -331,34 +331,6 @@ static void single_check(void)
 	item_kill_tree(&tree);
 }
 
-void radix_tree_clear_tags_test(void)
-{
-	unsigned long index;
-	struct radix_tree_node *node;
-	struct radix_tree_iter iter;
-	void **slot;
-
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert(&tree, 0);
-	item_tag_set(&tree, 0, 0);
-	__radix_tree_lookup(&tree, 0, &node, &slot);
-	radix_tree_clear_tags(&tree, node, slot);
-	assert(item_tag_get(&tree, 0, 0) == 0);
-
-	for (index = 0; index < 1000; index++) {
-		item_insert(&tree, index);
-		item_tag_set(&tree, index, 0);
-	}
-
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_clear_tags(&tree, iter.node, slot);
-		assert(item_tag_get(&tree, iter.index, 0) == 0);
-	}
-
-	item_kill_tree(&tree);
-}
-
 void tag_check(void)
 {
 	single_check();
@@ -376,5 +348,4 @@ void tag_check(void)
 	thrash_tags();
 	rcu_barrier();
 	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
-	radix_tree_clear_tags_test();
 }
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index def6015570b2..a15d0512e633 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -25,11 +25,6 @@ int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
 	return radix_tree_tag_get(root, index, tag);
 }
 
-int __item_insert(struct radix_tree_root *root, struct item *item)
-{
-	return __radix_tree_insert(root, item->index, item->order, item);
-}
-
 struct item *item_create(unsigned long index, unsigned int order)
 {
 	struct item *ret = malloc(sizeof(*ret));
@@ -39,21 +34,15 @@ struct item *item_create(unsigned long index, unsigned int order)
 	return ret;
 }
 
-int item_insert_order(struct radix_tree_root *root, unsigned long index,
-			unsigned order)
+int item_insert(struct radix_tree_root *root, unsigned long index)
 {
-	struct item *item = item_create(index, order);
-	int err = __item_insert(root, item);
+	struct item *item = item_create(index, 0);
+	int err = radix_tree_insert(root, item->index, item);
 	if (err)
 		free(item);
 	return err;
 }
 
-int item_insert(struct radix_tree_root *root, unsigned long index)
-{
-	return item_insert_order(root, index, 0);
-}
-
 void item_sanity(struct item *item, unsigned long index)
 {
 	unsigned long mask;
@@ -63,16 +52,21 @@ void item_sanity(struct item *item, unsigned long index)
 	assert((item->index | mask) == (index | mask));
 }
 
+void item_free(struct item *item, unsigned long index)
+{
+	item_sanity(item, index);
+	free(item);
+}
+
 int item_delete(struct radix_tree_root *root, unsigned long index)
 {
 	struct item *item = radix_tree_delete(root, index);
 
-	if (item) {
-		item_sanity(item, index);
-		free(item);
-		return 1;
-	}
-	return 0;
+	if (!item)
+		return 0;
+
+	item_free(item, index);
+	return 1;
 }
 
 static void item_free_rcu(struct rcu_head *head)
@@ -82,9 +76,9 @@ static void item_free_rcu(struct rcu_head *head)
 	free(item);
 }
 
-int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+int item_delete_rcu(struct xarray *xa, unsigned long index)
 {
-	struct item *item = radix_tree_delete(root, index);
+	struct item *item = xa_erase(xa, index);
 
 	if (item) {
 		item_sanity(item, index);
@@ -176,59 +170,30 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 }
 
 /* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
-int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
-			unsigned long start, unsigned long end, unsigned batch,
-			unsigned iftag, unsigned thentag)
+int tag_tagged_items(struct xarray *xa, unsigned long start, unsigned long end,
+		unsigned batch, xa_mark_t iftag, xa_mark_t thentag)
 {
-	unsigned long tagged = 0;
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, start);
+	unsigned int tagged = 0;
+	struct item *item;
 
 	if (batch == 0)
 		batch = 1;
 
-	if (lock)
-		pthread_mutex_lock(lock);
-	radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
-		if (iter.index > end)
-			break;
-		radix_tree_iter_tag_set(root, &iter, thentag);
-		tagged++;
-		if ((tagged % batch) != 0)
+	xas_lock_irq(&xas);
+	xas_for_each_marked(&xas, item, end, iftag) {
+		xas_set_mark(&xas, thentag);
+		if (++tagged % batch)
 			continue;
-		slot = radix_tree_iter_resume(slot, &iter);
-		if (lock) {
-			pthread_mutex_unlock(lock);
-			rcu_barrier();
-			pthread_mutex_lock(lock);
-		}
-	}
-	if (lock)
-		pthread_mutex_unlock(lock);
-
-	return tagged;
-}
 
-/* Use the same pattern as find_swap_entry() in mm/shmem.c */
-unsigned long find_item(struct radix_tree_root *root, void *item)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned long found = -1;
-	unsigned long checked = 0;
-
-	radix_tree_for_each_slot(slot, root, &iter, 0) {
-		if (*slot == item) {
-			found = iter.index;
-			break;
-		}
-		checked++;
-		if ((checked % 4) != 0)
-			continue;
-		slot = radix_tree_iter_resume(slot, &iter);
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
+		rcu_barrier();
+		xas_lock_irq(&xas);
 	}
+	xas_unlock_irq(&xas);
 
-	return found;
+	return tagged;
 }
 
 static int verify_node(struct radix_tree_node *slot, unsigned int tag,
@@ -281,43 +246,31 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag,
 
 void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
 {
-	struct radix_tree_node *node = root->rnode;
+	struct radix_tree_node *node = root->xa_head;
 	if (!radix_tree_is_internal_node(node))
 		return;
 	verify_node(node, tag, !!root_tag_get(root, tag));
 }
 
-void item_kill_tree(struct radix_tree_root *root)
+void item_kill_tree(struct xarray *xa)
 {
-	struct radix_tree_iter iter;
-	void **slot;
-	struct item *items[32];
-	int nfound;
-
-	radix_tree_for_each_slot(slot, root, &iter, 0) {
-		if (radix_tree_exceptional_entry(*slot))
-			radix_tree_delete(root, iter.index);
-	}
+	XA_STATE(xas, xa, 0);
+	void *entry;
 
-	while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
-		int i;
-
-		for (i = 0; i < nfound; i++) {
-			void *ret;
-
-			ret = radix_tree_delete(root, items[i]->index);
-			assert(ret == items[i]);
-			free(items[i]);
+	xas_for_each(&xas, entry, ULONG_MAX) {
+		if (!xa_is_value(entry)) {
+			item_free(entry, xas.xa_index);
 		}
+		xas_store(&xas, NULL);
 	}
-	assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
-	assert(root->rnode == NULL);
+
+	assert(xa_empty(xa));
 }
 
 void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
 {
 	unsigned shift;
-	struct radix_tree_node *node = root->rnode;
+	struct radix_tree_node *node = root->xa_head;
 	if (!radix_tree_is_internal_node(node)) {
 		assert(maxindex == 0);
 		return;
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 31f1d9b6f506..1ee4b2c0ad10 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -11,13 +11,11 @@ struct item {
 };
 
 struct item *item_create(unsigned long index, unsigned int order);
-int __item_insert(struct radix_tree_root *root, struct item *item);
 int item_insert(struct radix_tree_root *root, unsigned long index);
 void item_sanity(struct item *item, unsigned long index);
-int item_insert_order(struct radix_tree_root *root, unsigned long index,
-			unsigned order);
+void item_free(struct item *item, unsigned long index);
 int item_delete(struct radix_tree_root *root, unsigned long index);
-int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct xarray *xa, unsigned long index);
 struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
 
 void item_check_present(struct radix_tree_root *root, unsigned long index);
@@ -29,18 +27,16 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 			unsigned long nr, int chunk);
 void item_kill_tree(struct radix_tree_root *root);
 
-int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
-			unsigned long start, unsigned long end, unsigned batch,
-			unsigned iftag, unsigned thentag);
-unsigned long find_item(struct radix_tree_root *, void *item);
+int tag_tagged_items(struct xarray *, unsigned long start, unsigned long end,
+		unsigned batch, xa_mark_t iftag, xa_mark_t thentag);
 
+void xarray_tests(void);
 void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(unsigned order, unsigned duration);
 void benchmark(void);
 void idr_checks(void);
-void ida_checks(void);
-void ida_thread_tests(void);
+void ida_tests(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
new file mode 100644
index 000000000000..e61e43efe463
--- /dev/null
+++ b/tools/testing/radix-tree/xarray.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * xarray.c: Userspace shim for XArray test-suite
+ * Copyright (c) 2018 Matthew Wilcox <willy@infradead.org>
+ */
+
+#define XA_DEBUG
+#include "test.h"
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack()	assert(0)
+
+#include "../../../lib/xarray.c"
+#undef XA_DEBUG
+#include "../../../lib/test_xarray.c"
+
+void xarray_tests(void)
+{
+	xarray_checks();
+	xarray_exit();
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	xarray_tests();
+	radix_tree_cpu_dead(1);
+	rcu_barrier();
+	if (nr_allocated)
+		printf("nr_allocated = %d\n", nr_allocated);
+	return 0;
+}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 32aafa92074c..f0017c831e57 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -3,6 +3,7 @@ TARGETS = android
 TARGETS += bpf
 TARGETS += breakpoints
 TARGETS += capabilities
+TARGETS += cgroup
 TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
@@ -23,14 +24,18 @@ TARGETS += memory-hotplug
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net
+TARGETS += netfilter
 TARGETS += nsfs
 TARGETS += powerpc
 TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
+TARGETS += rseq
+TARGETS += rtc
 TARGETS += seccomp
 TARGETS += sigaltstack
 TARGETS += size
+TARGETS += sparc64
 TARGETS += splice
 TARGETS += static_keys
 TARGETS += sync
@@ -134,7 +139,8 @@ ifdef INSTALL_PATH
 	echo "else" >> $(ALL_SCRIPT)
 	echo "  OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
 	echo "fi" >> $(ALL_SCRIPT)
-	echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT)
+	echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
+	echo "export skip=4" >> $(ALL_SCRIPT)
 
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index f6304d2be90c..d9a725478375 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -6,7 +6,7 @@ TEST_PROGS := run.sh
 
 include ../lib.mk
 
-all:
+all: khdr
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
@@ -18,10 +18,6 @@ all:
 		fi \
 	done
 
-override define RUN_TESTS
-	@cd $(OUTPUT); ./run.sh
-endef
-
 override define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -33,10 +29,6 @@ override define INSTALL_RULE
 	done;
 endef
 
-override define EMIT_TESTS
-	echo "./run.sh"
-endef
-
 override define CLEAN
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/config
index b4ad748a9dd9..b4ad748a9dd9 100644
--- a/tools/testing/selftests/android/ion/config
+++ b/tools/testing/selftests/android/config
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index e03695287f76..88cfe88e466f 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -10,6 +10,8 @@ $(TEST_GEN_FILES): ipcsocket.c ionutils.c
 
 TEST_PROGS := ion_test.sh
 
+KSFT_KHDR_INSTALL := 1
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
index a1aff506f5e6..69e676cfc94e 100755
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -4,6 +4,9 @@ heapsize=4096
 TCID="ion_test.sh"
 errcode=0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 run_test()
 {
 	heaptype=$1
@@ -25,7 +28,7 @@ check_root()
 	uid=$(id -u)
 	if [ $uid -ne 0 ]; then
 		echo $TCID: must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
@@ -35,7 +38,7 @@ check_device()
 	if [ ! -e $DEVICE ]; then
 		echo $TCID: No $DEVICE device found >&2
 		echo $TCID: May be CONFIG_ION is not set >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
index a944e72621a9..b5fa0a2dc968 100644
--- a/tools/testing/selftests/android/ion/ionapp_export.c
+++ b/tools/testing/selftests/android/ion/ionapp_export.c
@@ -51,6 +51,7 @@ int main(int argc, char *argv[])
 
 	heap_size = 0;
 	flags = 0;
+	heap_type = ION_HEAP_TYPE_SYSTEM;
 
 	while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
 		switch (opt) {
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 5e1ab2f0eb79..1b799e30c06d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -15,3 +15,15 @@ test_libbpf_open
 test_sock
 test_sock_addr
 urandom_read
+test_btf
+test_sockmap
+test_lirc_mode2_user
+get_cgroup_id_user
+test_skb_cgroup_id_user
+test_socket_cookie
+test_cgroup_storage
+test_select_reuseport
+test_flow_dissector
+flow_dissector_load
+test_netcnt
+test_section_names
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0a315ddabbf4..e39dfb4e7970 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -10,28 +10,34 @@ ifneq ($(wildcard $(GENHDR)),)
   GENFLAGS := -DHAVE_GENHDR
 endif
 
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf -lrt -lpthread
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 all: $(TEST_CUSTOM_PROGS)
 
-$(TEST_CUSTOM_PROGS): urandom_read
-
-urandom_read: urandom_read.c
-	$(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
+	$(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
-	test_sock test_sock_addr
+	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
+	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
+	test_netcnt
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
-	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
+	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
+	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
+	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
+	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
+	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
+	test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -39,10 +45,19 @@ TEST_PROGS := test_kmod.sh \
 	test_xdp_redirect.sh \
 	test_xdp_meta.sh \
 	test_offload.py \
-	test_sock_addr.sh
+	test_sock_addr.sh \
+	test_tunnel.sh \
+	test_lwt_seg6local.sh \
+	test_lirc_mode2.sh \
+	test_skb_cgroup_id.sh \
+	test_flow_dissector.sh \
+	test_xdp_vlan.sh
+
+TEST_PROGS_EXTENDED := with_addr.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+	flow_dissector_load test_flow_dissector
 
 include ../lib.mk
 
@@ -53,8 +68,16 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
+$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
+$(OUTPUT)/test_netcnt: cgroup_helpers.c
 
 .PHONY: force
 
@@ -66,6 +89,8 @@ $(BPFOBJ): force
 
 CLANG ?= clang
 LLC   ?= llc
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
 
 PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
 
@@ -77,15 +102,45 @@ else
   CPU ?= generic
 endif
 
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
 CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+	      $(CLANG_SYS_INCLUDES) \
 	      -Wno-compare-distinct-pointer-types
 
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(BTF_LLC_PROBE),)
+ifneq ($(BTF_PAHOLE_PROBE),)
+ifneq ($(BTF_OBJCOPY_PROBE),)
+	CLANG_FLAGS += -g
+	LLC_FLAGS += -mattr=dwarfris
+	DWARF2BTF = y
+endif
+endif
+endif
+
 $(OUTPUT)/%.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
 		 -O2 -target bpf -emit-llvm -c $< -o - |      \
-	$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
new file mode 100644
index 000000000000..df9d32fd2055
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls. Name is limited to 16 characters, with the terminating character and
+ * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
+ */
+enum {
+	IP,
+	IPV6,
+	IPV6OP,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
+	IPV6FR,	/* Fragmentation IPv6 Extension Header */
+	MPLS,
+	VLAN,
+};
+
+#define IP_MF		0x2000
+#define IP_OFFSET	0x1FFF
+#define IP6_MF		0x0001
+#define IP6_OFFSET	0xFFF8
+
+struct vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+
+struct gre_hdr {
+	__be16 flags;
+	__be16 proto;
+};
+
+struct frag_hdr {
+	__u8 nexthdr;
+	__u8 reserved;
+	__be16 frag_off;
+	__be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = {
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 8
+};
+
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+							 __u16 hdr_size,
+							 void *buffer)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	__u16 thoff = skb->flow_keys->thoff;
+	__u8 *hdr;
+
+	/* Verifies this variable offset does not overflow */
+	if (thoff > (USHRT_MAX - hdr_size))
+		return NULL;
+
+	hdr = data + thoff;
+	if (hdr + hdr_size <= data_end)
+		return hdr;
+
+	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
+		return NULL;
+
+	return buffer;
+}
+
+/* Dispatches on ETHERTYPE */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	keys->n_proto = proto;
+	switch (proto) {
+	case bpf_htons(ETH_P_IP):
+		bpf_tail_call(skb, &jmp_table, IP);
+		break;
+	case bpf_htons(ETH_P_IPV6):
+		bpf_tail_call(skb, &jmp_table, IPV6);
+		break;
+	case bpf_htons(ETH_P_MPLS_MC):
+	case bpf_htons(ETH_P_MPLS_UC):
+		bpf_tail_call(skb, &jmp_table, MPLS);
+		break;
+	case bpf_htons(ETH_P_8021Q):
+	case bpf_htons(ETH_P_8021AD):
+		bpf_tail_call(skb, &jmp_table, VLAN);
+		break;
+	default:
+		/* Protocol not supported */
+		return BPF_DROP;
+	}
+
+	return BPF_DROP;
+}
+
+SEC("dissect")
+int _dissect(struct __sk_buff *skb)
+{
+	if (!skb->vlan_present)
+		return parse_eth_proto(skb, skb->protocol);
+	else
+		return parse_eth_proto(skb, skb->vlan_proto);
+}
+
+/* Parses on IPPROTO_* */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	void *data_end = (void *)(long)skb->data_end;
+	struct icmphdr *icmp, _icmp;
+	struct gre_hdr *gre, _gre;
+	struct ethhdr *eth, _eth;
+	struct tcphdr *tcp, _tcp;
+	struct udphdr *udp, _udp;
+
+	keys->ip_proto = proto;
+	switch (proto) {
+	case IPPROTO_ICMP:
+		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+		if (!icmp)
+			return BPF_DROP;
+		return BPF_OK;
+	case IPPROTO_IPIP:
+		keys->is_encap = true;
+		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+	case IPPROTO_IPV6:
+		keys->is_encap = true;
+		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+	case IPPROTO_GRE:
+		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+		if (!gre)
+			return BPF_DROP;
+
+		if (bpf_htons(gre->flags & GRE_VERSION))
+			/* Only inspect standard GRE packets with version 0 */
+			return BPF_OK;
+
+		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+		if (GRE_IS_CSUM(gre->flags))
+			keys->thoff += 4; /* Step over chksum and Padding */
+		if (GRE_IS_KEY(gre->flags))
+			keys->thoff += 4; /* Step over key */
+		if (GRE_IS_SEQ(gre->flags))
+			keys->thoff += 4; /* Step over sequence number */
+
+		keys->is_encap = true;
+
+		if (gre->proto == bpf_htons(ETH_P_TEB)) {
+			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+							  &_eth);
+			if (!eth)
+				return BPF_DROP;
+
+			keys->thoff += sizeof(*eth);
+
+			return parse_eth_proto(skb, eth->h_proto);
+		} else {
+			return parse_eth_proto(skb, gre->proto);
+		}
+	case IPPROTO_TCP:
+		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+		if (!tcp)
+			return BPF_DROP;
+
+		if (tcp->doff < 5)
+			return BPF_DROP;
+
+		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
+			return BPF_DROP;
+
+		keys->sport = tcp->source;
+		keys->dport = tcp->dest;
+		return BPF_OK;
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+		if (!udp)
+			return BPF_DROP;
+
+		keys->sport = udp->source;
+		keys->dport = udp->dest;
+		return BPF_OK;
+	default:
+		return BPF_DROP;
+	}
+
+	return BPF_DROP;
+}
+
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	keys->ip_proto = nexthdr;
+	switch (nexthdr) {
+	case IPPROTO_HOPOPTS:
+	case IPPROTO_DSTOPTS:
+		bpf_tail_call(skb, &jmp_table, IPV6OP);
+		break;
+	case IPPROTO_FRAGMENT:
+		bpf_tail_call(skb, &jmp_table, IPV6FR);
+		break;
+	default:
+		return parse_ip_proto(skb, nexthdr);
+	}
+
+	return BPF_DROP;
+}
+
+PROG(IP)(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph, _iph;
+	bool done = false;
+
+	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+	if (!iph)
+		return BPF_DROP;
+
+	/* IP header cannot be smaller than 20 bytes */
+	if (iph->ihl < 5)
+		return BPF_DROP;
+
+	keys->addr_proto = ETH_P_IP;
+	keys->ipv4_src = iph->saddr;
+	keys->ipv4_dst = iph->daddr;
+
+	keys->thoff += iph->ihl << 2;
+	if (data + keys->thoff > data_end)
+		return BPF_DROP;
+
+	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+		keys->is_frag = true;
+		if (iph->frag_off & bpf_htons(IP_OFFSET))
+			/* From second fragment on, packets do not have headers
+			 * we can parse.
+			 */
+			done = true;
+		else
+			keys->is_first_frag = true;
+	}
+
+	if (done)
+		return BPF_OK;
+
+	return parse_ip_proto(skb, iph->protocol);
+}
+
+PROG(IPV6)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct ipv6hdr *ip6h, _ip6h;
+
+	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+	if (!ip6h)
+		return BPF_DROP;
+
+	keys->addr_proto = ETH_P_IPV6;
+	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
+
+	keys->thoff += sizeof(struct ipv6hdr);
+
+	return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+	struct ipv6_opt_hdr *ip6h, _ip6h;
+
+	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+	if (!ip6h)
+		return BPF_DROP;
+
+	/* hlen is in 8-octets and does not include the first 8 bytes
+	 * of the header
+	 */
+	skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3;
+
+	return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct frag_hdr *fragh, _fragh;
+
+	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
+	if (!fragh)
+		return BPF_DROP;
+
+	keys->thoff += sizeof(*fragh);
+	keys->is_frag = true;
+	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+		keys->is_first_frag = true;
+
+	return parse_ipv6_proto(skb, fragh->nexthdr);
+}
+
+PROG(MPLS)(struct __sk_buff *skb)
+{
+	struct mpls_label *mpls, _mpls;
+
+	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
+	if (!mpls)
+		return BPF_DROP;
+
+	return BPF_OK;
+}
+
+PROG(VLAN)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct vlan_hdr *vlan, _vlan;
+	__be16 proto;
+
+	/* Peek back to see if single or double-tagging */
+	if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
+			       sizeof(proto)))
+		return BPF_DROP;
+
+	/* Account for double-tagging */
+	if (proto == bpf_htons(ETH_P_8021AD)) {
+		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+		if (!vlan)
+			return BPF_DROP;
+
+		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+			return BPF_DROP;
+
+		keys->thoff += sizeof(*vlan);
+	}
+
+	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+	if (!vlan)
+		return BPF_DROP;
+
+	keys->thoff += sizeof(*vlan);
+	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
+	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+		return BPF_DROP;
+
+	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d8223d99f96d..efb6c13ab0de 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
 	(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
 	(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+				unsigned long long flags) =
+	(void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+	(void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+	(void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
 	(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
@@ -65,6 +72,8 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
 	(void *) BPF_FUNC_xdp_adjust_head;
 static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
 	(void *) BPF_FUNC_xdp_adjust_meta;
+static int (*bpf_get_socket_cookie)(void *ctx) =
+	(void *) BPF_FUNC_get_socket_cookie;
 static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
 			     int optlen) =
 	(void *) BPF_FUNC_setsockopt;
@@ -75,9 +84,14 @@ static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
 	(void *) BPF_FUNC_sock_ops_cb_flags_set;
 static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
 	(void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
+	(void *) BPF_FUNC_sk_redirect_hash;
 static int (*bpf_sock_map_update)(void *map, void *key, void *value,
 				  unsigned long long flags) =
 	(void *) BPF_FUNC_sock_map_update;
+static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
+				   unsigned long long flags) =
+	(void *) BPF_FUNC_sock_hash_update;
 static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
 					void *buf, unsigned int buf_size) =
 	(void *) BPF_FUNC_perf_event_read_value;
@@ -88,14 +102,72 @@ static int (*bpf_override_return)(void *ctx, unsigned long rc) =
 	(void *) BPF_FUNC_override_return;
 static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
 	(void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_redirect_hash)(void *ctx,
+				    void *map, void *key, int flags) =
+	(void *) BPF_FUNC_msg_redirect_hash;
 static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_apply_bytes;
 static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_cork_bytes;
 static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
+	(void *) BPF_FUNC_msg_push_data;
 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
 	(void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_tail;
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+				     int size, int flags) =
+	(void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
+	(void *) BPF_FUNC_sk_select_reuseport;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+	(void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+			     int plen, __u32 flags) =
+	(void *) BPF_FUNC_fib_lookup;
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+				 unsigned int len) =
+	(void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+				       void *from, unsigned int len) =
+	(void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+				  unsigned int param_len) =
+	(void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+				      unsigned int len) =
+	(void *) BPF_FUNC_lwt_seg6_adjust_srh;
+static int (*bpf_rc_repeat)(void *ctx) =
+	(void *) BPF_FUNC_rc_repeat;
+static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
+			     unsigned long long scancode, unsigned int toggle) =
+	(void *) BPF_FUNC_rc_keydown;
+static unsigned long long (*bpf_get_current_cgroup_id)(void) =
+	(void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+	(void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+	(void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned long long netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned long long netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup_udp;
+static int (*bpf_sk_release)(struct bpf_sock *sk) =
+	(void *) BPF_FUNC_sk_release;
+static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
+	(void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+	(void *) BPF_FUNC_skb_vlan_pop;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -121,14 +193,27 @@ struct bpf_map_def {
 	unsigned int numa_node;
 };
 
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
+	struct ____btf_map_##name {				\
+		type_key key;					\
+		type_val value;					\
+	};							\
+	struct ____btf_map_##name				\
+	__attribute__ ((section(".maps." #name), used))		\
+		____btf_map_##name = { }
+
 static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
 	(void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
+	(void *) BPF_FUNC_skb_load_bytes_relative;
 static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
 	(void *) BPF_FUNC_skb_store_bytes;
 static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+	(void *) BPF_FUNC_csum_diff;
 static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_skb_under_cgroup;
 static int (*bpf_skb_change_head)(void *, int len, int flags) =
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644
index 000000000000..59bf3e1a9371
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rand.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+	return (((uint64_t)(uint32_t)rand()) |
+	        ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m)			\
+static inline uint64_t bpf_rand_u##x(int shift)	\
+{						\
+	return bpf_rand_mask((m)) << shift;	\
+}
+
+bpf_rand_ux( 8,               0xffULL)
+bpf_rand_ux(16,             0xffffULL)
+bpf_rand_ux(24,           0xffffffULL)
+bpf_rand_ux(32,         0xffffffffULL)
+bpf_rand_ux(40,       0xffffffffffULL)
+bpf_rand_ux(48,     0xffffffffffffULL)
+bpf_rand_ux(56,   0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+	srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+	switch (rand() % 39) {
+	case  0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
+	case  1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
+	case  2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+	case  3: return 0x8000000000000000ULL | bpf_rand_u32(0);
+	case  4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
+	case  5: return 0x0000000100000000ULL | bpf_rand_u24(0);
+	case  6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
+	case  7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+	case  8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+	case  9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+	case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+	case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+	case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+	case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+	case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+	case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+	case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+	case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+	case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+	case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+	case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+	case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+	case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+	case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+	case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+	case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+	case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+	case 27: return 0x0000800000000000ULL;
+	case 28: return 0x8000000000000000ULL;
+	case 29: return 0x0000000000000000ULL;
+	case 30: return 0xffffffffffffffffULL;
+	case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+	case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+	case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+	case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+	case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+	case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+	case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+	default: return bpf_rand_u64(0);
+	}
+}
+
+#endif /* __BPF_RAND__ */
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index d0811b3d6a6f..315a44fa32af 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -44,4 +44,8 @@ static inline unsigned int bpf_num_possible_cpus(void)
 		name[bpf_num_possible_cpus()]
 #define bpf_percpu(name, cpu) name[(cpu)].v
 
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
 #endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index f3bca3ade0f3..cf16948aad4a 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -6,6 +6,7 @@
 #include <sys/types.h>
 #include <linux/limits.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <linux/sched.h>
 #include <fcntl.h>
 #include <unistd.h>
@@ -117,7 +118,7 @@ static int join_cgroup_from_top(char *cgroup_path)
  *
  * On success, it returns 0, otherwise on failure it returns 1.
  */
-int join_cgroup(char *path)
+int join_cgroup(const char *path)
 {
 	char cgroup_path[PATH_MAX + 1];
 
@@ -157,7 +158,7 @@ void cleanup_cgroup_environment(void)
  * On success, it returns the file descriptor. On failure it returns 0.
  * If there is a failure, it prints the error to stderr.
  */
-int create_and_get_cgroup(char *path)
+int create_and_get_cgroup(const char *path)
 {
 	char cgroup_path[PATH_MAX + 1];
 	int fd;
@@ -176,3 +177,59 @@ int create_and_get_cgroup(char *path)
 
 	return fd;
 }
+
+/**
+ * get_cgroup_id() - Get cgroup id for a particular cgroup path
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * On success, it returns the cgroup id. On failure it returns 0,
+ * which is an invalid cgroup id.
+ * If there is a failure, it prints the error to stderr.
+ */
+unsigned long long get_cgroup_id(const char *path)
+{
+	int dirfd, err, flags, mount_id, fhsize;
+	union {
+		unsigned long long cgid;
+		unsigned char raw_bytes[8];
+	} id;
+	char cgroup_workdir[PATH_MAX + 1];
+	struct file_handle *fhp, *fhp2;
+	unsigned long long ret = 0;
+
+	format_cgroup_path(cgroup_workdir, path);
+
+	dirfd = AT_FDCWD;
+	flags = 0;
+	fhsize = sizeof(*fhp);
+	fhp = calloc(1, fhsize);
+	if (!fhp) {
+		log_err("calloc");
+		return 0;
+	}
+	err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
+	if (err >= 0 || fhp->handle_bytes != 8) {
+		log_err("name_to_handle_at");
+		goto free_mem;
+	}
+
+	fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
+	fhp2 = realloc(fhp, fhsize);
+	if (!fhp2) {
+		log_err("realloc");
+		goto free_mem;
+	}
+	err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
+	fhp = fhp2;
+	if (err < 0) {
+		log_err("name_to_handle_at");
+		goto free_mem;
+	}
+
+	memcpy(id.raw_bytes, fhp->f_handle, 8);
+	ret = id.cgid;
+
+free_mem:
+	free(fhp);
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 06485e0002b3..d64bb8957090 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -9,9 +9,10 @@
 	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
 
 
-int create_and_get_cgroup(char *path);
-int join_cgroup(char *path);
+int create_and_get_cgroup(const char *path);
+int join_cgroup(const char *path);
 int setup_cgroup_environment(void);
 void cleanup_cgroup_environment(void);
+unsigned long long get_cgroup_id(const char *path);
 
 #endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 1eefe211a4a8..7f90d3645af8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -6,4 +6,19 @@ CONFIG_TEST_BPF=m
 CONFIG_CGROUP_BPF=y
 CONFIG_NETDEVSIM=m
 CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_IPIP=y
+CONFIG_IPV6=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_IPV6_GRE=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_LWTUNNEL=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
new file mode 100644
index 000000000000..ae8180b11d5f
--- /dev/null
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+
+const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
+const char *cfg_map_name = "jmp_table";
+bool cfg_attach = true;
+char *cfg_section_name;
+char *cfg_path_name;
+
+static void load_and_attach_program(void)
+{
+	struct bpf_program *prog, *main_prog;
+	struct bpf_map *prog_array;
+	int i, fd, prog_fd, ret;
+	struct bpf_object *obj;
+	int prog_array_fd;
+
+	ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj,
+			    &prog_fd);
+	if (ret)
+		error(1, 0, "bpf_prog_load %s", cfg_path_name);
+
+	main_prog = bpf_object__find_program_by_title(obj, cfg_section_name);
+	if (!main_prog)
+		error(1, 0, "bpf_object__find_program_by_title %s",
+		      cfg_section_name);
+
+	prog_fd = bpf_program__fd(main_prog);
+	if (prog_fd < 0)
+		error(1, 0, "bpf_program__fd");
+
+	prog_array = bpf_object__find_map_by_name(obj, cfg_map_name);
+	if (!prog_array)
+		error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name);
+
+	prog_array_fd = bpf_map__fd(prog_array);
+	if (prog_array_fd < 0)
+		error(1, 0, "bpf_map__fd %s", cfg_map_name);
+
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		fd = bpf_program__fd(prog);
+		if (fd < 0)
+			error(1, 0, "bpf_program__fd");
+
+		if (fd != prog_fd) {
+			printf("%d: %s\n", i, bpf_program__title(prog, false));
+			bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY);
+			++i;
+		}
+	}
+
+	ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0);
+	if (ret)
+		error(1, 0, "bpf_prog_attach %s", cfg_path_name);
+
+	ret = bpf_object__pin(obj, cfg_pin_path);
+	if (ret)
+		error(1, 0, "bpf_object__pin %s", cfg_pin_path);
+
+}
+
+static void detach_program(void)
+{
+	char command[64];
+	int ret;
+
+	ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+	if (ret)
+		error(1, 0, "bpf_prog_detach");
+
+	/* To unpin, it is necessary and sufficient to just remove this dir */
+	sprintf(command, "rm -r %s", cfg_pin_path);
+	ret = system(command);
+	if (ret)
+		error(1, errno, command);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	bool attach = false;
+	bool detach = false;
+	int c;
+
+	while ((c = getopt(argc, argv, "adp:s:")) != -1) {
+		switch (c) {
+		case 'a':
+			if (detach)
+				error(1, 0, "attach/detach are exclusive");
+			attach = true;
+			break;
+		case 'd':
+			if (attach)
+				error(1, 0, "attach/detach are exclusive");
+			detach = true;
+			break;
+		case 'p':
+			if (cfg_path_name)
+				error(1, 0, "only one prog name can be given");
+
+			cfg_path_name = optarg;
+			break;
+		case 's':
+			if (cfg_section_name)
+				error(1, 0, "only one section can be given");
+
+			cfg_section_name = optarg;
+			break;
+		}
+	}
+
+	if (detach)
+		cfg_attach = false;
+
+	if (cfg_attach && !cfg_path_name)
+		error(1, 0, "must provide a path to the BPF program");
+
+	if (cfg_attach && !cfg_section_name)
+		error(1, 0, "must provide a section name");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	if (cfg_attach)
+		load_and_attach_program();
+	else
+		detach_program();
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
new file mode 100644
index 000000000000..014dba10b8a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") cg_ids = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") pidmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+	__u32 pid = bpf_get_current_pid_tgid();
+	__u32 key = 0, *expected_pid;
+	__u64 *val;
+
+	expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+	if (!expected_pid || *expected_pid != pid)
+		return 0;
+
+	val = bpf_map_lookup_elem(&cg_ids, &key);
+	if (val)
+		*val = bpf_get_current_cgroup_id();
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
new file mode 100644
index 000000000000..e8da7b39158d
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({		\
+	int __ret = !!(condition);			\
+	if (__ret) {					\
+		printf("%s:FAIL:%s ", __func__, tag);	\
+		printf(format);				\
+	} else {					\
+		printf("%s:PASS:%s\n", __func__, tag);	\
+	}						\
+	__ret;						\
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map)
+		return -1;
+	return bpf_map__fd(map);
+}
+
+#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
+
+int main(int argc, char **argv)
+{
+	const char *probe_name = "syscalls/sys_enter_nanosleep";
+	const char *file = "get_cgroup_id_kern.o";
+	int err, bytes, efd, prog_fd, pmu_fd;
+	int cgroup_fd, cgidmap_fd, pidmap_fd;
+	struct perf_event_attr attr = {};
+	struct bpf_object *obj;
+	__u64 kcgid = 0, ucgid;
+	__u32 key = 0, pid;
+	int exit_code = 1;
+	char buf[256];
+
+	err = setup_cgroup_environment();
+	if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
+		  errno))
+		return 1;
+
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
+		  cgroup_fd, errno))
+		goto cleanup_cgroup_env;
+
+	err = join_cgroup(TEST_CGROUP);
+	if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
+		goto cleanup_cgroup_env;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+		goto cleanup_cgroup_env;
+
+	cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
+	if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  cgidmap_fd, errno))
+		goto close_prog;
+
+	pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+	if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  pidmap_fd, errno))
+		goto close_prog;
+
+	pid = getpid();
+	bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	/* attach to this pid so the all bpf invocations will be in the
+	 * cgroup associated with this pid.
+	 */
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	/* trigger some syscalls */
+	sleep(1);
+
+	err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
+	if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+		goto close_pmu;
+
+	ucgid = get_cgroup_id(TEST_CGROUP);
+	if (CHECK(kcgid != ucgid, "compare_cgroup_id",
+		  "kern cgid %llx user cgid %llx", kcgid, ucgid))
+		goto close_pmu;
+
+	exit_code = 0;
+	printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+	close(pmu_fd);
+close_prog:
+	bpf_object__close(obj);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+	return exit_code;
+}
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
new file mode 100644
index 000000000000..81084c1c2c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __NETCNT_COMMON_H
+#define __NETCNT_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_PERCPU_PACKETS 32
+
+struct percpu_net_cnt {
+	__u64 packets;
+	__u64 bytes;
+
+	__u64 prev_ts;
+
+	__u64 prev_packets;
+	__u64 prev_bytes;
+};
+
+struct net_cnt {
+	__u64 packets;
+	__u64 bytes;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c
new file mode 100644
index 000000000000..1198abca1360
--- /dev/null
+++ b/tools/testing/selftests/bpf/netcnt_prog.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/version.h>
+
+#include "bpf_helpers.h"
+#include "netcnt_common.h"
+
+#define MAX_BPS	(3 * 1024 * 1024)
+
+#define REFRESH_TIME_NS	100000000
+#define NS_PER_SEC	1000000000
+
+struct bpf_map_def SEC("maps") percpu_netcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	.key_size = sizeof(struct bpf_cgroup_storage_key),
+	.value_size = sizeof(struct percpu_net_cnt),
+};
+
+struct bpf_map_def SEC("maps") netcnt = {
+	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
+	.key_size = sizeof(struct bpf_cgroup_storage_key),
+	.value_size = sizeof(struct net_cnt),
+};
+
+SEC("cgroup/skb")
+int bpf_nextcnt(struct __sk_buff *skb)
+{
+	struct percpu_net_cnt *percpu_cnt;
+	char fmt[] = "%d %llu %llu\n";
+	struct net_cnt *cnt;
+	__u64 ts, dt;
+	int ret;
+
+	cnt = bpf_get_local_storage(&netcnt, 0);
+	percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
+
+	percpu_cnt->packets++;
+	percpu_cnt->bytes += skb->len;
+
+	if (percpu_cnt->packets > MAX_PERCPU_PACKETS) {
+		__sync_fetch_and_add(&cnt->packets,
+				     percpu_cnt->packets);
+		percpu_cnt->packets = 0;
+
+		__sync_fetch_and_add(&cnt->bytes,
+				     percpu_cnt->bytes);
+		percpu_cnt->bytes = 0;
+	}
+
+	ts = bpf_ktime_get_ns();
+	dt = ts - percpu_cnt->prev_ts;
+
+	dt *= MAX_BPS;
+	dt /= NS_PER_SEC;
+
+	if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt)
+		ret = 1;
+	else
+		ret = 0;
+
+	if (dt > REFRESH_TIME_NS) {
+		percpu_cnt->prev_ts = ts;
+		percpu_cnt->prev_packets = cnt->packets;
+		percpu_cnt->prev_bytes = cnt->bytes;
+	}
+
+	return !!ret;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c
new file mode 100644
index 000000000000..a91536b1c47e
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg4_prog.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC1_IP4		0xAC100001U /* 172.16.0.1 */
+#define SRC2_IP4		0x00000000U
+#define SRC_REWRITE_IP4		0x7f000004U
+#define DST_IP4			0xC0A801FEU /* 192.168.1.254 */
+#define DST_REWRITE_IP4		0x7f000001U
+#define DST_PORT		4040
+#define DST_REWRITE_PORT4	4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_DGRAM)
+		return 0;
+
+	/* Rewrite source. */
+	if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
+	    ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
+		ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
+	} else {
+		/* Unexpected source. Reject sendmsg. */
+		return 0;
+	}
+
+	/* Rewrite destination. */
+	if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
+	     ctx->user_port == bpf_htons(DST_PORT)) {
+		ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+		ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+	} else {
+		/* Unexpected source. Reject sendmsg. */
+		return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c
new file mode 100644
index 000000000000..5aeaa284fc47
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg6_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0	0
+#define SRC_REWRITE_IP6_1	0
+#define SRC_REWRITE_IP6_2	0
+#define SRC_REWRITE_IP6_3	6
+
+#define DST_REWRITE_IP6_0	0
+#define DST_REWRITE_IP6_1	0
+#define DST_REWRITE_IP6_2	0
+#define DST_REWRITE_IP6_3	1
+
+#define DST_REWRITE_PORT6	6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_DGRAM)
+		return 0;
+
+	/* Rewrite source. */
+	if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
+	    ctx->msg_src_ip6[3] == bpf_htonl(0)) {
+		ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+		ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+		ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+		ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+	} else {
+		/* Unexpected source. Reject sendmsg. */
+		return 0;
+	}
+
+	/* Rewrite destination. */
+	if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) &&
+	     ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) {
+		ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+		ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+		ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+		ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+		ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+	} else {
+		/* Unexpected destination. Reject sendmsg. */
+		return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c
new file mode 100644
index 000000000000..9ff8ac4b0bf6
--- /dev/null
+++ b/tools/testing/selftests/bpf/socket_cookie_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") socket_cookies = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u64),
+	.value_size = sizeof(__u32),
+	.max_entries = 1 << 8,
+};
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+	__u32 cookie_value = 0xFF;
+	__u64 cookie_key;
+
+	if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+		return 1;
+
+	cookie_key = bpf_get_socket_cookie(ctx);
+	if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
+		return 0;
+
+	return 1;
+}
+
+SEC("sockops")
+int update_cookie(struct bpf_sock_ops *ctx)
+{
+	__u32 new_cookie_value;
+	__u32 *cookie_value;
+	__u64 cookie_key;
+
+	if (ctx->family != AF_INET6)
+		return 1;
+
+	if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+		return 1;
+
+	cookie_key = bpf_get_socket_cookie(ctx);
+
+	cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
+	if (!cookie_value)
+		return 1;
+
+	new_cookie_value = (ctx->local_port << 8) | *cookie_value;
+	bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+
+	return 1;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index 481dccdf140c..7f8200a8702b 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #
 # SPDX-License-Identifier: GPL-2.0
 #
@@ -9,11 +9,11 @@ import subprocess
 import select
 
 def read(sock, n):
-    buf = ''
+    buf = b''
     while len(buf) < n:
         rem = n - len(buf)
         try: s = sock.recv(rem)
-        except (socket.error), e: return ''
+        except (socket.error) as e: return b''
         buf += s
     return buf
 
@@ -22,7 +22,7 @@ def send(sock, s):
     count = 0
     while count < total:
         try: n = sock.send(s)
-        except (socket.error), e: n = 0
+        except (socket.error) as e: n = 0
         if n == 0:
             return count;
         count += n
@@ -39,10 +39,10 @@ try:
 except socket.error as e:
     sys.exit(1)
 
-buf = ''
+buf = b''
 n = 0
 while n < 1000:
-    buf += '+'
+    buf += b'+'
     n += 1
 
 sock.settimeout(1);
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index bc454d7d0be2..b39903fca4c8 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #
 # SPDX-License-Identifier: GPL-2.0
 #
@@ -9,11 +9,11 @@ import subprocess
 import select
 
 def read(sock, n):
-    buf = ''
+    buf = b''
     while len(buf) < n:
         rem = n - len(buf)
         try: s = sock.recv(rem)
-        except (socket.error), e: return ''
+        except (socket.error) as e: return b''
         buf += s
     return buf
 
@@ -22,7 +22,7 @@ def send(sock, s):
     count = 0
     while count < total:
         try: n = sock.send(s)
-        except (socket.error), e: n = 0
+        except (socket.error) as e: n = 0
         if n == 0:
             return count;
         count += n
@@ -43,7 +43,7 @@ host = socket.gethostname()
 
 try: serverSocket.bind((host, 0))
 except socket.error as msg:
-    print 'bind fails: ', msg
+    print('bind fails: ' + str(msg))
 
 sn = serverSocket.getsockname()
 serverPort = sn[1]
@@ -51,10 +51,10 @@ serverPort = sn[1]
 cmdStr = ("./tcp_client.py %d &") % (serverPort)
 os.system(cmdStr)
 
-buf = ''
+buf = b''
 n = 0
 while n < 500:
-    buf += '.'
+    buf += b'.'
     n += 1
 
 serverSocket.listen(MAX_PORTS)
@@ -79,5 +79,5 @@ while True:
                 serverSocket.close()
                 sys.exit(0)
     else:
-        print 'Select timeout!'
+        print('Select timeout!')
         sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000000..4cd5e860c903
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int offset = 0;
+
+	if (data_end - data == 54)
+		offset = 256;
+	else
+		offset = 20;
+	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 6b1b302310fe..5f377ec53f2f 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -18,10 +18,7 @@
 
 #include "../../../include/linux/filter.h"
 #include "bpf_rlimit.h"
-
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_util.h"
 
 #define MAX_INSNS	512
 #define MAX_MATCHES	16
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 000000000000..38e1cbaaffdb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,2949 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+static uint32_t pass_cnt;
+static uint32_t error_cnt;
+static uint32_t skip_cnt;
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+static int count_result(int err)
+{
+	if (err)
+		error_cnt++;
+	else
+		pass_cnt++;
+
+	fprintf(stderr, "\n");
+	return err;
+}
+
+#define __printf(a, b)	__attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+	va_list args;
+	int err;
+
+	va_start(args, format);
+	err = vfprintf(stderr, format, args);
+	va_end(args);
+	return err;
+}
+
+#define BTF_INFO_ENC(kind, root, vlen)			\
+	((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type)	\
+	(name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits)	\
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz),	\
+	BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems)	\
+	(type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+	BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset)	\
+	(name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+
+#define BTF_TYPEDEF_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_END_RAW 0xdeadbeef
+#define NAME_TBD 0xdeadb33f
+
+#define MAX_NR_RAW_TYPES 1024
+#define BTF_LOG_BUF_SIZE 65535
+
+static struct args {
+	unsigned int raw_test_num;
+	unsigned int file_test_num;
+	unsigned int get_info_test_num;
+	bool raw_test;
+	bool file_test;
+	bool get_info_test;
+	bool pprint_test;
+	bool always_log;
+} args;
+
+static char btf_log_buf[BTF_LOG_BUF_SIZE];
+
+static struct btf_header hdr_tmpl = {
+	.magic = BTF_MAGIC,
+	.version = BTF_VERSION,
+	.hdr_len = sizeof(struct btf_header),
+};
+
+struct btf_raw_test {
+	const char *descr;
+	const char *str_sec;
+	const char *map_name;
+	const char *err_str;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	enum bpf_map_type map_type;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 key_type_id;
+	__u32 value_type_id;
+	__u32 max_entries;
+	bool btf_load_err;
+	bool map_create_err;
+	bool ordered_map;
+	bool lossless_map;
+	bool percpu_map;
+	int hdr_len_delta;
+	int type_off_delta;
+	int str_off_delta;
+	int str_len_delta;
+};
+
+static struct btf_raw_test raw_tests[] = {
+/* enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *	unsigned long long m;
+ *	int n;
+ *	char o;
+ *	[3 bytes hole]
+ *	int p[8];
+ *	int q[4][8];
+ *	enum E r;
+ * };
+ */
+{
+	.descr = "struct test #1",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8]		*/
+		BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r		*/
+		/* } */
+		/* int[4][8] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),			/* [6] */
+		/* enum E */					/* [7] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test1_map",
+	.key_size = sizeof(int),
+	.value_size = 180,
+	.key_type_id = 1,
+	.value_type_id = 5,
+	.max_entries = 4,
+},
+
+/* typedef struct b Struct_B;
+ *
+ * struct A {
+ *     int m;
+ *     struct b n[4];
+ *     const Struct_B o[4];
+ * };
+ *
+ * struct B {
+ *     int m;
+ *     int n;
+ * };
+ */
+{
+	.descr = "struct test #2",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct b [4] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),
+
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4]	*/
+		BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
+		/* } */
+
+		/* struct B { */				/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+		/* } */
+
+		/* const int */					/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		/* typedef struct b Struct_B */	/* [6] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
+		/* const Struct_B */				/* [7] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
+		/* const Struct_B [4] */			/* [8] */
+		BTF_TYPE_ARRAY_ENC(7, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test2_map",
+	.key_size = sizeof(int),
+	.value_size = 68,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct test #3 Invalid member offset",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int64 */					/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),
+
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),	/* int m;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),		/* int64 n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0",
+	.str_sec_size = sizeof("\0A\0m\0n\0"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test3_map",
+	.key_size = sizeof(int),
+	.value_size = 16,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid member bits_offset",
+},
+
+/* Test member exceeds the size of struct.
+ *
+ * struct A {
+ *     int m;
+ *     int n;
+ * };
+ */
+{
+	.descr = "size check test #1",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */				/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 -  1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check1_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ *     int m;
+ *     int n[2];
+ * };
+ */
+{
+	.descr = "size check test #2",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* int[2] */					/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 2),
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check2_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ *     int m;
+ *     void *n;
+ * };
+ */
+{
+	.descr = "size check test #3",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* void* */					/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check3_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *     int m;
+ *     enum E n;
+ * };
+ */
+{
+	.descr = "size check test #4",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* enum E { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		/* } */
+		/* struct A { */		/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0E\0E0\0E1\0A\0m\0n",
+	.str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check4_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ *	const_void_ptr m;
+ * };
+ */
+{
+	.descr = "void test #1",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void* */	/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),	/* [4] */
+		/* struct A { */	/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/* const_void_ptr m; */
+		BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0const_void_ptr\0A\0m",
+	.str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test1_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 4,
+	.max_entries = 4,
+},
+
+/* struct A {
+ *     const void m;
+ * };
+ */
+{
+	.descr = "void test #2",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* struct A { */	/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
+		/* const void m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test2_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid member",
+},
+
+/* typedef const void * const_void_ptr;
+ * const_void_ptr[4]
+ */
+{
+	.descr = "void test #3",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void* */	/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),	/* [4] */
+		/* const_void_ptr[4] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),	/* [5] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0const_void_ptr",
+	.str_sec_size = sizeof("\0const_void_ptr"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test3_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *) * 4,
+	.key_type_id = 1,
+	.value_type_id = 5,
+	.max_entries = 4,
+},
+
+/* const void[4]  */
+{
+	.descr = "void test #4",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void[4] */	/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test4_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *) * 4,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid elem",
+},
+
+/* Array_A  <------------------+
+ *     elem_type == Array_B    |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array A --+
+ */
+{
+	.descr = "loop test #1",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* Array_B */			/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test1_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* typedef is _before_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A  <------------------+
+ *     elem_type == int_array  |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #2",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* typedef Array_B int_array */
+		BTF_TYPEDEF_ENC(1, 4),				/* [2] */
+		/* Array_A */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),			/* [3] */
+		/* Array_B */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),			/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int_array\0",
+	.str_sec_size = sizeof("\0int_array"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test2_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* Array_A  <------------------+
+ *     elem_type == Array_B    |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #3",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* Array_B */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test3_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* typedef is _between_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A  <------------------+
+ *     elem_type == int_array  |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #4",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* typedef Array_B int_array */		/* [3] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),
+		/* Array_B */				/* [4] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int_array\0",
+	.str_sec_size = sizeof("\0int_array"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test4_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* typedef struct B Struct_B
+ *
+ * struct A {
+ *     int x;
+ *     Struct_B y;
+ * };
+ *
+ * struct B {
+ *     int x;
+ *     struct A y;
+ * };
+ */
+{
+	.descr = "loop test #5",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* struct A */					/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y;	*/
+		/* typedef struct B Struct_B */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		/* struct B */					/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y;	*/
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
+	.str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test5_map",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* struct A {
+ *     int x;
+ *     struct A array_a[4];
+ * };
+ */
+{
+	.descr = "loop test #6",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 4),			/* [2] */
+		/* struct A */					/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4];	*/
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0x\0y",
+	.str_sec_size = sizeof("\0A\0x\0y"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test6_map",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "loop test #7",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *m;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+		/* CONST type_id=3	*/		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* PTR type_id=2	*/		/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test7_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "loop test #8",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *m;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+		/* struct B { */			/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *n;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 6, 0),
+		/* CONST type_id=5	*/		/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
+		/* PTR type_id=6	*/		/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
+		/* CONST type_id=7	*/		/* [6] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
+		/* PTR type_id=4	*/		/* [7] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0B\0n",
+	.str_sec_size = sizeof("\0A\0m\0B\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test8_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "string section does not end with null",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int") - 1,
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid string section",
+},
+
+{
+	.descr = "empty string section",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = 0,
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid string section",
+},
+
+{
+	.descr = "empty type section",
+	.raw_types = {
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "No type found",
+},
+
+{
+	.descr = "btf_header test. Longer hdr_len",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.hdr_len_delta = 4,
+	.err_str = "Unsupported btf_header",
+},
+
+{
+	.descr = "btf_header test. Gap between hdr and type",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.type_off_delta = 4,
+	.err_str = "Unsupported section found",
+},
+
+{
+	.descr = "btf_header test. Gap between type and str",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_off_delta = 4,
+	.err_str = "Unsupported section found",
+},
+
+{
+	.descr = "btf_header test. Overlap between type and str",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_off_delta = -4,
+	.err_str = "Section overlap found",
+},
+
+{
+	.descr = "btf_header test. Larger BTF size",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = -4,
+	.err_str = "Unsupported section found",
+},
+
+{
+	.descr = "btf_header test. Smaller BTF size",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = 4,
+	.err_str = "Total section length too long",
+},
+
+{
+	.descr = "array test. index_type/elem_type \"int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test. index_type/elem_type \"const int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 3, 16),
+		/* CONST type_id=1 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test. index_type \"const int:31\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int:31 */				/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+		/* int[16] */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(1, 4, 16),
+		/* CONST type_id=2 */			/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. elem_type \"const int:31\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int:31 */				/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+		/* int[16] */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 16),
+		/* CONST type_id=2 */			/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid array of int",
+},
+
+{
+	.descr = "array test. index_type \"void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 0, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. index_type \"const void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 3, 16),
+		/* CONST type_id=0 (void) */		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. elem_type \"const void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 16),
+		/* CONST type_id=0 (void) */		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid elem",
+},
+
+{
+	.descr = "array test. elem_type \"const void *\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void *[16] */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 16),
+		/* CONST type_id=4 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* void* */				/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test. index_type \"const void *\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void *[16] */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 3, 16),
+		/* CONST type_id=4 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* void* */				/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. t->size != 0\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1),
+		BTF_ARRAY_ENC(1, 1, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "size != 0",
+},
+
+{
+	.descr = "int test. invalid int_data",
+	.raw_types = {
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
+		0x10000000,
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid int_data",
+},
+
+{
+	.descr = "invalid BTF_INFO",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_TYPE_ENC(0, 0x10000000, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info",
+},
+
+{
+	.descr = "fwd test. t->type != 0\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* fwd type */				/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "type != 0",
+},
+
+{
+	.descr = "typedef (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(0, 1),				/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "typedef (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),			/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__!int",
+	.str_sec_size = sizeof("\0__!int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "ptr type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "ptr_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "volatile type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "volatile_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "const type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "const_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "restrict type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1),	/* [2] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2),	/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "restrict_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "fwd type (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__skb",
+	.str_sec_size = sizeof("\0__skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "fwd type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__!skb",
+	.str_sec_size = sizeof("\0__!skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "array type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0),	/* [2] */
+		BTF_ARRAY_ENC(1, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__skb",
+	.str_sec_size = sizeof("\0__skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "struct type (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A",
+	.str_sec_size = sizeof("\0A"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!\0B",
+	.str_sec_size = sizeof("\0A!\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "struct member (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A",
+	.str_sec_size = sizeof("\0A"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct member (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0B*",
+	.str_sec_size = sizeof("\0A\0B*"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum type (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0B",
+	.str_sec_size = sizeof("\0A\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "enum type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!\0B",
+	.str_sec_size = sizeof("\0A!\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum member (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(0, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum member (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!",
+	.str_sec_size = sizeof("\0A!"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+{
+	.descr = "arraymap invalid btf key (a bit field)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* 32 bit int with 32 bit offset */	/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 2,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+{
+	.descr = "arraymap invalid btf key (!= 32 bits)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* 16 bit int with 0 bit offset */	/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 2,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+{
+	.descr = "arraymap invalid btf value (too small)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	/* btf_value_size < map->value_size */
+	.value_size = sizeof(__u64),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+{
+	.descr = "arraymap invalid btf value (too big)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	/* btf_value_size > map->value_size */
+	.value_size = sizeof(__u16),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+}; /* struct btf_raw_test raw_tests[] */
+
+static const char *get_next_str(const char *start, const char *end)
+{
+	return start < end - 1 ? start + 1 : NULL;
+}
+
+static int get_type_sec_size(const __u32 *raw_types)
+{
+	int i;
+
+	for (i = MAX_NR_RAW_TYPES - 1;
+	     i >= 0 && raw_types[i] != BTF_END_RAW;
+	     i--)
+		;
+
+	return i < 0 ? i : i * sizeof(raw_types[0]);
+}
+
+static void *btf_raw_create(const struct btf_header *hdr,
+			    const __u32 *raw_types,
+			    const char *str,
+			    unsigned int str_sec_size,
+			    unsigned int *btf_size)
+{
+	const char *next_str = str, *end_str = str + str_sec_size;
+	unsigned int size_needed, offset;
+	struct btf_header *ret_hdr;
+	int i, type_sec_size;
+	uint32_t *ret_types;
+	void *raw_btf;
+
+	type_sec_size = get_type_sec_size(raw_types);
+	if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
+		return NULL;
+
+	size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
+	raw_btf = malloc(size_needed);
+	if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
+		return NULL;
+
+	/* Copy header */
+	memcpy(raw_btf, hdr, sizeof(*hdr));
+	offset = sizeof(*hdr);
+
+	/* Copy type section */
+	ret_types = raw_btf + offset;
+	for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
+		if (raw_types[i] == NAME_TBD) {
+			next_str = get_next_str(next_str, end_str);
+			if (CHECK(!next_str, "Error in getting next_str")) {
+				free(raw_btf);
+				return NULL;
+			}
+			ret_types[i] = next_str - str;
+			next_str += strlen(next_str);
+		} else {
+			ret_types[i] = raw_types[i];
+		}
+	}
+	offset += type_sec_size;
+
+	/* Copy string section */
+	memcpy(raw_btf + offset, str, str_sec_size);
+
+	ret_hdr = (struct btf_header *)raw_btf;
+	ret_hdr->type_len = type_sec_size;
+	ret_hdr->str_off = type_sec_size;
+	ret_hdr->str_len = str_sec_size;
+
+	*btf_size = size_needed;
+
+	return raw_btf;
+}
+
+static int do_test_raw(unsigned int test_num)
+{
+	struct btf_raw_test *test = &raw_tests[test_num - 1];
+	struct bpf_create_map_attr create_attr = {};
+	int map_fd = -1, btf_fd = -1;
+	unsigned int raw_btf_size;
+	struct btf_header *hdr;
+	void *raw_btf;
+	int err;
+
+	fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	hdr = raw_btf;
+
+	hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
+	hdr->type_off = (int)hdr->type_off + test->type_off_delta;
+	hdr->str_off = (int)hdr->str_off + test->str_off_delta;
+	hdr->str_len = (int)hdr->str_len + test->str_len_delta;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	err = ((btf_fd == -1) != test->btf_load_err);
+	if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+		  btf_fd, test->btf_load_err) ||
+	    CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+		  "expected err_str:%s", test->err_str)) {
+		err = -1;
+		goto done;
+	}
+
+	if (err || btf_fd == -1)
+		goto done;
+
+	create_attr.name = test->map_name;
+	create_attr.map_type = test->map_type;
+	create_attr.key_size = test->key_size;
+	create_attr.value_size = test->value_size;
+	create_attr.max_entries = test->max_entries;
+	create_attr.btf_fd = btf_fd;
+	create_attr.btf_key_type_id = test->key_type_id;
+	create_attr.btf_value_type_id = test->value_type_id;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+
+	err = ((map_fd == -1) != test->map_create_err);
+	CHECK(err, "map_fd:%d test->map_create_err:%u",
+	      map_fd, test->map_create_err);
+
+done:
+	if (!err)
+		fprintf(stderr, "OK");
+
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (map_fd != -1)
+		close(map_fd);
+
+	return err;
+}
+
+static int test_raw(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.raw_test_num)
+		return count_result(do_test_raw(args.raw_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+		err |= count_result(do_test_raw(i));
+
+	return err;
+}
+
+struct btf_get_info_test {
+	const char *descr;
+	const char *str_sec;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	int btf_size_delta;
+	int (*special_test)(unsigned int test_num);
+};
+
+static int test_big_btf_info(unsigned int test_num);
+static int test_btf_id(unsigned int test_num);
+
+const struct btf_get_info_test get_info_tests[] = {
+{
+	.descr = "== raw_btf_size+1",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.btf_size_delta = 1,
+},
+{
+	.descr = "== raw_btf_size-3",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.btf_size_delta = -3,
+},
+{
+	.descr = "Large bpf_btf_info",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.special_test = test_big_btf_info,
+},
+{
+	.descr = "BTF ID",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned int */			/* [2] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.special_test = test_btf_id,
+},
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static int test_big_btf_info(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	uint8_t *raw_btf = NULL, *user_btf = NULL;
+	unsigned int raw_btf_size;
+	struct {
+		struct bpf_btf_info info;
+		uint64_t garbage;
+	} info_garbage;
+	struct bpf_btf_info *info;
+	int btf_fd = -1, err;
+	uint32_t info_len;
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	user_btf = malloc(raw_btf_size);
+	if (CHECK(!user_btf, "!user_btf")) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/*
+	 * GET_INFO should error out if the userspace info
+	 * has non zero tailing bytes.
+	 */
+	info = &info_garbage.info;
+	memset(info, 0, sizeof(*info));
+	info_garbage.garbage = 0xdeadbeef;
+	info_len = sizeof(info_garbage);
+	info->btf = ptr_to_u64(user_btf);
+	info->btf_size = raw_btf_size;
+
+	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	if (CHECK(!err, "!err")) {
+		err = -1;
+		goto done;
+	}
+
+	/*
+	 * GET_INFO should succeed even info_len is larger than
+	 * the kernel supported as long as tailing bytes are zero.
+	 * The kernel supported info len should also be returned
+	 * to userspace.
+	 */
+	info_garbage.garbage = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	if (CHECK(err || info_len != sizeof(*info),
+		  "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+		  err, errno, info_len, sizeof(*info))) {
+		err = -1;
+		goto done;
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	free(user_btf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+
+	return err;
+}
+
+static int test_btf_id(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	struct bpf_create_map_attr create_attr = {};
+	uint8_t *raw_btf = NULL, *user_btf[2] = {};
+	int btf_fd[2] = {-1, -1}, map_fd = -1;
+	struct bpf_map_info map_info = {};
+	struct bpf_btf_info info[2] = {};
+	unsigned int raw_btf_size;
+	uint32_t info_len;
+	int err, i, ret;
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	for (i = 0; i < 2; i++) {
+		user_btf[i] = malloc(raw_btf_size);
+		if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
+			err = -1;
+			goto done;
+		}
+		info[i].btf = ptr_to_u64(user_btf[i]);
+		info[i].btf_size = raw_btf_size;
+	}
+
+	btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
+				 btf_log_buf, BTF_LOG_BUF_SIZE,
+				 args.always_log);
+	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
+	info_len = sizeof(info[0]);
+	err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+	if (CHECK(err, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
+	if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	ret = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+	if (CHECK(err || info[0].id != info[1].id ||
+		  info[0].btf_size != info[1].btf_size ||
+		  (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
+		  "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
+		  err, errno, info[0].id, info[1].id,
+		  info[0].btf_size, info[1].btf_size, ret)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Test btf members in struct bpf_map_info */
+	create_attr.name = "test_btf_id";
+	create_attr.map_type = BPF_MAP_TYPE_ARRAY;
+	create_attr.key_size = sizeof(int);
+	create_attr.value_size = sizeof(unsigned int);
+	create_attr.max_entries = 4;
+	create_attr.btf_fd = btf_fd[0];
+	create_attr.btf_key_type_id = 1;
+	create_attr.btf_value_type_id = 2;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	info_len = sizeof(map_info);
+	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	if (CHECK(err || map_info.btf_id != info[0].id ||
+		  map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+		  err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+		  map_info.btf_value_type_id)) {
+		err = -1;
+		goto done;
+	}
+
+	for (i = 0; i < 2; i++) {
+		close(btf_fd[i]);
+		btf_fd[i] = -1;
+	}
+
+	/* Test BTF ID is removed from the kernel */
+	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+	close(btf_fd[0]);
+	btf_fd[0] = -1;
+
+	/* The map holds the last ref to BTF and its btf_id */
+	close(map_fd);
+	map_fd = -1;
+	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+	if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+		err = -1;
+		goto done;
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	if (map_fd != -1)
+		close(map_fd);
+	for (i = 0; i < 2; i++) {
+		free(user_btf[i]);
+		if (btf_fd[i] != -1)
+			close(btf_fd[i]);
+	}
+
+	return err;
+}
+
+static int do_test_get_info(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	unsigned int raw_btf_size, user_btf_size, expected_nbytes;
+	uint8_t *raw_btf = NULL, *user_btf = NULL;
+	struct bpf_btf_info info = {};
+	int btf_fd = -1, err, ret;
+	uint32_t info_len;
+
+	fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
+		test_num, test->descr);
+
+	if (test->special_test)
+		return test->special_test(test_num);
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	user_btf = malloc(raw_btf_size);
+	if (CHECK(!user_btf, "!user_btf")) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	user_btf_size = (int)raw_btf_size + test->btf_size_delta;
+	expected_nbytes = min(raw_btf_size, user_btf_size);
+	if (raw_btf_size > expected_nbytes)
+		memset(user_btf + expected_nbytes, 0xff,
+		       raw_btf_size - expected_nbytes);
+
+	info_len = sizeof(info);
+	info.btf = ptr_to_u64(user_btf);
+	info.btf_size = user_btf_size;
+
+	ret = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+	if (CHECK(err || !info.id || info_len != sizeof(info) ||
+		  info.btf_size != raw_btf_size ||
+		  (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
+		  "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+		  err, errno, info.id, info_len, sizeof(info),
+		  raw_btf_size, info.btf_size, expected_nbytes, ret)) {
+		err = -1;
+		goto done;
+	}
+
+	while (expected_nbytes < raw_btf_size) {
+		fprintf(stderr, "%u...", expected_nbytes);
+		if (CHECK(user_btf[expected_nbytes++] != 0xff,
+			  "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
+			  user_btf[expected_nbytes - 1])) {
+			err = -1;
+			goto done;
+		}
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	free(user_btf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+
+	return err;
+}
+
+static int test_get_info(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.get_info_test_num)
+		return count_result(do_test_get_info(args.get_info_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+		err |= count_result(do_test_get_info(i));
+
+	return err;
+}
+
+struct btf_file_test {
+	const char *file;
+	bool btf_kv_notfound;
+};
+
+static struct btf_file_test file_tests[] = {
+{
+	.file = "test_btf_haskv.o",
+},
+{
+	.file = "test_btf_nokv.o",
+	.btf_kv_notfound = true,
+},
+};
+
+static int file_has_btf_elf(const char *fn)
+{
+	Elf_Scn *scn = NULL;
+	GElf_Ehdr ehdr;
+	int elf_fd;
+	Elf *elf;
+	int ret;
+
+	if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
+		  "elf_version(EV_CURRENT) == EV_NONE"))
+		return -1;
+
+	elf_fd = open(fn, O_RDONLY);
+	if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
+		return -1;
+
+	elf = elf_begin(elf_fd, ELF_C_READ, NULL);
+	if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
+		ret = -1;
+		goto done;
+	}
+
+	if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
+		ret = -1;
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn))) {
+		const char *sh_name;
+		GElf_Shdr sh;
+
+		if (CHECK(gelf_getshdr(scn, &sh) != &sh,
+			  "file:%s gelf_getshdr != &sh", fn)) {
+			ret = -1;
+			goto done;
+		}
+
+		sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!strcmp(sh_name, BTF_ELF_SEC)) {
+			ret = 1;
+			goto done;
+		}
+	}
+
+	ret = 0;
+
+done:
+	close(elf_fd);
+	elf_end(elf);
+	return ret;
+}
+
+static int do_test_file(unsigned int test_num)
+{
+	const struct btf_file_test *test = &file_tests[test_num - 1];
+	struct bpf_object *obj = NULL;
+	struct bpf_program *prog;
+	struct bpf_map *map;
+	int err;
+
+	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
+		test->file);
+
+	err = file_has_btf_elf(test->file);
+	if (err == -1)
+		return err;
+
+	if (err == 0) {
+		fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+		skip_cnt++;
+		return 0;
+	}
+
+	obj = bpf_object__open(test->file);
+	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+		return PTR_ERR(obj);
+
+	err = bpf_object__btf_fd(obj);
+	if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
+		goto done;
+
+	prog = bpf_program__next(NULL, obj);
+	if (CHECK(!prog, "Cannot find bpf_prog")) {
+		err = -1;
+		goto done;
+	}
+
+	bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+	err = bpf_object__load(obj);
+	if (CHECK(err < 0, "bpf_object__load: %d", err))
+		goto done;
+
+	map = bpf_object__find_map_by_name(obj, "btf_map");
+	if (CHECK(!map, "btf_map not found")) {
+		err = -1;
+		goto done;
+	}
+
+	err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
+		!= test->btf_kv_notfound;
+	if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+		  bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
+		  test->btf_kv_notfound))
+		goto done;
+
+	fprintf(stderr, "OK");
+
+done:
+	bpf_object__close(obj);
+	return err;
+}
+
+static int test_file(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.file_test_num)
+		return count_result(do_test_file(args.file_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+		err |= count_result(do_test_file(i));
+
+	return err;
+}
+
+const char *pprint_enum_str[] = {
+	"ENUM_ZERO",
+	"ENUM_ONE",
+	"ENUM_TWO",
+	"ENUM_THREE",
+};
+
+struct pprint_mapv {
+	uint32_t ui32;
+	uint16_t ui16;
+	/* 2 bytes hole */
+	int32_t si32;
+	uint32_t unused_bits2a:2,
+		bits28:28,
+		unused_bits2b:2;
+	union {
+		uint64_t ui64;
+		uint8_t ui8a[8];
+	};
+	enum {
+		ENUM_ZERO,
+		ENUM_ONE,
+		ENUM_TWO,
+		ENUM_THREE,
+	} aenum;
+};
+
+static struct btf_raw_test pprint_test_template = {
+	.raw_types = {
+		/* unsighed char */			/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
+		/* unsigned short */			/* [2] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
+		/* unsigned int */			/* [3] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
+		/* int */				/* [4] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned long long */		/* [5] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
+		/* 2 bits */				/* [6] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
+		/* 28 bits */				/* [7] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
+		/* uint8_t[8] */			/* [8] */
+		BTF_TYPE_ARRAY_ENC(9, 1, 8),
+		/* typedef unsigned char uint8_t */	/* [9] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),
+		/* typedef unsigned short uint16_t */	/* [10] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 2),
+		/* typedef unsigned int uint32_t */	/* [11] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),
+		/* typedef int int32_t */		/* [12] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),
+		/* typedef unsigned long long uint64_t *//* [13] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 5),
+		/* union (anon) */			/* [14] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
+		BTF_MEMBER_ENC(NAME_TBD, 8, 0),	/* uint8_t ui8a[8]; */
+		/* enum (anon) */			/* [15] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		BTF_ENUM_ENC(NAME_TBD, 2),
+		BTF_ENUM_ENC(NAME_TBD, 3),
+		/* struct pprint_mapv */		/* [16] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32),
+		BTF_MEMBER_ENC(NAME_TBD, 11, 0),	/* uint32_t ui32 */
+		BTF_MEMBER_ENC(NAME_TBD, 10, 32),	/* uint16_t ui16 */
+		BTF_MEMBER_ENC(NAME_TBD, 12, 64),	/* int32_t si32 */
+		BTF_MEMBER_ENC(NAME_TBD, 6, 96),	/* unused_bits2a */
+		BTF_MEMBER_ENC(NAME_TBD, 7, 98),	/* bits28 */
+		BTF_MEMBER_ENC(NAME_TBD, 6, 126),	/* unused_bits2b */
+		BTF_MEMBER_ENC(0, 14, 128),		/* union (anon) */
+		BTF_MEMBER_ENC(NAME_TBD, 15, 192),	/* aenum */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
+	.str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
+	.key_size = sizeof(unsigned int),
+	.value_size = sizeof(struct pprint_mapv),
+	.key_type_id = 3,	/* unsigned int */
+	.value_type_id = 16,	/* struct pprint_mapv */
+	.max_entries = 128 * 1024,
+};
+
+static struct btf_pprint_test_meta {
+	const char *descr;
+	enum bpf_map_type map_type;
+	const char *map_name;
+	bool ordered_map;
+	bool lossless_map;
+	bool percpu_map;
+} pprint_tests_meta[] = {
+{
+	.descr = "BTF pretty print array",
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "pprint_test_array",
+	.ordered_map = true,
+	.lossless_map = true,
+	.percpu_map = false,
+},
+
+{
+	.descr = "BTF pretty print hash",
+	.map_type = BPF_MAP_TYPE_HASH,
+	.map_name = "pprint_test_hash",
+	.ordered_map = false,
+	.lossless_map = true,
+	.percpu_map = false,
+},
+
+{
+	.descr = "BTF pretty print lru hash",
+	.map_type = BPF_MAP_TYPE_LRU_HASH,
+	.map_name = "pprint_test_lru_hash",
+	.ordered_map = false,
+	.lossless_map = false,
+	.percpu_map = false,
+},
+
+{
+	.descr = "BTF pretty print percpu array",
+	.map_type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.map_name = "pprint_test_percpu_array",
+	.ordered_map = true,
+	.lossless_map = true,
+	.percpu_map = true,
+},
+
+{
+	.descr = "BTF pretty print percpu hash",
+	.map_type = BPF_MAP_TYPE_PERCPU_HASH,
+	.map_name = "pprint_test_percpu_hash",
+	.ordered_map = false,
+	.lossless_map = true,
+	.percpu_map = true,
+},
+
+{
+	.descr = "BTF pretty print lru percpu hash",
+	.map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
+	.map_name = "pprint_test_lru_percpu_hash",
+	.ordered_map = false,
+	.lossless_map = false,
+	.percpu_map = true,
+},
+
+};
+
+
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i,
+			    int num_cpus, int rounded_value_size)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < num_cpus; cpu++) {
+		v->ui32 = i + cpu;
+		v->si32 = -i;
+		v->unused_bits2a = 3;
+		v->bits28 = i;
+		v->unused_bits2b = 3;
+		v->ui64 = i;
+		v->aenum = i & 0x03;
+		v = (void *)v + rounded_value_size;
+	}
+}
+
+static int check_line(const char *expected_line, int nexpected_line,
+		      int expected_line_len, const char *line)
+{
+	if (CHECK(nexpected_line == expected_line_len,
+		  "expected_line is too long"))
+		return -1;
+
+	if (strcmp(expected_line, line)) {
+		fprintf(stderr, "unexpected pprint output\n");
+		fprintf(stderr, "expected: %s", expected_line);
+		fprintf(stderr, "    read: %s", line);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+static int do_test_pprint(void)
+{
+	const struct btf_raw_test *test = &pprint_test_template;
+	struct bpf_create_map_attr create_attr = {};
+	bool ordered_map, lossless_map, percpu_map;
+	int err, ret, num_cpus, rounded_value_size;
+	struct pprint_mapv *mapv = NULL;
+	unsigned int key, nr_read_elems;
+	int map_fd = -1, btf_fd = -1;
+	unsigned int raw_btf_size;
+	char expected_line[255];
+	FILE *pin_file = NULL;
+	char pin_path[255];
+	size_t line_len = 0;
+	char *line = NULL;
+	uint8_t *raw_btf;
+	ssize_t nread;
+
+	fprintf(stderr, "%s......", test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+				 test->str_sec, test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	create_attr.name = test->map_name;
+	create_attr.map_type = test->map_type;
+	create_attr.key_size = test->key_size;
+	create_attr.value_size = test->value_size;
+	create_attr.max_entries = test->max_entries;
+	create_attr.btf_fd = btf_fd;
+	create_attr.btf_key_type_id = test->key_type_id;
+	create_attr.btf_value_type_id = test->value_type_id;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+		       "/sys/fs/bpf", test->map_name);
+
+	if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+		  "/sys/fs/bpf", test->map_name)) {
+		err = -1;
+		goto done;
+	}
+
+	err = bpf_obj_pin(map_fd, pin_path);
+	if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
+		goto done;
+
+	percpu_map = test->percpu_map;
+	num_cpus = percpu_map ? bpf_num_possible_cpus() : 1;
+	rounded_value_size = round_up(sizeof(struct pprint_mapv), 8);
+	mapv = calloc(num_cpus, rounded_value_size);
+	if (CHECK(!mapv, "mapv allocation failure")) {
+		err = -1;
+		goto done;
+	}
+
+	for (key = 0; key < test->max_entries; key++) {
+		set_pprint_mapv(mapv, key, num_cpus, rounded_value_size);
+		bpf_map_update_elem(map_fd, &key, mapv, 0);
+	}
+
+	pin_file = fopen(pin_path, "r");
+	if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Skip lines start with '#' */
+	while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
+	       *line == '#')
+		;
+
+	if (CHECK(nread <= 0, "Unexpected EOF")) {
+		err = -1;
+		goto done;
+	}
+
+	nr_read_elems = 0;
+	ordered_map = test->ordered_map;
+	lossless_map = test->lossless_map;
+	do {
+		struct pprint_mapv *cmapv;
+		ssize_t nexpected_line;
+		unsigned int next_key;
+		int cpu;
+
+		next_key = ordered_map ? nr_read_elems : atoi(line);
+		set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size);
+		cmapv = mapv;
+
+		for (cpu = 0; cpu < num_cpus; cpu++) {
+			if (percpu_map) {
+				/* for percpu map, the format looks like:
+				 * <key>: {
+				 *	cpu0: <value_on_cpu0>
+				 *	cpu1: <value_on_cpu1>
+				 *	...
+				 *	cpun: <value_on_cpun>
+				 * }
+				 *
+				 * let us verify the line containing the key here.
+				 */
+				if (cpu == 0) {
+					nexpected_line = snprintf(expected_line,
+								  sizeof(expected_line),
+								  "%u: {\n",
+								  next_key);
+
+					err = check_line(expected_line, nexpected_line,
+							 sizeof(expected_line), line);
+					if (err == -1)
+						goto done;
+				}
+
+				/* read value@cpu */
+				nread = getline(&line, &line_len, pin_file);
+				if (nread < 0)
+					break;
+			}
+
+			nexpected_line = snprintf(expected_line, sizeof(expected_line),
+						  "%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
+						  "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+						  percpu_map ? "\tcpu" : "",
+						  percpu_map ? cpu : next_key,
+						  cmapv->ui32, cmapv->si32,
+						  cmapv->unused_bits2a,
+						  cmapv->bits28,
+						  cmapv->unused_bits2b,
+						  cmapv->ui64,
+						  cmapv->ui8a[0], cmapv->ui8a[1],
+						  cmapv->ui8a[2], cmapv->ui8a[3],
+						  cmapv->ui8a[4], cmapv->ui8a[5],
+						  cmapv->ui8a[6], cmapv->ui8a[7],
+						  pprint_enum_str[cmapv->aenum]);
+
+			err = check_line(expected_line, nexpected_line,
+					 sizeof(expected_line), line);
+			if (err == -1)
+				goto done;
+
+			cmapv = (void *)cmapv + rounded_value_size;
+		}
+
+		if (percpu_map) {
+			/* skip the last bracket for the percpu map */
+			nread = getline(&line, &line_len, pin_file);
+			if (nread < 0)
+				break;
+		}
+
+		nread = getline(&line, &line_len, pin_file);
+	} while (++nr_read_elems < test->max_entries && nread > 0);
+
+	if (lossless_map &&
+	    CHECK(nr_read_elems < test->max_entries,
+		  "Unexpected EOF. nr_read_elems:%u test->max_entries:%u",
+		  nr_read_elems, test->max_entries)) {
+		err = -1;
+		goto done;
+	}
+
+	if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
+		err = -1;
+		goto done;
+	}
+
+	err = 0;
+
+done:
+	if (mapv)
+		free(mapv);
+	if (!err)
+		fprintf(stderr, "OK");
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (map_fd != -1)
+		close(map_fd);
+	if (pin_file)
+		fclose(pin_file);
+	unlink(pin_path);
+	free(line);
+
+	return err;
+}
+
+static int test_pprint(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
+		pprint_test_template.descr = pprint_tests_meta[i].descr;
+		pprint_test_template.map_type = pprint_tests_meta[i].map_type;
+		pprint_test_template.map_name = pprint_tests_meta[i].map_name;
+		pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
+		pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+		pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map;
+
+		err |= count_result(do_test_pprint());
+	}
+
+	return err;
+}
+
+static void usage(const char *cmd)
+{
+	fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
+		ARRAY_SIZE(file_tests));
+}
+
+static int parse_args(int argc, char **argv)
+{
+	const char *optstr = "lpf:r:g:";
+	int opt;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'l':
+			args.always_log = true;
+			break;
+		case 'f':
+			args.file_test_num = atoi(optarg);
+			args.file_test = true;
+			break;
+		case 'r':
+			args.raw_test_num = atoi(optarg);
+			args.raw_test = true;
+			break;
+		case 'g':
+			args.get_info_test_num = atoi(optarg);
+			args.get_info_test = true;
+			break;
+		case 'p':
+			args.pprint_test = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+				usage(argv[0]);
+				return -1;
+		}
+	}
+
+	if (args.raw_test_num &&
+	    (args.raw_test_num < 1 ||
+	     args.raw_test_num > ARRAY_SIZE(raw_tests))) {
+		fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
+			ARRAY_SIZE(raw_tests));
+		return -1;
+	}
+
+	if (args.file_test_num &&
+	    (args.file_test_num < 1 ||
+	     args.file_test_num > ARRAY_SIZE(file_tests))) {
+		fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
+			ARRAY_SIZE(file_tests));
+		return -1;
+	}
+
+	if (args.get_info_test_num &&
+	    (args.get_info_test_num < 1 ||
+	     args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
+		fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
+			ARRAY_SIZE(get_info_tests));
+		return -1;
+	}
+
+	return 0;
+}
+
+static void print_summary(void)
+{
+	fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
+		pass_cnt - skip_cnt, skip_cnt, error_cnt);
+}
+
+int main(int argc, char **argv)
+{
+	int err = 0;
+
+	err = parse_args(argc, argv);
+	if (err)
+		return err;
+
+	if (args.always_log)
+		libbpf_set_print(__base_pr, __base_pr, __base_pr);
+
+	if (args.raw_test)
+		err |= test_raw();
+
+	if (args.get_info_test)
+		err |= test_get_info();
+
+	if (args.file_test)
+		err |= test_file();
+
+	if (args.pprint_test)
+		err |= test_pprint();
+
+	if (args.raw_test || args.get_info_test || args.file_test ||
+	    args.pprint_test)
+		goto done;
+
+	err |= test_raw();
+	err |= test_get_info();
+	err |= test_file();
+
+done:
+	print_summary();
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 000000000000..b21b876f475d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct ipv_counts),
+	.max_entries = 4,
+};
+
+BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 000000000000..0ed8e088eebf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct ipv_counts),
+	.max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
new file mode 100644
index 000000000000..f44834155f25
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/sysinfo.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+
+int main(int argc, char **argv)
+{
+	struct bpf_insn prog[] = {
+		BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */
+		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_local_storage),
+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
+		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_local_storage),
+		BPF_MOV64_IMM(BPF_REG_1, 1),
+		BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	int error = EXIT_FAILURE;
+	int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
+	struct bpf_cgroup_storage_key key;
+	unsigned long long value;
+	unsigned long long *percpu_value;
+	int cpu, nproc;
+
+	nproc = get_nprocs_conf();
+	percpu_value = malloc(sizeof(*percpu_value) * nproc);
+	if (!percpu_value) {
+		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
+		goto err;
+	}
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+				sizeof(value), 0, 0);
+	if (map_fd < 0) {
+		printf("Failed to create map: %s\n", strerror(errno));
+		goto out;
+	}
+
+	percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+				       sizeof(key), sizeof(value), 0, 0);
+	if (percpu_map_fd < 0) {
+		printf("Failed to create map: %s\n", strerror(errno));
+		goto out;
+	}
+
+	prog[0].imm = percpu_map_fd;
+	prog[7].imm = map_fd;
+	prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+				   prog, insns_cnt, "GPL", 0,
+				   bpf_log_buf, BPF_LOG_BUF_SIZE);
+	if (prog_fd < 0) {
+		printf("Failed to load bpf program: %s\n", bpf_log_buf);
+		goto out;
+	}
+
+	if (setup_cgroup_environment()) {
+		printf("Failed to setup cgroup environment\n");
+		goto err;
+	}
+
+	/* Create a cgroup, get fd, and join it */
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (!cgroup_fd) {
+		printf("Failed to create test cgroup\n");
+		goto err;
+	}
+
+	if (join_cgroup(TEST_CGROUP)) {
+		printf("Failed to join cgroup\n");
+		goto err;
+	}
+
+	/* Attach the bpf program */
+	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+		printf("Failed to attach bpf program\n");
+		goto err;
+	}
+
+	if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+		printf("Failed to get the first key in cgroup storage\n");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+		printf("Failed to lookup cgroup storage 0\n");
+		goto err;
+	}
+
+	for (cpu = 0; cpu < nproc; cpu++)
+		percpu_value[cpu] = 1000;
+
+	if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) {
+		printf("Failed to update the data in the cgroup storage\n");
+		goto err;
+	}
+
+	/* Every second packet should be dropped */
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+	/* Check the counter in the cgroup local storage */
+	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+		printf("Failed to lookup cgroup storage\n");
+		goto err;
+	}
+
+	if (value != 3) {
+		printf("Unexpected data in the cgroup storage: %llu\n", value);
+		goto err;
+	}
+
+	/* Bump the counter in the cgroup local storage */
+	value++;
+	if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
+		printf("Failed to update the data in the cgroup storage\n");
+		goto err;
+	}
+
+	/* Every second packet should be dropped */
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+	/* Check the final value of the counter in the cgroup local storage */
+	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+		printf("Failed to lookup the cgroup storage\n");
+		goto err;
+	}
+
+	if (value != 7) {
+		printf("Unexpected data in the cgroup storage: %llu\n", value);
+		goto err;
+	}
+
+	/* Check the final value of the counter in the percpu local storage */
+
+	for (cpu = 0; cpu < nproc; cpu++)
+		percpu_value[cpu] = 0;
+
+	if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) {
+		printf("Failed to lookup the per-cpu cgroup storage\n");
+		goto err;
+	}
+
+	value = 0;
+	for (cpu = 0; cpu < nproc; cpu++)
+		value += percpu_value[cpu];
+
+	if (value != nproc * 1000 + 6) {
+		printf("Unexpected data in the per-cpu cgroup storage\n");
+		goto err;
+	}
+
+	error = 0;
+	printf("test_cgroup_storage:PASS\n");
+
+err:
+	cleanup_cgroup_environment();
+	free(percpu_value);
+
+out:
+	return error;
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
new file mode 100644
index 000000000000..12b784afba31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inject packets with all sorts of encapsulation into the kernel.
+ *
+ * IPv4/IPv6	outer layer 3
+ * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
+ * IPv4/IPv6    inner layer 3
+ */
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define CFG_PORT_INNER	8000
+
+/* Add some protocol definitions that do not exist in userspace */
+
+struct grehdr {
+	uint16_t unused;
+	uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+	union {
+		struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+			__u8	hlen:5,
+				control:1,
+				version:2;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+			__u8	version:2,
+				control:1,
+				hlen:5;
+#else
+#error  "Please fix <asm/byteorder.h>"
+#endif
+			__u8	proto_ctype;
+			__be16	flags;
+		};
+		__be32	word;
+	};
+};
+
+static uint8_t	cfg_dsfield_inner;
+static uint8_t	cfg_dsfield_outer;
+static uint8_t	cfg_encap_proto;
+static bool	cfg_expect_failure = false;
+static int	cfg_l3_extra = AF_UNSPEC;	/* optional SIT prefix */
+static int	cfg_l3_inner = AF_UNSPEC;
+static int	cfg_l3_outer = AF_UNSPEC;
+static int	cfg_num_pkt = 10;
+static int	cfg_num_secs = 0;
+static char	cfg_payload_char = 'a';
+static int	cfg_payload_len = 100;
+static int	cfg_port_gue = 6080;
+static bool	cfg_only_rx;
+static bool	cfg_only_tx;
+static int	cfg_src_port = 9;
+
+static char	buf[ETH_DATA_LEN];
+
+#define INIT_ADDR4(name, addr4, port)				\
+	static struct sockaddr_in name = {			\
+		.sin_family = AF_INET,				\
+		.sin_port = __constant_htons(port),		\
+		.sin_addr.s_addr = __constant_htonl(addr4),	\
+	};
+
+#define INIT_ADDR6(name, addr6, port)				\
+	static struct sockaddr_in6 name = {			\
+		.sin6_family = AF_INET6,			\
+		.sin6_port = __constant_htons(port),		\
+		.sin6_addr = addr6,				\
+	};
+
+INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
+INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
+INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
+
+INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+
+static unsigned long util_gettime(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void util_printaddr(const char *msg, struct sockaddr *addr)
+{
+	unsigned long off = 0;
+	char nbuf[INET6_ADDRSTRLEN];
+
+	switch (addr->sa_family) {
+	case PF_INET:
+		off = __builtin_offsetof(struct sockaddr_in, sin_addr);
+		break;
+	case PF_INET6:
+		off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
+		break;
+	default:
+		error(1, 0, "printaddr: unsupported family %u\n",
+		      addr->sa_family);
+	}
+
+	if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
+		       sizeof(nbuf)))
+		error(1, errno, "inet_ntop");
+
+	fprintf(stderr, "%s: %s\n", msg, nbuf);
+}
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static void build_ipv4_header(void *header, uint8_t proto,
+			      uint32_t src, uint32_t dst,
+			      int payload_len, uint8_t tos)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->tos = tos;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = proto;
+	iph->saddr = src;
+	iph->daddr = dst;
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t val, *ptr = (uint16_t *)ip6h;
+
+	val = ntohs(*ptr);
+	val &= 0xF00F;
+	val |= ((uint16_t) dsfield) << 4;
+	*ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+			      struct sockaddr_in6 *src,
+			      struct sockaddr_in6 *dst,
+			      int payload_len, uint8_t dsfield)
+{
+	struct ipv6hdr *ip6h = header;
+
+	ip6h->version = 6;
+	ip6h->payload_len = htons(payload_len);
+	ip6h->nexthdr = proto;
+	ip6h->hop_limit = 8;
+	ipv6_set_dsfield(ip6h, dsfield);
+
+	memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+	memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static uint16_t build_udp_v4_csum(const struct iphdr *iph,
+				  const struct udphdr *udph,
+				  int num_words)
+{
+	unsigned long pseudo_sum;
+	int num_u16 = sizeof(iph->saddr);	/* halfwords: twice byte len */
+
+	pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
+	pseudo_sum += htons(IPPROTO_UDP);
+	pseudo_sum += udph->len;
+	return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
+				  const struct udphdr *udph,
+				  int num_words)
+{
+	unsigned long pseudo_sum;
+	int num_u16 = sizeof(ip6h->saddr);	/* halfwords: twice byte len */
+
+	pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
+	pseudo_sum += htons(ip6h->nexthdr);
+	pseudo_sum += ip6h->payload_len;
+	return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static void build_udp_header(void *header, int payload_len,
+			     uint16_t dport, int family)
+{
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(cfg_src_port);
+	udph->dest = htons(dport);
+	udph->len = htons(len);
+	udph->check = 0;
+	if (family == AF_INET)
+		udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+						udph, len >> 1);
+	else
+		udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+						udph, len >> 1);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+	struct guehdr *gueh = header;
+
+	gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+	struct grehdr *greh = header;
+
+	greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct iphdr);
+	else
+		return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(void)
+{
+	int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+	int el3_len = 0;
+
+	if (cfg_l3_extra)
+		el3_len = l3_length(cfg_l3_extra);
+
+	/* calculate header offsets */
+	if (cfg_encap_proto) {
+		ol3_len = l3_length(cfg_l3_outer);
+
+		if (cfg_encap_proto == IPPROTO_GRE)
+			ol4_len = sizeof(struct grehdr);
+		else if (cfg_encap_proto == IPPROTO_UDP)
+			ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	}
+
+	il3_len = l3_length(cfg_l3_inner);
+	il4_len = sizeof(struct udphdr);
+
+	if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
+	    sizeof(buf))
+		error(1, 0, "packet too large\n");
+
+	/*
+	 * Fill packet from inside out, to calculate correct checksums.
+	 * But create ip before udp headers, as udp uses ip for pseudo-sum.
+	 */
+	memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+	       cfg_payload_char, cfg_payload_len);
+
+	/* add zero byte for udp csum padding */
+	buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
+
+	switch (cfg_l3_inner) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP,
+				  in_saddr4.sin_addr.s_addr,
+				  in_daddr4.sin_addr.s_addr,
+				  il4_len + cfg_payload_len,
+				  cfg_dsfield_inner);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP,
+				  &in_saddr6, &in_daddr6,
+				  il4_len + cfg_payload_len,
+				  cfg_dsfield_inner);
+		break;
+	}
+
+	build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+			 cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
+
+	if (!cfg_encap_proto)
+		return il3_len + il4_len + cfg_payload_len;
+
+	switch (cfg_l3_outer) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len, cfg_encap_proto,
+				  out_saddr4.sin_addr.s_addr,
+				  out_daddr4.sin_addr.s_addr,
+				  ol4_len + il3_len + il4_len + cfg_payload_len,
+				  cfg_dsfield_outer);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len, cfg_encap_proto,
+				  &out_saddr6, &out_daddr6,
+				  ol4_len + il3_len + il4_len + cfg_payload_len,
+				  cfg_dsfield_outer);
+		break;
+	}
+
+	switch (cfg_encap_proto) {
+	case IPPROTO_UDP:
+		build_gue_header(buf + el3_len + ol3_len + ol4_len -
+				 sizeof(struct guehdr),
+				 cfg_l3_inner == PF_INET ? IPPROTO_IPIP
+							 : IPPROTO_IPV6);
+		build_udp_header(buf + el3_len + ol3_len,
+				 sizeof(struct guehdr) + il3_len + il4_len +
+				 cfg_payload_len,
+				 cfg_port_gue, cfg_l3_outer);
+		break;
+	case IPPROTO_GRE:
+		build_gre_header(buf + el3_len + ol3_len,
+				 cfg_l3_inner == PF_INET ? ETH_P_IP
+							 : ETH_P_IPV6);
+		break;
+	}
+
+	switch (cfg_l3_extra) {
+	case PF_INET:
+		build_ipv4_header(buf,
+				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+							  : IPPROTO_IPV6,
+				  extra_saddr4.sin_addr.s_addr,
+				  extra_daddr4.sin_addr.s_addr,
+				  ol3_len + ol4_len + il3_len + il4_len +
+				  cfg_payload_len, 0);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf,
+				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+							  : IPPROTO_IPV6,
+				  &extra_saddr6, &extra_daddr6,
+				  ol3_len + ol4_len + il3_len + il4_len +
+				  cfg_payload_len, 0);
+		break;
+	}
+
+	return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+	       cfg_payload_len;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(void)
+{
+	int family, fd, ret;
+
+	if (cfg_l3_extra)
+		family = cfg_l3_extra;
+	else if (cfg_l3_outer)
+		family = cfg_l3_outer;
+	else
+		family = cfg_l3_inner;
+
+	fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+	if (fd == -1)
+		error(1, errno, "socket tx");
+
+	if (cfg_l3_extra) {
+		if (cfg_l3_extra == PF_INET)
+			ret = connect(fd, (void *) &extra_daddr4,
+				      sizeof(extra_daddr4));
+		else
+			ret = connect(fd, (void *) &extra_daddr6,
+				      sizeof(extra_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	} else if (cfg_l3_outer) {
+		/* connect to destination if not encapsulated */
+		if (cfg_l3_outer == PF_INET)
+			ret = connect(fd, (void *) &out_daddr4,
+				      sizeof(out_daddr4));
+		else
+			ret = connect(fd, (void *) &out_daddr6,
+				      sizeof(out_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	} else {
+		/* otherwise using loopback */
+		if (cfg_l3_inner == PF_INET)
+			ret = connect(fd, (void *) &in_daddr4,
+				      sizeof(in_daddr4));
+		else
+			ret = connect(fd, (void *) &in_daddr6,
+				      sizeof(in_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	}
+
+	return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(void)
+{
+	int fd, ret;
+
+	fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket rx");
+
+	if (cfg_l3_inner == PF_INET)
+		ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
+	else
+		ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
+	if (ret)
+		error(1, errno, "bind rx");
+
+	return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+	int ret;
+
+	ret = write(fd, pkt, len);
+	if (ret == -1)
+		error(1, errno, "send");
+	if (ret != len)
+		error(1, errno, "send: len (%d < %d)\n", ret, len);
+
+	return 1;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.fd = fd;
+	pfd.events = events;
+
+	ret = poll(&pfd, 1, timeout);
+	if (ret == -1)
+		error(1, errno, "poll");
+	if (ret && !(pfd.revents & POLLIN))
+		error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
+
+	return ret;
+}
+
+static int do_rx(int fd)
+{
+	char rbuf;
+	int ret, num = 0;
+
+	while (1) {
+		ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (rbuf != cfg_payload_char)
+			error(1, 0, "recv: payload mismatch");
+		num++;
+	};
+
+	return num;
+}
+
+static int do_main(void)
+{
+	unsigned long tstop, treport, tcur;
+	int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
+
+	if (!cfg_only_tx)
+		fdr = setup_rx();
+	if (!cfg_only_rx)
+		fdt = setup_tx();
+
+	len = build_packet();
+
+	tcur = util_gettime();
+	treport = tcur + 1000;
+	tstop = tcur + (cfg_num_secs * 1000);
+
+	while (1) {
+		if (!cfg_only_rx)
+			tx += do_tx(fdt, buf, len);
+
+		if (!cfg_only_tx)
+			rx += do_rx(fdr);
+
+		if (cfg_num_secs) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+			if (tcur >= treport) {
+				fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+				tx = 0;
+				rx = 0;
+				treport = tcur + 1000;
+			}
+		} else {
+			if (tx == cfg_num_pkt)
+				break;
+		}
+	}
+
+	/* read straggler packets, if any */
+	if (rx < tx) {
+		tstop = util_gettime() + 100;
+		while (rx < tx) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+
+			do_poll(fdr, POLLIN, tstop - tcur);
+			rx += do_rx(fdr);
+		}
+	}
+
+	fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+
+	if (fdr != -1 && close(fdr))
+		error(1, errno, "close rx");
+	if (fdt != -1 && close(fdt))
+		error(1, errno, "close tx");
+
+	/*
+	 * success (== 0) only if received all packets
+	 * unless failure is expected, in which case none must arrive.
+	 */
+	if (cfg_expect_failure)
+		return rx != 0;
+	else
+		return rx != tx;
+}
+
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+	fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
+			"[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
+			"[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
+			"[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
+		filepath);
+	exit(1);
+}
+
+static void parse_addr(int family, void *addr, const char *optarg)
+{
+	int ret;
+
+	ret = inet_pton(family, optarg, addr);
+	if (ret == -1)
+		error(1, errno, "inet_pton");
+	if (ret == 0)
+		error(1, 0, "inet_pton: bad string");
+}
+
+static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
+{
+	parse_addr(AF_INET, &addr->sin_addr, optarg);
+}
+
+static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
+{
+	parse_addr(AF_INET6, &addr->sin6_addr, optarg);
+}
+
+static int parse_protocol_family(const char *filepath, const char *optarg)
+{
+	if (!strcmp(optarg, "4"))
+		return PF_INET;
+	if (!strcmp(optarg, "6"))
+		return PF_INET6;
+
+	usage(filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
+		switch (c) {
+		case 'd':
+			if (cfg_l3_outer == AF_UNSPEC)
+				error(1, 0, "-d must be preceded by -o");
+			if (cfg_l3_outer == AF_INET)
+				parse_addr4(&out_daddr4, optarg);
+			else
+				parse_addr6(&out_daddr6, optarg);
+			break;
+		case 'D':
+			if (cfg_l3_inner == AF_UNSPEC)
+				error(1, 0, "-D must be preceded by -i");
+			if (cfg_l3_inner == AF_INET)
+				parse_addr4(&in_daddr4, optarg);
+			else
+				parse_addr6(&in_daddr6, optarg);
+			break;
+		case 'e':
+			if (!strcmp(optarg, "gre"))
+				cfg_encap_proto = IPPROTO_GRE;
+			else if (!strcmp(optarg, "gue"))
+				cfg_encap_proto = IPPROTO_UDP;
+			else if (!strcmp(optarg, "bare"))
+				cfg_encap_proto = IPPROTO_IPIP;
+			else if (!strcmp(optarg, "none"))
+				cfg_encap_proto = IPPROTO_IP;	/* == 0 */
+			else
+				usage(argv[0]);
+			break;
+		case 'f':
+			cfg_src_port = strtol(optarg, NULL, 0);
+			break;
+		case 'F':
+			cfg_expect_failure = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			break;
+		case 'i':
+			if (!strcmp(optarg, "4"))
+				cfg_l3_inner = PF_INET;
+			else if (!strcmp(optarg, "6"))
+				cfg_l3_inner = PF_INET6;
+			else
+				usage(argv[0]);
+			break;
+		case 'l':
+			cfg_payload_len = strtol(optarg, NULL, 0);
+			break;
+		case 'n':
+			cfg_num_pkt = strtol(optarg, NULL, 0);
+			break;
+		case 'o':
+			cfg_l3_outer = parse_protocol_family(argv[0], optarg);
+			break;
+		case 'O':
+			cfg_l3_extra = parse_protocol_family(argv[0], optarg);
+			break;
+		case 'R':
+			cfg_only_rx = true;
+			break;
+		case 's':
+			if (cfg_l3_outer == AF_INET)
+				parse_addr4(&out_saddr4, optarg);
+			else
+				parse_addr6(&out_saddr6, optarg);
+			break;
+		case 'S':
+			if (cfg_l3_inner == AF_INET)
+				parse_addr4(&in_saddr4, optarg);
+			else
+				parse_addr6(&in_saddr6, optarg);
+			break;
+		case 't':
+			cfg_num_secs = strtol(optarg, NULL, 0);
+			break;
+		case 'T':
+			cfg_only_tx = true;
+			break;
+		case 'x':
+			cfg_dsfield_outer = strtol(optarg, NULL, 0);
+			break;
+		case 'X':
+			cfg_dsfield_inner = strtol(optarg, NULL, 0);
+			break;
+		}
+	}
+
+	if (cfg_only_rx && cfg_only_tx)
+		error(1, 0, "options: cannot combine rx-only and tx-only");
+
+	if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
+		error(1, 0, "options: must specify outer with encap");
+	else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
+		error(1, 0, "options: cannot combine no-encap and outer");
+	else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
+		error(1, 0, "options: cannot combine no-encap and extra");
+
+	if (cfg_l3_inner == AF_UNSPEC)
+		cfg_l3_inner = AF_INET6;
+	if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
+		cfg_encap_proto = IPPROTO_IPV6;
+
+	/* RFC 6040 4.2:
+	 *   on decap, if outer encountered congestion (CE == 0x3),
+	 *   but inner cannot encode ECN (NoECT == 0x0), then drop packet.
+	 */
+	if (((cfg_dsfield_outer & 0x3) == 0x3) &&
+	    ((cfg_dsfield_inner & 0x3) == 0x0))
+		cfg_expect_failure = true;
+}
+
+static void print_opts(void)
+{
+	if (cfg_l3_inner == PF_INET6) {
+		util_printaddr("inner.dest6", (void *) &in_daddr6);
+		util_printaddr("inner.source6", (void *) &in_saddr6);
+	} else {
+		util_printaddr("inner.dest4", (void *) &in_daddr4);
+		util_printaddr("inner.source4", (void *) &in_saddr4);
+	}
+
+	if (!cfg_l3_outer)
+		return;
+
+	fprintf(stderr, "encap proto:   %u\n", cfg_encap_proto);
+
+	if (cfg_l3_outer == PF_INET6) {
+		util_printaddr("outer.dest6", (void *) &out_daddr6);
+		util_printaddr("outer.source6", (void *) &out_saddr6);
+	} else {
+		util_printaddr("outer.dest4", (void *) &out_daddr4);
+		util_printaddr("outer.source4", (void *) &out_saddr4);
+	}
+
+	if (!cfg_l3_extra)
+		return;
+
+	if (cfg_l3_outer == PF_INET6) {
+		util_printaddr("extra.dest6", (void *) &extra_daddr6);
+		util_printaddr("extra.source6", (void *) &extra_saddr6);
+	} else {
+		util_printaddr("extra.dest4", (void *) &extra_daddr4);
+		util_printaddr("extra.source4", (void *) &extra_saddr4);
+	}
+
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	print_opts();
+	return do_main();
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
new file mode 100755
index 000000000000..c0fb073b5eab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load BPF flow dissector and verify it correctly dissects traffic
+export TESTNAME=test_flow_dissector
+unmount=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+# This test needs to be run in a network namespace with in_netns.sh. Check if
+# this is the case and run it with in_netns.sh if it is being run in the root
+# namespace.
+if [[ -z $(ip netns identify $$) ]]; then
+	../net/in_netns.sh "$0" "$@"
+	exit $?
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+	if (( $? == 0 )); then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+
+	set +e
+
+	# Cleanup
+	tc filter del dev lo ingress pref 1337 2> /dev/null
+	tc qdisc del dev lo ingress 2> /dev/null
+	./flow_dissector_load -d 2> /dev/null
+	if [ $unmount -ne 0 ]; then
+		umount bpffs 2> /dev/null
+	fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+# Mount BPF file system
+if /bin/mount | grep /sys/fs/bpf > /dev/null; then
+	echo "bpffs already mounted"
+else
+	echo "bpffs not mounted. Mounting..."
+	unmount=1
+	/bin/mount bpffs /sys/fs/bpf -t bpf
+fi
+
+# Attach BPF program
+./flow_dissector_load -p bpf_flow.o -s dissect
+
+# Setup
+tc qdisc add dev lo ingress
+
+echo "Testing IPv4..."
+# Drops all IP/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+	udp src_port 9 action drop
+
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -f 10
+
+echo "Testing IPIP..."
+# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 10
+
+echo "Testing IPv4 + GRE..."
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 10
+
+tc filter del dev lo ingress pref 1337
+
+echo "Testing IPv6..."
+# Drops all IPv6/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
+	udp src_port 9 action drop
+
+# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 6 -f 8
+# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 6 -f 9 -F
+# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 6 -f 10
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644
index 000000000000..f6d9f238e00a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+	int pid;
+	int kern_stack_size;
+	int user_stack_size;
+	int user_stack_buildid_size;
+	__u64 kern_stack[MAX_STACK_RAWTP];
+	__u64 user_stack[MAX_STACK_RAWTP];
+	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(__u32),
+	.max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct stack_trace_t),
+	.max_entries = 1,
+};
+
+/* Allocate per-cpu space twice the needed. For the code below
+ *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ *   if (usize < 0)
+ *     return 0;
+ *   ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
+ * verifier will complain that access "raw_data + usize"
+ * with size "max_len - usize" may be out of bound.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
+ *
+ * Doubling the to-be-used max buffer size can fix this verifier
+ * issue and avoid complicated C programming massaging.
+ * This is an acceptable workaround since there is one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+	.max_entries = 1,
+};
+
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+	int max_len, max_buildid_len, usize, ksize, total_size;
+	struct stack_trace_t *data;
+	void *raw_data;
+	__u32 key = 0;
+
+	data = bpf_map_lookup_elem(&stackdata_map, &key);
+	if (!data)
+		return 0;
+
+	max_len = MAX_STACK_RAWTP * sizeof(__u64);
+	max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+	data->pid = bpf_get_current_pid_tgid();
+	data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+					      max_len, 0);
+	data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+					    BPF_F_USER_STACK);
+	data->user_stack_buildid_size = bpf_get_stack(
+		ctx, data->user_stack_buildid, max_buildid_len,
+		BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+	/* write both kernel and user stacks to the same buffer */
+	raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+	if (!raw_data)
+		return 0;
+
+	usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+	if (usize < 0)
+		return 0;
+
+	ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+	if (ksize < 0)
+		return 0;
+
+	total_size = usize + ksize;
+	if (total_size > 0 && total_size <= max_len)
+		bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 35669ccd4d23..9df0d2ac45f8 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -1,6 +1,15 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ "$(id -u)" != "0" ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
 SRC_TREE=../../../../
 
 test_run()
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
index d97dc914cd49..156d89f1edcc 100755
--- a/tools/testing/selftests/bpf/test_libbpf.sh
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -6,7 +6,7 @@ export TESTNAME=test_libbpf
 # Determine selftest success via shell exit code
 exit_handler()
 {
-	if (( $? == 0 )); then
+	if [ $? -eq 0 ]; then
 		echo "selftests: $TESTNAME [PASS]";
 	else
 		echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
new file mode 100755
index 000000000000..677686198df3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+modprobe rc-loopback
+
+for i in /sys/class/rc/rc*
+do
+	if grep -q DRV_NAME=rc-loopback $i/uevent
+	then
+		LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
+	fi
+done
+
+if [ -n $LIRCDEV ];
+then
+	TYPE=lirc_mode2
+	./test_lirc_mode2_user $LIRCDEV
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo -e ${RED}"FAIL: $TYPE"${NC}
+	else
+		echo -e ${GREEN}"PASS: $TYPE"${NC}
+	fi
+fi
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
new file mode 100644
index 000000000000..ba26855563a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include "bpf_helpers.h"
+
+SEC("lirc_mode2")
+int bpf_decoder(unsigned int *sample)
+{
+	if (LIRC_IS_PULSE(*sample)) {
+		unsigned int duration = LIRC_VALUE(*sample);
+
+		if (duration & 0x10000)
+			bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
new file mode 100644
index 000000000000..d470d63c33db
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+// A lirc chardev is a device representing a consumer IR (cir) device which
+// can receive infrared signals from remote control and/or transmit IR.
+//
+// IR is sent as a series of pulses and space somewhat like morse code. The
+// BPF program can decode this into scancodes so that rc-core can translate
+// this into input key codes using the rc keymap.
+//
+// This test works by sending IR over rc-loopback, so the IR is processed by
+// BPF and then decoded into scancodes. The lirc chardev must be the one
+// associated with rc-loopback, see the output of ir-keytable(1).
+//
+// The following CONFIG options must be enabled for the test to succeed:
+// CONFIG_RC_CORE=y
+// CONFIG_BPF_RAWIR_EVENT=y
+// CONFIG_RC_LOOPBACK=y
+
+// Steps:
+// 1. Open the /dev/lircN device for rc-loopback (given on command line)
+// 2. Attach bpf_lirc_mode2 program which decodes some IR.
+// 3. Send some IR to the same IR device; since it is loopback, this will
+//    end up in the bpf program
+// 4. bpf program should decode IR and report keycode
+// 5. We can read keycode from same /dev/lirc device
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+int main(int argc, char **argv)
+{
+	struct bpf_object *obj;
+	int ret, lircfd, progfd, mode;
+	int testir = 0x1dead;
+	u32 prog_ids[10], prog_flags[10], prog_cnt;
+
+	if (argc != 2) {
+		printf("Usage: %s /dev/lircN\n", argv[0]);
+		return 2;
+	}
+
+	ret = bpf_prog_load("test_lirc_mode2_kern.o",
+			    BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+	if (ret) {
+		printf("Failed to load bpf program\n");
+		return 1;
+	}
+
+	lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
+	if (lircfd == -1) {
+		printf("failed to open lirc device %s: %m\n", argv[1]);
+		return 1;
+	}
+
+	/* Let's try detach it before it was ever attached */
+	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+	if (ret != -1 || errno != ENOENT) {
+		printf("bpf_prog_detach2 not attached should fail: %m\n");
+		return 1;
+	}
+
+	mode = LIRC_MODE_SCANCODE;
+	if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
+		printf("failed to set rec mode: %m\n");
+		return 1;
+	}
+
+	prog_cnt = 10;
+	ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+			     &prog_cnt);
+	if (ret) {
+		printf("Failed to query bpf programs on lirc device: %m\n");
+		return 1;
+	}
+
+	if (prog_cnt != 0) {
+		printf("Expected nothing to be attached\n");
+		return 1;
+	}
+
+	ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
+	if (ret) {
+		printf("Failed to attach bpf to lirc device: %m\n");
+		return 1;
+	}
+
+	/* Write raw IR */
+	ret = write(lircfd, &testir, sizeof(testir));
+	if (ret != sizeof(testir)) {
+		printf("Failed to send test IR message: %m\n");
+		return 1;
+	}
+
+	struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
+	struct lirc_scancode lsc;
+
+	poll(&pfd, 1, 100);
+
+	/* Read decoded IR */
+	ret = read(lircfd, &lsc, sizeof(lsc));
+	if (ret != sizeof(lsc)) {
+		printf("Failed to read decoded IR: %m\n");
+		return 1;
+	}
+
+	if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
+		printf("Incorrect scancode decoded\n");
+		return 1;
+	}
+
+	prog_cnt = 10;
+	ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+			     &prog_cnt);
+	if (ret) {
+		printf("Failed to query bpf programs on lirc device: %m\n");
+		return 1;
+	}
+
+	if (prog_cnt != 1) {
+		printf("Expected one program to be attached\n");
+		return 1;
+	}
+
+	/* Let's try detaching it now it is actually attached */
+	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+	if (ret) {
+		printf("bpf_prog_detach2: returned %m\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
new file mode 100644
index 000000000000..0575751bc1bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			##__VA_ARGS__);			\
+})
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+	({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+				0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+				0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+	unsigned int ver:4;
+	unsigned int priority:8;
+	unsigned int flow_label:20;
+	unsigned short payload_len;
+	unsigned char next_header;
+	unsigned char hop_limit;
+	unsigned long long src_hi;
+	unsigned long long src_lo;
+	unsigned long long dst_hi;
+	unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+	unsigned long long hi;
+	unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+	unsigned char nexthdr;
+	unsigned char hdrlen;
+	unsigned char type;
+	unsigned char segments_left;
+	unsigned char first_segment;
+	unsigned char flags;
+	unsigned short tag;
+
+	struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+	unsigned char type;
+	unsigned char len;
+	unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+	void *cursor, *data_end;
+	struct ip6_srh_t *srh;
+	struct ip6_t *ip;
+	uint8_t *ipver;
+
+	data_end = (void *)(long)skb->data_end;
+	cursor = (void *)(long)skb->data;
+	ipver = (uint8_t *)cursor;
+
+	if ((void *)ipver + sizeof(*ipver) > data_end)
+		return NULL;
+
+	if ((*ipver >> 4) != 6)
+		return NULL;
+
+	ip = cursor_advance(cursor, sizeof(*ip));
+	if ((void *)ip + sizeof(*ip) > data_end)
+		return NULL;
+
+	if (ip->next_header != 43)
+		return NULL;
+
+	srh = cursor_advance(cursor, sizeof(*srh));
+	if ((void *)srh + sizeof(*srh) > data_end)
+		return NULL;
+
+	if (srh->type != 4)
+		return NULL;
+
+	return srh;
+}
+
+__attribute__((always_inline))
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+		   uint32_t old_pad, uint32_t pad_off)
+{
+	int err;
+
+	if (new_pad != old_pad) {
+		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+					  (int) new_pad - (int) old_pad);
+		if (err)
+			return err;
+	}
+
+	if (new_pad > 0) {
+		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0};
+		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+		pad_tlv->type = SR6_TLV_PADDING;
+		pad_tlv->len = new_pad - 2;
+
+		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+					       (void *)pad_tlv_buf, new_pad);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+__attribute__((always_inline))
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+			  uint32_t *tlv_off, uint32_t *pad_size,
+			  uint32_t *pad_off)
+{
+	uint32_t srh_off, cur_off;
+	int offset_valid = 0;
+	int err;
+
+	srh_off = (char *)srh - (char *)(long)skb->data;
+	// cur_off = end of segments, start of possible TLVs
+	cur_off = srh_off + sizeof(*srh) +
+		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+	*pad_off = 0;
+
+	// we can only go as far as ~10 TLVs due to the BPF max stack size
+	#pragma clang loop unroll(full)
+	for (int i = 0; i < 10; i++) {
+		struct sr6_tlv_t tlv;
+
+		if (cur_off == *tlv_off)
+			offset_valid = 1;
+
+		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+			break;
+
+		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+		if (err)
+			return err;
+
+		if (tlv.type == SR6_TLV_PADDING) {
+			*pad_size = tlv.len + sizeof(tlv);
+			*pad_off = cur_off;
+
+			if (*tlv_off == srh_off) {
+				*tlv_off = cur_off;
+				offset_valid = 1;
+			}
+			break;
+
+		} else if (tlv.type == SR6_TLV_HMAC) {
+			break;
+		}
+
+		cur_off += sizeof(tlv) + tlv.len;
+	} // we reached the padding or HMAC TLVs, or the end of the SRH
+
+	if (*pad_off == 0)
+		*pad_off = cur_off;
+
+	if (*tlv_off == -1)
+		*tlv_off = cur_off;
+	else if (!offset_valid)
+		return -EINVAL;
+
+	return 0;
+}
+
+__attribute__((always_inline))
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+	uint8_t len_remaining, new_pad;
+	uint32_t pad_off = 0;
+	uint32_t pad_size = 0;
+	uint32_t partial_srh_len;
+	int err;
+
+	if (tlv_off != -1)
+		tlv_off += srh_off;
+
+	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+		return -EINVAL;
+
+	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+	if (err)
+		return err;
+
+	// the following can't be moved inside update_tlv_pad because the
+	// bpf verifier has some issues with it
+	pad_off += sizeof(*itlv) + itlv->len;
+	partial_srh_len = pad_off - srh_off;
+	len_remaining = partial_srh_len % 8;
+	new_pad = 8 - len_remaining;
+
+	if (new_pad == 1) // cannot pad for 1 byte only
+		new_pad = 9;
+	else if (new_pad == 8)
+		new_pad = 0;
+
+	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+__attribute__((always_inline))
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+	       uint32_t tlv_off)
+{
+	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+	uint8_t len_remaining, new_pad;
+	uint32_t partial_srh_len;
+	uint32_t pad_off = 0;
+	uint32_t pad_size = 0;
+	struct sr6_tlv_t tlv;
+	int err;
+
+	tlv_off += srh_off;
+
+	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+	if (err)
+		return err;
+
+	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+	if (err)
+		return err;
+
+	pad_off -= sizeof(tlv) + tlv.len;
+	partial_srh_len = pad_off - srh_off;
+	len_remaining = partial_srh_len % 8;
+	new_pad = 8 - len_remaining;
+	if (new_pad == 1) // cannot pad for 1 byte only
+		new_pad = 9;
+	else if (new_pad == 8)
+		new_pad = 0;
+
+	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+__attribute__((always_inline))
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
+	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+		((srh->first_segment + 1) << 4);
+	struct sr6_tlv_t tlv;
+
+	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
+		return 0;
+
+	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
+		struct ip6_addr_t egr_addr;
+
+		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
+			return 0;
+
+		// check if egress TLV value is correct
+		if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
+				ntohll(egr_addr.lo) == 0x4)
+			return 1;
+	}
+
+	return 0;
+}
+
+// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+	unsigned long long hi = 0xfd00000000000000;
+	struct ip6_addr_t *seg;
+	struct ip6_srh_t *srh;
+	char srh_buf[72]; // room for 4 segments
+	int err;
+
+	srh = (struct ip6_srh_t *)srh_buf;
+	srh->nexthdr = 0;
+	srh->hdrlen = 8;
+	srh->type = 4;
+	srh->segments_left = 3;
+	srh->first_segment = 3;
+	srh->flags = 0;
+	srh->tag = 0;
+
+	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
+	#pragma clang loop unroll(full)
+	for (unsigned long long lo = 0; lo < 4; lo++) {
+		seg->lo = htonll(4 - lo);
+		seg->hi = htonll(hi);
+		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+	}
+
+	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
+	if (err)
+		return BPF_DROP;
+
+	return BPF_REDIRECT;
+}
+
+// Add an Egress TLV fc00::4, add the flag A,
+// and apply End.X action to fc42::1
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+	unsigned long long hi = 0xfc42000000000000;
+	unsigned long long lo = 0x1;
+	struct ip6_srh_t *srh = get_srh(skb);
+	uint8_t new_flags = SR6_FLAG_ALERT;
+	struct ip6_addr_t addr;
+	int err, offset;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+		      (struct sr6_tlv_t *)&tlv, 20);
+	if (err)
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+	err = bpf_lwt_seg6_store_bytes(skb, offset,
+				       (void *)&new_flags, sizeof(new_flags));
+	if (err)
+		return BPF_DROP;
+
+	addr.lo = htonll(lo);
+	addr.hi = htonll(hi);
+	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+				  (void *)&addr, sizeof(addr));
+	if (err)
+		return BPF_DROP;
+	return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
+// simple End action
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+	struct ip6_srh_t *srh = get_srh(skb);
+	uint16_t new_tag = bpf_htons(2442);
+	uint8_t new_flags = 0;
+	int err, offset;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	if (srh->flags != SR6_FLAG_ALERT)
+		return BPF_DROP;
+
+	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
+		return BPF_DROP;
+
+	if (!has_egr_tlv(skb, srh))
+		return BPF_DROP;
+
+	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
+	if (err)
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
+				     sizeof(new_flags)))
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
+				     sizeof(new_tag)))
+		return BPF_DROP;
+
+	return BPF_OK;
+}
+
+// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
+// then apply a End.T action to reach the last segment
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+	struct ip6_srh_t *srh = get_srh(skb);
+	int table = 117;
+	int err;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	if (srh->flags != 0)
+		return BPF_DROP;
+
+	if (srh->tag != bpf_htons(2442))
+		return BPF_DROP;
+
+	if (srh->hdrlen != 8) // 4 segments
+		return BPF_DROP;
+
+	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+				  (void *)&table, sizeof(table));
+
+	if (err)
+		return BPF_DROP;
+
+	return BPF_REDIRECT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
new file mode 100755
index 000000000000..785eabf2a593
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Connects 6 network namespaces through veths.
+# Each NS may have different IPv6 global scope addresses :
+#   NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
+# fb00::1           fd00::1  fd00::2  fd00::3  fb00::6
+#                   fc42::1           fd00::4
+#
+# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
+# IPv6 header with a Segment Routing Header, with segments :
+# 	fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+#
+# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
+# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+# - fd00::2 : remove the TLV, change the flags, add a tag
+# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+#
+# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+# Each End.BPF action will validate the operations applied on the SRH by the
+# previous BPF program in the chain, otherwise the packet is dropped.
+#
+# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+# datagram can be read on NS6 when binding to fb00::6.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
+
+cleanup()
+{
+	if [ "$?" = "0" ]; then
+		echo "selftests: test_lwt_seg6local [PASS]";
+	else
+		echo "selftests: test_lwt_seg6local [FAILED]";
+	fi
+
+	set +e
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+	ip netns del ns3 2> /dev/null
+	ip netns del ns4 2> /dev/null
+	ip netns del ns5 2> /dev/null
+	ip netns del ns6 2> /dev/null
+	rm -f $TMP_FILE
+}
+
+set -e
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+ip netns add ns4
+ip netns add ns5
+ip netns add ns6
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link add veth7 type veth peer name veth8
+ip link add veth9 type veth peer name veth10
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+ip link set veth3 netns ns2
+ip link set veth4 netns ns3
+ip link set veth5 netns ns3
+ip link set veth6 netns ns4
+ip link set veth7 netns ns4
+ip link set veth8 netns ns5
+ip link set veth9 netns ns5
+ip link set veth10 netns ns6
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ns2 ip link set dev veth3 up
+ip netns exec ns3 ip link set dev veth4 up
+ip netns exec ns3 ip link set dev veth5 up
+ip netns exec ns4 ip link set dev veth6 up
+ip netns exec ns4 ip link set dev veth7 up
+ip netns exec ns5 ip link set dev veth8 up
+ip netns exec ns5 ip link set dev veth9 up
+ip netns exec ns6 ip link set dev veth10 up
+ip netns exec ns6 ip link set dev lo up
+
+# All link scope addresses and routes required between veths
+ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
+ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ns4 ip -6 addr add fc42::1 dev lo
+ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
+ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
+kill -INT $!
+
+if [[ $(< $TMP_FILE) != "foobar" ]]; then
+	exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6c253343a6f9..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,9 +15,11 @@
 #include <string.h>
 #include <assert.h>
 #include <stdlib.h>
+#include <time.h>
 
 #include <sys/wait.h>
-
+#include <sys/socket.h>
+#include <netinet/in.h>
 #include <linux/bpf.h>
 
 #include <bpf/bpf.h>
@@ -26,8 +28,21 @@
 #include "bpf_util.h"
 #include "bpf_rlimit.h"
 
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
 static int map_flags;
 
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
+		printf(format);						\
+		exit(-1);						\
+	}								\
+})
+
 static void test_hashmap(int task, void *data)
 {
 	long long key, next_key, first_key, value;
@@ -457,6 +472,122 @@ static void test_devmap(int task, void *data)
 	close(fd);
 }
 
+static void test_queuemap(int task, void *data)
+{
+	const int MAP_SIZE = 32;
+	__u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+	int fd, i;
+
+	/* Fill test values to be used */
+	for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+		vals[i] = rand();
+
+	/* Invalid key size */
+	fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
+			    map_flags);
+	assert(fd < 0 && errno == EINVAL);
+
+	fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
+			    map_flags);
+	/* Queue map does not support BPF_F_NO_PREALLOC */
+	if (map_flags & BPF_F_NO_PREALLOC) {
+		assert(fd < 0 && errno == EINVAL);
+		return;
+	}
+	if (fd < 0) {
+		printf("Failed to create queuemap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	/* Push MAP_SIZE elements */
+	for (i = 0; i < MAP_SIZE; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+	/* Check that element cannot be pushed due to max_entries limit */
+	assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+	       errno == E2BIG);
+
+	/* Peek element */
+	assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]);
+
+	/* Replace half elements */
+	for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+	/* Pop all elements */
+	for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++)
+		assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+		       val == vals[i]);
+
+	/* Check that there are not elements left */
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+	       errno == ENOENT);
+
+	/* Check that non supported functions set errno to EINVAL */
+	assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+	assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+	close(fd);
+}
+
+static void test_stackmap(int task, void *data)
+{
+	const int MAP_SIZE = 32;
+	__u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+	int fd, i;
+
+	/* Fill test values to be used */
+	for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+		vals[i] = rand();
+
+	/* Invalid key size */
+	fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
+			    map_flags);
+	assert(fd < 0 && errno == EINVAL);
+
+	fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
+			    map_flags);
+	/* Stack map does not support BPF_F_NO_PREALLOC */
+	if (map_flags & BPF_F_NO_PREALLOC) {
+		assert(fd < 0 && errno == EINVAL);
+		return;
+	}
+	if (fd < 0) {
+		printf("Failed to create stackmap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	/* Push MAP_SIZE elements */
+	for (i = 0; i < MAP_SIZE; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+	/* Check that element cannot be pushed due to max_entries limit */
+	assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+	       errno == E2BIG);
+
+	/* Peek element */
+	assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]);
+
+	/* Replace half elements */
+	for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+	/* Pop all elements */
+	for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--)
+		assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+		       val == vals[i]);
+
+	/* Check that there are not elements left */
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+	       errno == ENOENT);
+
+	/* Check that non supported functions set errno to EINVAL */
+	assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+	assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+	close(fd);
+}
+
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #include <arpa/inet.h>
@@ -566,7 +697,11 @@ static void test_sockmap(int tasks, void *data)
 	/* Test update without programs */
 	for (i = 0; i < 6; i++) {
 		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
-		if (err) {
+		if (i < 2 && !err) {
+			printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
+			       i, sfd[i]);
+			goto out_sockmap;
+		} else if (i >= 2 && err) {
 			printf("Failed noprog update sockmap '%i:%i'\n",
 			       i, sfd[i]);
 			goto out_sockmap;
@@ -727,7 +862,7 @@ static void test_sockmap(int tasks, void *data)
 	}
 
 	/* Test map update elem afterwards fd lives in fd and map_fd */
-	for (i = 0; i < 6; i++) {
+	for (i = 2; i < 6; i++) {
 		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
 		if (err) {
 			printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
@@ -831,7 +966,7 @@ static void test_sockmap(int tasks, void *data)
 	}
 
 	/* Delete the elems without programs */
-	for (i = 0; i < 6; i++) {
+	for (i = 2; i < 6; i++) {
 		err = bpf_map_delete_elem(fd, &i);
 		if (err) {
 			printf("Failed delete sockmap %i '%i:%i'\n",
@@ -1150,6 +1285,250 @@ static void test_map_wronly(void)
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
 }
 
+static void prepare_reuseport_grp(int type, int map_fd,
+				  __s64 *fds64, __u64 *sk_cookies,
+				  unsigned int n)
+{
+	socklen_t optlen, addrlen;
+	struct sockaddr_in6 s6;
+	const __u32 index0 = 0;
+	const int optval = 1;
+	unsigned int i;
+	u64 sk_cookie;
+	__s64 fd64;
+	int err;
+
+	s6.sin6_family = AF_INET6;
+	s6.sin6_addr = in6addr_any;
+	s6.sin6_port = 0;
+	addrlen = sizeof(s6);
+	optlen = sizeof(sk_cookie);
+
+	for (i = 0; i < n; i++) {
+		fd64 = socket(AF_INET6, type, 0);
+		CHECK(fd64 == -1, "socket()",
+		      "sock_type:%d fd64:%lld errno:%d\n",
+		      type, fd64, errno);
+
+		err = setsockopt(fd64, SOL_SOCKET, SO_REUSEPORT,
+				 &optval, sizeof(optval));
+		CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+		      "err:%d errno:%d\n", err, errno);
+
+		/* reuseport_array does not allow unbound sk */
+		err = bpf_map_update_elem(map_fd, &index0, &fd64,
+					  BPF_ANY);
+		CHECK(err != -1 || errno != EINVAL,
+		      "reuseport array update unbound sk",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		err = bind(fd64, (struct sockaddr *)&s6, sizeof(s6));
+		CHECK(err == -1, "bind()",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		if (i == 0) {
+			err = getsockname(fd64, (struct sockaddr *)&s6,
+					  &addrlen);
+			CHECK(err == -1, "getsockname()",
+			      "sock_type:%d err:%d errno:%d\n",
+			      type, err, errno);
+		}
+
+		err = getsockopt(fd64, SOL_SOCKET, SO_COOKIE, &sk_cookie,
+				 &optlen);
+		CHECK(err == -1, "getsockopt(SO_COOKIE)",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		if (type == SOCK_STREAM) {
+			/*
+			 * reuseport_array does not allow
+			 * non-listening tcp sk.
+			 */
+			err = bpf_map_update_elem(map_fd, &index0, &fd64,
+						  BPF_ANY);
+			CHECK(err != -1 || errno != EINVAL,
+			      "reuseport array update non-listening sk",
+			      "sock_type:%d err:%d errno:%d\n",
+			      type, err, errno);
+			err = listen(fd64, 0);
+			CHECK(err == -1, "listen()",
+			      "sock_type:%d, err:%d errno:%d\n",
+			      type, err, errno);
+		}
+
+		fds64[i] = fd64;
+		sk_cookies[i] = sk_cookie;
+	}
+}
+
+static void test_reuseport_array(void)
+{
+#define REUSEPORT_FD_IDX(err, last) ({ (err) ? last : !last; })
+
+	const __u32 array_size = 4, index0 = 0, index3 = 3;
+	int types[2] = { SOCK_STREAM, SOCK_DGRAM }, type;
+	__u64 grpa_cookies[2], sk_cookie, map_cookie;
+	__s64 grpa_fds64[2] = { -1, -1 }, fd64 = -1;
+	const __u32 bad_index = array_size;
+	int map_fd, err, t, f;
+	__u32 fds_idx = 0;
+	int fd;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+				sizeof(__u32), sizeof(__u64), array_size, 0);
+	CHECK(map_fd == -1, "reuseport array create",
+	      "map_fd:%d, errno:%d\n", map_fd, errno);
+
+	/* Test lookup/update/delete with invalid index */
+	err = bpf_map_delete_elem(map_fd, &bad_index);
+	CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
+	CHECK(err != -1 || errno != E2BIG,
+	      "reuseport array update >=max_entries",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
+	CHECK(err != -1 || errno != ENOENT,
+	      "reuseport array update >=max_entries",
+	      "err:%d errno:%d\n", err, errno);
+
+	/* Test lookup/delete non existence elem */
+	err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+	CHECK(err != -1 || errno != ENOENT,
+	      "reuseport array lookup not-exist elem",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_delete_elem(map_fd, &index3);
+	CHECK(err != -1 || errno != ENOENT,
+	      "reuseport array del not-exist elem",
+	      "err:%d errno:%d\n", err, errno);
+
+	for (t = 0; t < ARRAY_SIZE(types); t++) {
+		type = types[t];
+
+		prepare_reuseport_grp(type, map_fd, grpa_fds64,
+				      grpa_cookies, ARRAY_SIZE(grpa_fds64));
+
+		/* Test BPF_* update flags */
+		/* BPF_EXIST failure case */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_EXIST);
+		CHECK(err != -1 || errno != ENOENT,
+		      "reuseport array update empty elem BPF_EXIST",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_NOEXIST success case */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_NOEXIST);
+		CHECK(err == -1,
+		      "reuseport array update empty elem BPF_NOEXIST",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_EXIST success case. */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_EXIST);
+		CHECK(err == -1,
+		      "reuseport array update same elem BPF_EXIST",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_NOEXIST failure case */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_NOEXIST);
+		CHECK(err != -1 || errno != EEXIST,
+		      "reuseport array update non-empty elem BPF_NOEXIST",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_ANY case (always succeed) */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_ANY);
+		CHECK(err == -1,
+		      "reuseport array update same sk with BPF_ANY",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		fd64 = grpa_fds64[fds_idx];
+		sk_cookie = grpa_cookies[fds_idx];
+
+		/* The same sk cannot be added to reuseport_array twice */
+		err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
+		CHECK(err != -1 || errno != EBUSY,
+		      "reuseport array update same sk with same index",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
+		CHECK(err != -1 || errno != EBUSY,
+		      "reuseport array update same sk with different index",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		/* Test delete elem */
+		err = bpf_map_delete_elem(map_fd, &index3);
+		CHECK(err == -1, "reuseport array delete sk",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		/* Add it back with BPF_NOEXIST */
+		err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+		CHECK(err == -1,
+		      "reuseport array re-add with BPF_NOEXIST after del",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		/* Test cookie */
+		err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+		CHECK(err == -1 || sk_cookie != map_cookie,
+		      "reuseport array lookup re-added sk",
+		      "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
+		      type, err, errno, sk_cookie, map_cookie);
+
+		/* Test elem removed by close() */
+		for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
+			close(grpa_fds64[f]);
+		err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+		CHECK(err != -1 || errno != ENOENT,
+		      "reuseport array lookup after close()",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+	}
+
+	/* Test SOCK_RAW */
+	fd64 = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP);
+	CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
+	      err, errno);
+	err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+	CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+	      "err:%d errno:%d\n", err, errno);
+	close(fd64);
+
+	/* Close the 64 bit value map */
+	close(map_fd);
+
+	/* Test 32 bit fd */
+	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+				sizeof(__u32), sizeof(__u32), array_size, 0);
+	CHECK(map_fd == -1, "reuseport array create",
+	      "map_fd:%d, errno:%d\n", map_fd, errno);
+	prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+	fd = fd64;
+	err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
+	CHECK(err == -1, "reuseport array update 32 bit fd",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+	CHECK(err != -1 || errno != ENOSPC,
+	      "reuseport array lookup 32 bit fd",
+	      "err:%d errno:%d\n", err, errno);
+	close(fd);
+	close(map_fd);
+}
+
 static void run_all_tests(void)
 {
 	test_hashmap(0, NULL);
@@ -1170,10 +1549,17 @@ static void run_all_tests(void)
 
 	test_map_rdonly();
 	test_map_wronly();
+
+	test_reuseport_array();
+
+	test_queuemap(0, NULL);
+	test_stackmap(0, NULL);
 }
 
 int main(void)
 {
+	srand(time(NULL));
+
 	map_flags = 0;
 	run_all_tests();
 
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
new file mode 100644
index 000000000000..44ed7f29f8ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+#include "netcnt_common.h"
+
+#define BPF_PROG "./netcnt_prog.o"
+#define TEST_CGROUP "/test-network-counters/"
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
+int main(int argc, char **argv)
+{
+	struct percpu_net_cnt *percpu_netcnt;
+	struct bpf_cgroup_storage_key key;
+	int map_fd, percpu_map_fd;
+	int error = EXIT_FAILURE;
+	struct net_cnt netcnt;
+	struct bpf_object *obj;
+	int prog_fd, cgroup_fd;
+	unsigned long packets;
+	unsigned long bytes;
+	int cpu, nproc;
+	__u32 prog_cnt;
+
+	nproc = get_nprocs_conf();
+	percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+	if (!percpu_netcnt) {
+		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
+		goto err;
+	}
+
+	if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
+			  &obj, &prog_fd)) {
+		printf("Failed to load bpf program\n");
+		goto out;
+	}
+
+	if (setup_cgroup_environment()) {
+		printf("Failed to load bpf program\n");
+		goto err;
+	}
+
+	/* Create a cgroup, get fd, and join it */
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (!cgroup_fd) {
+		printf("Failed to create test cgroup\n");
+		goto err;
+	}
+
+	if (join_cgroup(TEST_CGROUP)) {
+		printf("Failed to join cgroup\n");
+		goto err;
+	}
+
+	/* Attach bpf program */
+	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+		printf("Failed to attach bpf program");
+		goto err;
+	}
+
+	if (system("which ping6 &>/dev/null") == 0)
+		assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+	else
+		assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
+
+	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
+			   &prog_cnt)) {
+		printf("Failed to query attached programs");
+		goto err;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "netcnt");
+	if (map_fd < 0) {
+		printf("Failed to find bpf map with net counters");
+		goto err;
+	}
+
+	percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
+	if (percpu_map_fd < 0) {
+		printf("Failed to find bpf map with percpu net counters");
+		goto err;
+	}
+
+	if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+		printf("Failed to get key in cgroup storage\n");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
+		printf("Failed to lookup cgroup storage\n");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
+		printf("Failed to lookup percpu cgroup storage\n");
+		goto err;
+	}
+
+	/* Some packets can be still in per-cpu cache, but not more than
+	 * MAX_PERCPU_PACKETS.
+	 */
+	packets = netcnt.packets;
+	bytes = netcnt.bytes;
+	for (cpu = 0; cpu < nproc; cpu++) {
+		if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
+			printf("Unexpected percpu value: %llu\n",
+			       percpu_netcnt[cpu].packets);
+			goto err;
+		}
+
+		packets += percpu_netcnt[cpu].packets;
+		bytes += percpu_netcnt[cpu].bytes;
+	}
+
+	/* No packets should be lost */
+	if (packets != 10000) {
+		printf("Unexpected packet count: %lu\n", packets);
+		goto err;
+	}
+
+	/* Let's check that bytes counter matches the number of packets
+	 * multiplied by the size of ipv6 ICMP packet.
+	 */
+	if (bytes != packets * 104) {
+		printf("Unexpected bytes count: %lu\n", bytes);
+		goto err;
+	}
+
+	error = 0;
+	printf("test_netcnt:PASS\n");
+
+err:
+	cleanup_cgroup_environment();
+	free(percpu_netcnt);
+
+out:
+	return error;
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e78aad0a68bb..d59642e70f56 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -158,11 +158,16 @@ def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
     else:
         return ret, out
 
-def bpftool(args, JSON=True, ns="", fail=True):
-    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
 
 def bpftool_prog_list(expected=None, ns=""):
     _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+    # Remove the base progs
+    for p in base_progs:
+        if p in progs:
+            progs.remove(p)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -171,6 +176,10 @@ def bpftool_prog_list(expected=None, ns=""):
 
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    # Remove the base maps
+    for m in base_maps:
+        if m in maps:
+            maps.remove(m)
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -193,6 +202,21 @@ def bpftool_map_list_wait(expected=0, n_retry=20):
         time.sleep(0.05)
     raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
 
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+                      fail=True, include_stderr=False):
+    args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+    if prog_type is not None:
+        args += " type " + prog_type
+    if dev is not None:
+        args += " dev " + dev
+    if len(maps):
+        args += " map " + " map ".join(maps)
+
+    res = bpftool(args, fail=fail, include_stderr=include_stderr)
+    if res[0] == 0:
+        files.append(file_name)
+    return res
+
 def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
     if force:
         args = "-force " + args
@@ -299,21 +323,25 @@ class NetdevSim:
     Class for netdevsim netdevice and its attributes.
     """
 
-    def __init__(self):
+    def __init__(self, link=None):
+        self.link = link
+
         self.dev = self._netdevsim_create()
         devs.append(self)
 
         self.ns = ""
 
         self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+        self.sdev_dir = self.dfs_dir + '/sdev/'
         self.dfs_refresh()
 
     def __getitem__(self, key):
         return self.dev[key]
 
     def _netdevsim_create(self):
+        link = "" if self.link is None else "link " + self.link.dev['ifname']
         _, old  = ip("link show")
-        ip("link add sim%d type netdevsim")
+        ip("link add sim%d {link} type netdevsim".format(link=link))
         _, new  = ip("link show")
 
         for dev in new:
@@ -331,13 +359,18 @@ class NetdevSim:
         self.dfs = DebugfsDir(self.dfs_dir)
         return self.dfs
 
+    def dfs_read(self, f):
+        path = os.path.join(self.dfs_dir, f)
+        _, data = cmd('cat %s' % (path))
+        return data.strip()
+
     def dfs_num_bound_progs(self):
-        path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+        path = os.path.join(self.sdev_dir, "bpf_bound_progs")
         _, progs = cmd('ls %s' % (path))
         return len(progs.split())
 
     def dfs_get_bound_progs(self, expected):
-        progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+        progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
         if expected is not None:
             if len(progs) != expected:
                 fail(True, "%d BPF programs bound, expected %d" %
@@ -539,11 +572,11 @@ def check_extack(output, reference, args):
     if skip_extack:
         return
     lines = output.split("\n")
-    comp = len(lines) >= 2 and lines[1] == reference
+    comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
     fail(not comp, "Missing or incorrect netlink extack message")
 
 def check_extack_nsim(output, reference, args):
-    check_extack(output, "Error: netdevsim: " + reference, args)
+    check_extack(output, "netdevsim: " + reference, args)
 
 def check_no_extack(res, needle):
     fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
@@ -585,8 +618,8 @@ skip(os.getuid() != 0, "test must be run as root")
 # Check tools
 ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
-# Check no BPF programs are loaded
-skip(len(progs) != 0, "BPF programs already loaded on the system")
+base_progs = progs
+_, base_maps = bpftool("map")
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
@@ -646,7 +679,7 @@ try:
     ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret == 0, "TC filter loaded without enabling TC offloads")
-    check_extack(err, "Error: TC offload is disabled on net device.", args)
+    check_extack(err, "TC offload is disabled on net device.", args)
     sim.wait_for_flush()
 
     sim.set_ethtool_tc_offloads(True)
@@ -686,7 +719,7 @@ try:
                                          skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret == 0, "Offloaded a filter to chain other than 0")
-    check_extack(err, "Error: Driver supports only offload of chain 0.", args)
+    check_extack(err, "Driver supports only offload of chain 0.", args)
     sim.tc_flush_filters()
 
     start_test("Test TC replace...")
@@ -806,24 +839,20 @@ try:
          "Device parameters reported for non-offloaded program")
 
     start_test("Test XDP prog replace with bad flags...")
-    ret, _, err = sim.set_xdp(obj, "offload", force=True,
+    ret, _, err = sim.set_xdp(obj, "generic", force=True,
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    fail(err.count("File exists") != 1, "Replaced driver XDP with generic")
     ret, _, err = sim.set_xdp(obj, "", force=True,
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test XDP prog remove with bad flags...")
-    ret, _, err = sim.unset_xdp("offload", force=True,
-                                fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
     ret, _, err = sim.unset_xdp("", force=True,
                                 fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    fail(ret == 0, "Removed program with a bad mode")
+    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
@@ -838,6 +867,25 @@ try:
     sim.set_mtu(1500)
 
     sim.wait_for_flush()
+    start_test("Test non-offload XDP attaching to HW...")
+    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+    nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+    ret, _, err = sim.set_xdp(nooffload, "offload",
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "attached non-offloaded XDP program to HW")
+    check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+    rm("/sys/fs/bpf/nooffload")
+
+    start_test("Test offload XDP attaching to drv...")
+    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+                      dev=sim['ifname'])
+    offload = bpf_pinned("/sys/fs/bpf/offload")
+    ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+    fail(ret == 0, "attached offloaded XDP program to drv")
+    check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+    rm("/sys/fs/bpf/offload")
+    sim.wait_for_flush()
+
     start_test("Test XDP offload...")
     _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
     ipl = sim.ip_link_show(xdp=True)
@@ -883,6 +931,60 @@ try:
     rm(pin_file)
     bpftool_prog_list_wait(expected=0)
 
+    start_test("Test multi-attachment XDP - attach...")
+    sim.set_xdp(obj, "offload")
+    xdp = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded = sim.dfs_read("bpf_offloaded_id")
+    fail("prog" not in xdp, "Base program not reported in single program mode")
+    fail(len(ipl["xdp"]["attached"]) != 1,
+         "Wrong attached program count with one program")
+
+    sim.set_xdp(obj, "")
+    two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded2 = sim.dfs_read("bpf_offloaded_id")
+
+    fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+    fail("prog" in two_xdps, "Base program reported in multi program mode")
+    fail(xdp["attached"][0] not in two_xdps["attached"],
+         "Offload program not reported after driver activated")
+    fail(len(two_xdps["attached"]) != 2,
+         "Wrong attached program count with two programs")
+    fail(two_xdps["attached"][0]["prog"]["id"] ==
+         two_xdps["attached"][1]["prog"]["id"],
+         "offloaded and drv programs have the same id")
+    fail(offloaded != offloaded2,
+         "offload ID changed after loading driver program")
+
+    start_test("Test multi-attachment XDP - replace...")
+    ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+    fail(err.count("busy") != 1, "Replaced one of programs without -force")
+
+    start_test("Test multi-attachment XDP - detach...")
+    ret, _, err = sim.unset_xdp("drv", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program with a bad mode")
+    check_extack(err, "program loaded with different flags.", args)
+
+    sim.unset_xdp("offload")
+    xdp = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded = sim.dfs_read("bpf_offloaded_id")
+
+    fail(xdp["mode"] != 1, "Bad mode reported after multiple programs")
+    fail("prog" not in xdp,
+         "Base program not reported after multi program mode")
+    fail(xdp["attached"][0] not in two_xdps["attached"],
+         "Offload program not reported after driver activated")
+    fail(len(ipl["xdp"]["attached"]) != 1,
+         "Wrong attached program count with remaining programs")
+    fail(offloaded != "0", "offload ID reported with only driver program left")
+
+    start_test("Test multi-attachment XDP - device remove...")
+    sim.set_xdp(obj, "offload")
+    sim.remove()
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
     start_test("Test mixing of TC and XDP...")
     sim.tc_add_ingress()
     sim.set_xdp(obj, "offload")
@@ -1077,6 +1179,106 @@ try:
     fail(ret == 0,
          "netdevsim didn't refuse to create a map with offload disabled")
 
+    sim.remove()
+
+    start_test("Test multi-dev ASIC program reuse...")
+    simA = NetdevSim()
+    simB1 = NetdevSim()
+    simB2 = NetdevSim(link=simB1)
+    simB3 = NetdevSim(link=simB1)
+    sims = (simA, simB1, simB2, simB3)
+    simB = (simB1, simB2, simB3)
+
+    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+                      dev=simA['ifname'])
+    progA = bpf_pinned("/sys/fs/bpf/nsimA")
+    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+                      dev=simB1['ifname'])
+    progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+    simA.set_xdp(progA, "offload", JSON=False)
+    for d in simB:
+        d.set_xdp(progB, "offload", JSON=False)
+
+    start_test("Test multi-dev ASIC cross-dev replace...")
+    ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+    fail(ret == 0, "cross-ASIC program allowed")
+    for d in simB:
+        ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+        fail(ret == 0, "cross-ASIC program allowed")
+
+    start_test("Test multi-dev ASIC cross-dev install...")
+    for d in sims:
+        d.unset_xdp("offload")
+
+    ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+                               fail=False, include_stderr=True)
+    fail(ret == 0, "cross-ASIC program allowed")
+    check_extack_nsim(err, "program bound to different dev.", args)
+    for d in simB:
+        ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+                                fail=False, include_stderr=True)
+        fail(ret == 0, "cross-ASIC program allowed")
+        check_extack_nsim(err, "program bound to different dev.", args)
+
+    start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+    mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+    mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+    ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+                               dev=simB3['ifname'],
+                               maps=["idx 0 id %d" % (mapB)],
+                               fail=False)
+    fail(ret != 0, "couldn't reuse a map on the same ASIC")
+    rm("/sys/fs/bpf/nsimB_")
+
+    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+                                    dev=simA['ifname'],
+                                    maps=["idx 0 id %d" % (mapB)],
+                                    fail=False, include_stderr=True)
+    fail(ret == 0, "could reuse a map on a different ASIC")
+    fail(err.count("offload device mismatch between prog and map") == 0,
+         "error message missing for cross-ASIC map")
+
+    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+                                    dev=simB1['ifname'],
+                                    maps=["idx 0 id %d" % (mapA)],
+                                    fail=False, include_stderr=True)
+    fail(ret == 0, "could reuse a map on a different ASIC")
+    fail(err.count("offload device mismatch between prog and map") == 0,
+         "error message missing for cross-ASIC map")
+
+    start_test("Test multi-dev ASIC cross-dev destruction...")
+    bpftool_prog_list_wait(expected=2)
+
+    simA.remove()
+    bpftool_prog_list_wait(expected=1)
+
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+    simB1.remove()
+    bpftool_prog_list_wait(expected=1)
+
+    start_test("Test multi-dev ASIC cross-dev destruction - move...")
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+         "program not bound to remaining devices")
+
+    simB2.remove()
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+    simB3.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+    ret, out = bpftool("prog show %s" % (progB), fail=False)
+    fail(ret == 0, "got information about orphaned program")
+    fail("error" not in out, "no error reported for get info on orphaned")
+    fail(out["error"] != "can't get prog info: No such device",
+         "wrong error for get info on orphaned")
+
     print("%s: OK" % (os.path.basename(__file__)))
 
 finally:
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 4123d0ab90ba..2d3c04f45530 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -38,8 +38,10 @@ typedef __u16 __sum16;
 #include "bpf_util.h"
 #include "bpf_endian.h"
 #include "bpf_rlimit.h"
+#include "trace_helpers.h"
 
 static int error_cnt, pass_cnt;
+static bool jit_enabled;
 
 #define MAGIC_BYTES 123
 
@@ -110,13 +112,13 @@ static void test_pkt_access(void)
 
 	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
 				NULL, NULL, &retval, &duration);
-	CHECK(err || errno || retval, "ipv4",
+	CHECK(err || retval, "ipv4",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
 	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
 				NULL, NULL, &retval, &duration);
-	CHECK(err || errno || retval, "ipv6",
+	CHECK(err || retval, "ipv6",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 	bpf_object__close(obj);
@@ -151,14 +153,14 @@ static void test_xdp(void)
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
 
-	CHECK(err || errno || retval != XDP_TX || size != 74 ||
+	CHECK(err || retval != XDP_TX || size != 74 ||
 	      iph->protocol != IPPROTO_IPIP, "ipv4",
 	      "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
 
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != XDP_TX || size != 114 ||
+	CHECK(err || retval != XDP_TX || size != 114 ||
 	      iph6->nexthdr != IPPROTO_IPV6, "ipv6",
 	      "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
@@ -166,6 +168,37 @@ out:
 	bpf_object__close(obj);
 }
 
+static void test_xdp_adjust_tail(void)
+{
+	const char *file = "./test_adjust_tail.o";
+	struct bpf_object *obj;
+	char buf[128];
+	__u32 duration, retval, size;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+
+	CHECK(err || retval != XDP_DROP,
+	      "ipv4", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != XDP_TX || size != 54,
+	      "ipv6", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+	bpf_object__close(obj);
+}
+
+
+
 #define MAGIC_VAL 0x1234
 #define NUM_ITER 100000
 #define VIP_NUM 5
@@ -221,14 +254,14 @@ static void test_l4lb(const char *file)
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+	CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
 	      *magic != MAGIC_VAL, "ipv4",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+	CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
 	      *magic != MAGIC_VAL, "ipv6",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
@@ -310,14 +343,14 @@ static void test_xdp_noinline(void)
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 1 || size != 54 ||
+	CHECK(err || retval != 1 || size != 54 ||
 	      *magic != MAGIC_VAL, "ipv4",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 1 || size != 74 ||
+	CHECK(err || retval != 1 || size != 74 ||
 	      *magic != MAGIC_VAL, "ipv6",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
@@ -360,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
+static bool is_jit_enabled(void)
+{
+	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+	bool enabled = false;
+	int sysctl_fd;
+
+	sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+	if (sysctl_fd != -1) {
+		char tmpc;
+
+		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+			enabled = (tmpc != '0');
+		close(sysctl_fd);
+	}
+
+	return enabled;
+}
+
 static void test_bpf_obj_id(void)
 {
 	const __u64 array_magic_value = 0xfaceb00c;
 	const __u32 array_key = 0;
 	const int nr_iters = 2;
 	const char *file = "./test_obj_id.o";
-	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
 	const char *expected_prog_name = "test_obj_id";
 	const char *expected_map_name = "test_map_id";
 	const __u64 nsec_per_sec = 1000000000;
@@ -383,20 +433,11 @@ static void test_bpf_obj_id(void)
 	char jited_insns[128], xlated_insns[128], zeros[128];
 	__u32 i, next_id, info_len, nr_id_found, duration = 0;
 	struct timespec real_time_ts, boot_time_ts;
-	int sysctl_fd, jit_enabled = 0, err = 0;
+	int err = 0;
 	__u64 array_value;
 	uid_t my_uid = getuid();
 	time_t now, load_time;
 
-	sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
-	if (sysctl_fd != -1) {
-		char tmpc;
-
-		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
-			jit_enabled = (tmpc != '0');
-		close(sysctl_fd);
-	}
-
 	err = bpf_prog_get_fd_by_id(0);
 	CHECK(err >= 0 || errno != ENOENT,
 	      "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
@@ -865,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd)
 	return 0;
 }
 
+static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
+{
+	__u32 key, next_key, *cur_key_p, *next_key_p;
+	char *val_buf1, *val_buf2;
+	int i, err = 0;
+
+	val_buf1 = malloc(stack_trace_len);
+	val_buf2 = malloc(stack_trace_len);
+	cur_key_p = NULL;
+	next_key_p = &key;
+	while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
+		err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
+		if (err)
+			goto out;
+		err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
+		if (err)
+			goto out;
+		for (i = 0; i < stack_trace_len; i++) {
+			if (val_buf1[i] != val_buf2[i]) {
+				err = -1;
+				goto out;
+			}
+		}
+		key = *next_key_p;
+		cur_key_p = &key;
+		next_key_p = &next_key;
+	}
+	if (errno != ENOENT)
+		err = -1;
+
+out:
+	free(val_buf1);
+	free(val_buf2);
+	return err;
+}
+
 static void test_stacktrace_map()
 {
-	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	const char *file = "./test_stacktrace_map.o";
-	int bytes, efd, err, pmu_fd, prog_fd;
+	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
 	struct perf_event_attr attr = {};
 	__u32 key, val, duration = 0;
 	struct bpf_object *obj;
@@ -925,6 +1002,10 @@ static void test_stacktrace_map()
 	if (stackmap_fd < 0)
 		goto disable_pmu;
 
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (stack_amap_fd < 0)
+		goto disable_pmu;
+
 	/* give some time for bpf program run */
 	sleep(1);
 
@@ -946,6 +1027,12 @@ static void test_stacktrace_map()
 		  "err %d errno %d\n", err, errno))
 		goto disable_pmu_noerr;
 
+	stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu_noerr;
+
 	goto disable_pmu_noerr;
 disable_pmu:
 	error_cnt++;
@@ -1039,9 +1126,9 @@ err:
 
 static void test_stacktrace_build_id(void)
 {
-	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	const char *file = "./test_stacktrace_build_id.o";
-	int bytes, efd, err, pmu_fd, prog_fd;
+	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
 	struct perf_event_attr attr = {};
 	__u32 key, previous_key, val, duration = 0;
 	struct bpf_object *obj;
@@ -1106,6 +1193,11 @@ static void test_stacktrace_build_id(void)
 		  err, errno))
 		goto disable_pmu;
 
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
 	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
 	       == 0);
 	assert(system("./urandom_read") == 0);
@@ -1157,8 +1249,15 @@ static void test_stacktrace_build_id(void)
 		previous_key = key;
 	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
 
-	CHECK(build_id_matches < 1, "build id match",
-	      "Didn't find expected build ID from the map\n");
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map\n"))
+		goto disable_pmu;
+
+	stack_trace_len = PERF_MAX_STACK_DEPTH
+		* sizeof(struct bpf_stack_build_id);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+	      "err %d errno %d\n", err, errno);
 
 disable_pmu:
 	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
@@ -1173,10 +1272,573 @@ out:
 	return;
 }
 
+static void test_stacktrace_build_id_nmi(void)
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *file = "./test_stacktrace_build_id.o";
+	int err, pmu_fd, prog_fd;
+	struct perf_event_attr attr = {
+		.sample_freq = 5000,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+	__u32 key, previous_key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+	int i, j;
+	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+	int build_id_matches = 0;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		return;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open",
+		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+	       == 0);
+	assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = extract_build_id(buf, 256);
+
+	if (CHECK(err, "get build_id with readelf",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	if (CHECK(err, "get_next_key from stackmap",
+		  "err %d, errno %d\n", err, errno))
+		goto disable_pmu;
+
+	do {
+		char build_id[64];
+
+		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		if (CHECK(err, "lookup_elem from stackmap",
+			  "err %d, errno %d\n", err, errno))
+			goto disable_pmu;
+		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+			    id_offs[i].offset != 0) {
+				for (j = 0; j < 20; ++j)
+					sprintf(build_id + 2 * j, "%02x",
+						id_offs[i].build_id[j] & 0xff);
+				if (strstr(buf, build_id) != NULL)
+					build_id_matches = 1;
+			}
+		previous_key = key;
+	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map\n"))
+		goto disable_pmu;
+
+	/*
+	 * We intentionally skip compare_stack_ips(). This is because we
+	 * only support one in_nmi() ips-to-build_id translation per cpu
+	 * at any time, thus stack_amap here will always fallback to
+	 * BPF_STACK_BUILD_ID_IP;
+	 */
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+}
+
+#define MAX_CNT_RAWTP	10ull
+#define MAX_STACK_RAWTP	100
+struct get_stack_trace_t {
+	int pid;
+	int kern_stack_size;
+	int user_stack_size;
+	int user_stack_buildid_size;
+	__u64 kern_stack[MAX_STACK_RAWTP];
+	__u64 user_stack[MAX_STACK_RAWTP];
+	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+static int get_stack_print_output(void *data, int size)
+{
+	bool good_kern_stack = false, good_user_stack = false;
+	const char *nonjit_func = "___bpf_prog_run";
+	struct get_stack_trace_t *e = data;
+	int i, num_stack;
+	static __u64 cnt;
+	struct ksym *ks;
+
+	cnt++;
+
+	if (size < sizeof(struct get_stack_trace_t)) {
+		__u64 *raw_data = data;
+		bool found = false;
+
+		num_stack = size / sizeof(__u64);
+		/* If jit is enabled, we do not have a good way to
+		 * verify the sanity of the kernel stack. So we
+		 * just assume it is good if the stack is not empty.
+		 * This could be improved in the future.
+		 */
+		if (jit_enabled) {
+			found = num_stack > 0;
+		} else {
+			for (i = 0; i < num_stack; i++) {
+				ks = ksym_search(raw_data[i]);
+				if (strcmp(ks->name, nonjit_func) == 0) {
+					found = true;
+					break;
+				}
+			}
+		}
+		if (found) {
+			good_kern_stack = true;
+			good_user_stack = true;
+		}
+	} else {
+		num_stack = e->kern_stack_size / sizeof(__u64);
+		if (jit_enabled) {
+			good_kern_stack = num_stack > 0;
+		} else {
+			for (i = 0; i < num_stack; i++) {
+				ks = ksym_search(e->kern_stack[i]);
+				if (strcmp(ks->name, nonjit_func) == 0) {
+					good_kern_stack = true;
+					break;
+				}
+			}
+		}
+		if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+			good_user_stack = true;
+	}
+	if (!good_kern_stack || !good_user_stack)
+		return LIBBPF_PERF_EVENT_ERROR;
+
+	if (cnt == MAX_CNT_RAWTP)
+		return LIBBPF_PERF_EVENT_DONE;
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_get_stack_raw_tp(void)
+{
+	const char *file = "./test_get_stack_rawtp.o";
+	int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
+	struct perf_event_attr attr = {};
+	struct timespec tv = {0, 10};
+	__u32 key = 0, duration = 0;
+	struct bpf_object *obj;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
+	if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  perfmap_fd, errno))
+		goto close_prog;
+
+	err = load_kallsyms();
+	if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
+			 -1/*group_fd*/, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
+	if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+
+	err = perf_event_mmap(pmu_fd);
+	if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	/* trigger some syscall action */
+	for (i = 0; i < MAX_CNT_RAWTP; i++)
+		nanosleep(&tv, NULL);
+
+	err = perf_event_poller(pmu_fd, get_stack_print_output);
+	if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_rawtp(void)
+{
+	const char *file = "./test_get_stack_rawtp.o";
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	struct bpf_object *obj;
+	int efd, err, prog_fd;
+	__u32 duration = 0;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	/* query (getpid(), efd) */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+		  errno))
+		goto close_prog;
+
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      strcmp(buf, "sys_enter") == 0;
+	if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+		  fd_type, buf))
+		goto close_prog;
+
+	/* test zero len */
+	len = 0;
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      len == strlen("sys_enter");
+	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+		goto close_prog;
+
+	/* test empty buffer */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      len == strlen("sys_enter");
+	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+		goto close_prog;
+
+	/* test smaller buffer */
+	len = 3;
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
+		  "err %d errno %d\n", err, errno))
+		goto close_prog;
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      len == strlen("sys_enter") &&
+	      strcmp(buf, "sy") == 0;
+	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+				       const char *tp_name)
+{
+	const char *file = "./test_tracepoint.o";
+	int err, bytes, efd, prog_fd, pmu_fd;
+	struct perf_event_attr attr = {};
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	struct bpf_object *obj;
+	__u32 duration = 0;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	/* query (getpid(), pmu_fd) */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
+	if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+		  fd_type, buf))
+		goto close_pmu;
+
+	close(pmu_fd);
+	goto close_prog_noerr;
+
+close_pmu:
+	close(pmu_fd);
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+	test_task_fd_query_tp_core("sched/sched_switch",
+				   "sched_switch");
+	test_task_fd_query_tp_core("syscalls/sys_enter_read",
+				   "sys_enter_read");
+}
+
+static void test_reference_tracking()
+{
+	const char *file = "./test_sk_lookup_kern.o";
+	struct bpf_object *obj;
+	struct bpf_program *prog;
+	__u32 duration;
+	int err = 0;
+
+	obj = bpf_object__open(file);
+	if (IS_ERR(obj)) {
+		error_cnt++;
+		return;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		const char *title;
+
+		/* Ignore .text sections */
+		title = bpf_program__title(prog, false);
+		if (strstr(title, ".text") != NULL)
+			continue;
+
+		bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
+
+		/* Expect verifier failure if test name has 'fail' */
+		if (strstr(title, "fail") != NULL) {
+			libbpf_set_print(NULL, NULL, NULL);
+			err = !bpf_program__load(prog, "GPL", 0);
+			libbpf_set_print(printf, printf, NULL);
+		} else {
+			err = bpf_program__load(prog, "GPL", 0);
+		}
+		CHECK(err, title, "\n");
+	}
+	bpf_object__close(obj);
+}
+
+enum {
+	QUEUE,
+	STACK,
+};
+
+static void test_queue_stack_map(int type)
+{
+	const int MAP_SIZE = 32;
+	__u32 vals[MAP_SIZE], duration, retval, size, val;
+	int i, err, prog_fd, map_in_fd, map_out_fd;
+	char file[32], buf[128];
+	struct bpf_object *obj;
+	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+
+	/* Fill test values to be used */
+	for (i = 0; i < MAP_SIZE; i++)
+		vals[i] = rand();
+
+	if (type == QUEUE)
+		strncpy(file, "./test_queue_map.o", sizeof(file));
+	else if (type == STACK)
+		strncpy(file, "./test_stack_map.o", sizeof(file));
+	else
+		return;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_in_fd = bpf_find_map(__func__, obj, "map_in");
+	if (map_in_fd < 0)
+		goto out;
+
+	map_out_fd = bpf_find_map(__func__, obj, "map_out");
+	if (map_out_fd < 0)
+		goto out;
+
+	/* Push 32 elements to the input map */
+	for (i = 0; i < MAP_SIZE; i++) {
+		err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
+		if (err) {
+			error_cnt++;
+			goto out;
+		}
+	}
+
+	/* The eBPF program pushes iph.saddr in the output map,
+	 * pops the input map and saves this value in iph.daddr
+	 */
+	for (i = 0; i < MAP_SIZE; i++) {
+		if (type == QUEUE) {
+			val = vals[i];
+			pkt_v4.iph.saddr = vals[i] * 5;
+		} else if (type == STACK) {
+			val = vals[MAP_SIZE - 1 - i];
+			pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
+		}
+
+		err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+					buf, &size, &retval, &duration);
+		if (err || retval || size != sizeof(pkt_v4) ||
+		    iph->daddr != val)
+			break;
+	}
+
+	CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
+	      "bpf_map_pop_elem",
+	      "err %d errno %d retval %d size %d iph->daddr %u\n",
+	      err, errno, retval, size, iph->daddr);
+
+	/* Queue is empty, program should return TC_ACT_SHOT */
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
+	      "check-queue-stack-map-empty",
+	      "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	/* Check that the program pushed elements correctly */
+	for (i = 0; i < MAP_SIZE; i++) {
+		err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
+		if (err || val != vals[i] * 5)
+			break;
+	}
+
+	CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
+	      "bpf_map_push_elem", "err %d value %u\n", err, val);
+
+out:
+	pkt_v4.iph.saddr = 0;
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
+	srand(time(NULL));
+
+	jit_enabled = is_jit_enabled();
+
 	test_pkt_access();
 	test_xdp();
+	test_xdp_adjust_tail();
 	test_l4lb_all();
 	test_xdp_noinline();
 	test_tcp_estats();
@@ -1186,7 +1848,14 @@ int main(void)
 	test_tp_attach_query();
 	test_stacktrace_map();
 	test_stacktrace_build_id();
+	test_stacktrace_build_id_nmi();
 	test_stacktrace_map_raw_tp();
+	test_get_stack_raw_tp();
+	test_task_fd_query_rawtp();
+	test_task_fd_query_tp();
+	test_reference_tracking();
+	test_queue_stack_map(QUEUE);
+	test_queue_stack_map(STACK);
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c
new file mode 100644
index 000000000000..87db1f9da33d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_QUEUE
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
new file mode 100644
index 000000000000..295b9b3bc5c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Politecnico di Torino
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
+	.type = MAP_TYPE,
+	.key_size = 0,
+	.value_size = sizeof(__u32),
+	.max_entries = 32,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
+	.type = MAP_TYPE,
+	.key_size = 0,
+	.value_size = sizeof(__u32),
+	.max_entries = 32,
+	.map_flags = 0,
+};
+
+SEC("test")
+int _test(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	__u32 value;
+	int err;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+	if (iph + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	err = bpf_map_pop_elem(&map_in, &value);
+	if (err)
+		return TC_ACT_SHOT;
+
+	iph->daddr = value;
+
+	err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
+	if (err)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
new file mode 100644
index 000000000000..7c4f41572b1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <err.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+
+struct sec_name_test {
+	const char sec_name[32];
+	struct {
+		int rc;
+		enum bpf_prog_type prog_type;
+		enum bpf_attach_type expected_attach_type;
+	} expected_load;
+	struct {
+		int rc;
+		enum bpf_attach_type attach_type;
+	} expected_attach;
+};
+
+static struct sec_name_test tests[] = {
+	{"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+	{"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+	{"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} },
+	{"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+	{"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+	{"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} },
+	{"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} },
+	{"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
+	{
+		"raw_tracepoint/",
+		{0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0},
+		{-EINVAL, 0},
+	},
+	{"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+	{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
+	{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
+	{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
+	{"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} },
+	{"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
+	{
+		"cgroup_skb/ingress",
+		{0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+		{0, BPF_CGROUP_INET_INGRESS},
+	},
+	{
+		"cgroup_skb/egress",
+		{0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+		{0, BPF_CGROUP_INET_EGRESS},
+	},
+	{"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
+	{
+		"cgroup/sock",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
+		{0, BPF_CGROUP_INET_SOCK_CREATE},
+	},
+	{
+		"cgroup/post_bind4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND},
+		{0, BPF_CGROUP_INET4_POST_BIND},
+	},
+	{
+		"cgroup/post_bind6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND},
+		{0, BPF_CGROUP_INET6_POST_BIND},
+	},
+	{
+		"cgroup/dev",
+		{0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
+		{0, BPF_CGROUP_DEVICE},
+	},
+	{"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
+	{
+		"sk_skb/stream_parser",
+		{0, BPF_PROG_TYPE_SK_SKB, 0},
+		{0, BPF_SK_SKB_STREAM_PARSER},
+	},
+	{
+		"sk_skb/stream_verdict",
+		{0, BPF_PROG_TYPE_SK_SKB, 0},
+		{0, BPF_SK_SKB_STREAM_VERDICT},
+	},
+	{"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
+	{"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
+	{"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
+	{
+		"flow_dissector",
+		{0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
+		{0, BPF_FLOW_DISSECTOR},
+	},
+	{
+		"cgroup/bind4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND},
+		{0, BPF_CGROUP_INET4_BIND},
+	},
+	{
+		"cgroup/bind6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND},
+		{0, BPF_CGROUP_INET6_BIND},
+	},
+	{
+		"cgroup/connect4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT},
+		{0, BPF_CGROUP_INET4_CONNECT},
+	},
+	{
+		"cgroup/connect6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT},
+		{0, BPF_CGROUP_INET6_CONNECT},
+	},
+	{
+		"cgroup/sendmsg4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
+		{0, BPF_CGROUP_UDP4_SENDMSG},
+	},
+	{
+		"cgroup/sendmsg6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
+		{0, BPF_CGROUP_UDP6_SENDMSG},
+	},
+};
+
+static int test_prog_type_by_name(const struct sec_name_test *test)
+{
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_prog_type prog_type;
+	int rc;
+
+	rc = libbpf_prog_type_by_name(test->sec_name, &prog_type,
+				      &expected_attach_type);
+
+	if (rc != test->expected_load.rc) {
+		warnx("prog: unexpected rc=%d for %s", rc, test->sec_name);
+		return -1;
+	}
+
+	if (rc)
+		return 0;
+
+	if (prog_type != test->expected_load.prog_type) {
+		warnx("prog: unexpected prog_type=%d for %s", prog_type,
+		      test->sec_name);
+		return -1;
+	}
+
+	if (expected_attach_type != test->expected_load.expected_attach_type) {
+		warnx("prog: unexpected expected_attach_type=%d for %s",
+		      expected_attach_type, test->sec_name);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int test_attach_type_by_name(const struct sec_name_test *test)
+{
+	enum bpf_attach_type attach_type;
+	int rc;
+
+	rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
+
+	if (rc != test->expected_attach.rc) {
+		warnx("attach: unexpected rc=%d for %s", rc, test->sec_name);
+		return -1;
+	}
+
+	if (rc)
+		return 0;
+
+	if (attach_type != test->expected_attach.attach_type) {
+		warnx("attach: unexpected attach_type=%d for %s", attach_type,
+		      test->sec_name);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test_case(const struct sec_name_test *test)
+{
+	if (test_prog_type_by_name(test))
+		return -1;
+	if (test_attach_type_by_name(test))
+		return -1;
+	return 0;
+}
+
+static int run_tests(void)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(&tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	return run_tests();
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
new file mode 100644
index 000000000000..75646d9b34aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "test_select_reuseport_common.h"
+
+#define MIN_TCPHDR_LEN 20
+#define UDPHDR_LEN 8
+
+#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
+#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
+#define REUSEPORT_ARRAY_SIZE 32
+
+static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
+static enum result expected_results[NR_RESULTS];
+static int sk_fds[REUSEPORT_ARRAY_SIZE];
+static int reuseport_array, outer_map;
+static int select_by_skb_data_prog;
+static int saved_tcp_syncookie;
+static struct bpf_object *obj;
+static int saved_tcp_fo;
+static __u32 index_zero;
+static int epfd;
+
+static union sa46 {
+	struct sockaddr_in6 v6;
+	struct sockaddr_in v4;
+	sa_family_t family;
+} srv_sa;
+
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
+		printf(format);						\
+		exit(-1);						\
+	}								\
+})
+
+static void create_maps(void)
+{
+	struct bpf_create_map_attr attr = {};
+
+	/* Creating reuseport_array */
+	attr.name = "reuseport_array";
+	attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = REUSEPORT_ARRAY_SIZE;
+
+	reuseport_array = bpf_create_map_xattr(&attr);
+	CHECK(reuseport_array == -1, "creating reuseport_array",
+	      "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+
+	/* Creating outer_map */
+	attr.name = "outer_map";
+	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = 1;
+	attr.inner_map_fd = reuseport_array;
+	outer_map = bpf_create_map_xattr(&attr);
+	CHECK(outer_map == -1, "creating outer_map",
+	      "outer_map:%d errno:%d\n", outer_map, errno);
+}
+
+static void prepare_bpf_obj(void)
+{
+	struct bpf_program *prog;
+	struct bpf_map *map;
+	int err;
+	struct bpf_object_open_attr attr = {
+		.file = "test_select_reuseport_kern.o",
+		.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	};
+
+	obj = bpf_object__open_xattr(&attr);
+	CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+	      "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+
+	prog = bpf_program__next(NULL, obj);
+	CHECK(!prog, "get first bpf_program", "!prog\n");
+	bpf_program__set_type(prog, attr.prog_type);
+
+	map = bpf_object__find_map_by_name(obj, "outer_map");
+	CHECK(!map, "find outer_map", "!map\n");
+	err = bpf_map__reuse_fd(map, outer_map);
+	CHECK(err, "reuse outer_map", "err:%d\n", err);
+
+	err = bpf_object__load(obj);
+	CHECK(err, "load bpf_object", "err:%d\n", err);
+
+	select_by_skb_data_prog = bpf_program__fd(prog);
+	CHECK(select_by_skb_data_prog == -1, "get prog fd",
+	      "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+
+	map = bpf_object__find_map_by_name(obj, "result_map");
+	CHECK(!map, "find result_map", "!map\n");
+	result_map = bpf_map__fd(map);
+	CHECK(result_map == -1, "get result_map fd",
+	      "result_map:%d\n", result_map);
+
+	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
+	CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+	tmp_index_ovr_map = bpf_map__fd(map);
+	CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+	      "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+
+	map = bpf_object__find_map_by_name(obj, "linum_map");
+	CHECK(!map, "find linum_map", "!map\n");
+	linum_map = bpf_map__fd(map);
+	CHECK(linum_map == -1, "get linum_map fd",
+	      "linum_map:%d\n", linum_map);
+
+	map = bpf_object__find_map_by_name(obj, "data_check_map");
+	CHECK(!map, "find data_check_map", "!map\n");
+	data_check_map = bpf_map__fd(map);
+	CHECK(data_check_map == -1, "get data_check_map fd",
+	      "data_check_map:%d\n", data_check_map);
+}
+
+static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+{
+	memset(sa, 0, sizeof(*sa));
+	sa->family = family;
+	if (sa->family == AF_INET6)
+		sa->v6.sin6_addr = in6addr_loopback;
+	else
+		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+}
+
+static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+{
+	memset(sa, 0, sizeof(*sa));
+	sa->family = family;
+	if (sa->family == AF_INET6)
+		sa->v6.sin6_addr = in6addr_any;
+	else
+		sa->v4.sin_addr.s_addr = INADDR_ANY;
+}
+
+static int read_int_sysctl(const char *sysctl)
+{
+	char buf[16];
+	int fd, ret;
+
+	fd = open(sysctl, 0);
+	CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+	      sysctl, fd, errno);
+
+	ret = read(fd, buf, sizeof(buf));
+	CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
+	      sysctl, ret, errno);
+	close(fd);
+
+	return atoi(buf);
+}
+
+static void write_int_sysctl(const char *sysctl, int v)
+{
+	int fd, ret, size;
+	char buf[16];
+
+	fd = open(sysctl, O_RDWR);
+	CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+	      sysctl, fd, errno);
+
+	size = snprintf(buf, sizeof(buf), "%d", v);
+	ret = write(fd, buf, size);
+	CHECK(ret != size, "write(sysctl)",
+	      "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+	close(fd);
+}
+
+static void restore_sysctls(void)
+{
+	write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+}
+
+static void enable_fastopen(void)
+{
+	int fo;
+
+	fo = read_int_sysctl(TCP_FO_SYSCTL);
+	write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+}
+
+static void enable_syncookie(void)
+{
+	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+}
+
+static void disable_syncookie(void)
+{
+	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+}
+
+static __u32 get_linum(void)
+{
+	__u32 linum;
+	int err;
+
+	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
+	CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+	      err, errno);
+
+	return linum;
+}
+
+static void check_data(int type, sa_family_t family, const struct cmd *cmd,
+		       int cli_fd)
+{
+	struct data_check expected = {}, result;
+	union sa46 cli_sa;
+	socklen_t addrlen;
+	int err;
+
+	addrlen = sizeof(cli_sa);
+	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
+			  &addrlen);
+	CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+	      err, errno);
+
+	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
+	CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+	      err, errno);
+
+	if (type == SOCK_STREAM) {
+		expected.len = MIN_TCPHDR_LEN;
+		expected.ip_protocol = IPPROTO_TCP;
+	} else {
+		expected.len = UDPHDR_LEN;
+		expected.ip_protocol = IPPROTO_UDP;
+	}
+
+	if (family == AF_INET6) {
+		expected.eth_protocol = htons(ETH_P_IPV6);
+		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
+			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
+			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
+			!srv_sa.v6.sin6_addr.s6_addr32[0];
+
+		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
+		       sizeof(cli_sa.v6.sin6_addr));
+		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
+		       sizeof(in6addr_loopback));
+		expected.skb_ports[0] = cli_sa.v6.sin6_port;
+		expected.skb_ports[1] = srv_sa.v6.sin6_port;
+	} else {
+		expected.eth_protocol = htons(ETH_P_IP);
+		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+
+		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
+		expected.skb_ports[0] = cli_sa.v4.sin_port;
+		expected.skb_ports[1] = srv_sa.v4.sin_port;
+	}
+
+	if (memcmp(&result, &expected, offsetof(struct data_check,
+						equal_check_end))) {
+		printf("unexpected data_check\n");
+		printf("  result: (0x%x, %u, %u)\n",
+		       result.eth_protocol, result.ip_protocol,
+		       result.bind_inany);
+		printf("expected: (0x%x, %u, %u)\n",
+		       expected.eth_protocol, expected.ip_protocol,
+		       expected.bind_inany);
+		CHECK(1, "data_check result != expected",
+		      "bpf_prog_linum:%u\n", get_linum());
+	}
+
+	CHECK(!result.hash, "data_check result.hash empty",
+	      "result.hash:%u", result.hash);
+
+	expected.len += cmd ? sizeof(*cmd) : 0;
+	if (type == SOCK_STREAM)
+		CHECK(expected.len > result.len, "expected.len > result.len",
+		      "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+		      expected.len, result.len, get_linum());
+	else
+		CHECK(expected.len != result.len, "expected.len != result.len",
+		      "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+		      expected.len, result.len, get_linum());
+}
+
+static void check_results(void)
+{
+	__u32 results[NR_RESULTS];
+	__u32 i, broken = 0;
+	int err;
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
+		CHECK(err == -1, "lookup_elem(result_map)",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+	}
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		if (results[i] != expected_results[i]) {
+			broken = i;
+			break;
+		}
+	}
+
+	if (i == NR_RESULTS)
+		return;
+
+	printf("unexpected result\n");
+	printf(" result: [");
+	printf("%u", results[0]);
+	for (i = 1; i < NR_RESULTS; i++)
+		printf(", %u", results[i]);
+	printf("]\n");
+
+	printf("expected: [");
+	printf("%u", expected_results[0]);
+	for (i = 1; i < NR_RESULTS; i++)
+		printf(", %u", expected_results[i]);
+	printf("]\n");
+
+	CHECK(expected_results[broken] != results[broken],
+	      "unexpected result",
+	      "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
+	      broken, broken, get_linum());
+}
+
+static int send_data(int type, sa_family_t family, void *data, size_t len,
+		     enum result expected)
+{
+	union sa46 cli_sa;
+	int fd, err;
+
+	fd = socket(family, type, 0);
+	CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+
+	sa46_init_loopback(&cli_sa, family);
+	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
+	CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+
+	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
+		     sizeof(srv_sa));
+	CHECK(err != len && expected >= PASS,
+	      "sendto()", "family:%u err:%d errno:%d expected:%d\n",
+	      family, err, errno, expected);
+
+	return fd;
+}
+
+static void do_test(int type, sa_family_t family, struct cmd *cmd,
+		    enum result expected)
+{
+	int nev, srv_fd, cli_fd;
+	struct epoll_event ev;
+	struct cmd rcv_cmd;
+	ssize_t nread;
+
+	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
+			   expected);
+	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
+	CHECK((nev <= 0 && expected >= PASS) ||
+	      (nev > 0 && expected < PASS),
+	      "nev <> expected",
+	      "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+	      nev, expected, type, family,
+	      cmd ? cmd->reuseport_index : -1,
+	      cmd ? cmd->pass_on_failure : -1);
+	check_results();
+	check_data(type, family, cmd, cli_fd);
+
+	if (expected < PASS)
+		return;
+
+	CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+	      cmd->reuseport_index != ev.data.u32,
+	      "check cmd->reuseport_index",
+	      "cmd:(%u, %u) ev.data.u32:%u\n",
+	      cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+
+	srv_fd = sk_fds[ev.data.u32];
+	if (type == SOCK_STREAM) {
+		int new_fd = accept(srv_fd, NULL, 0);
+
+		CHECK(new_fd == -1, "accept(srv_fd)",
+		      "ev.data.u32:%u new_fd:%d errno:%d\n",
+		      ev.data.u32, new_fd, errno);
+
+		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+		CHECK(nread != sizeof(rcv_cmd),
+		      "recv(new_fd)",
+		      "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+		      ev.data.u32, nread, sizeof(rcv_cmd), errno);
+
+		close(new_fd);
+	} else {
+		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+		CHECK(nread != sizeof(rcv_cmd),
+		      "recv(sk_fds)",
+		      "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+		      ev.data.u32, nread, sizeof(rcv_cmd), errno);
+	}
+
+	close(cli_fd);
+}
+
+static void test_err_inner_map(int type, sa_family_t family)
+{
+	struct cmd cmd = {
+		.reuseport_index = 0,
+		.pass_on_failure = 0,
+	};
+
+	printf("%s: ", __func__);
+	expected_results[DROP_ERR_INNER_MAP]++;
+	do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
+	printf("OK\n");
+}
+
+static void test_err_skb_data(int type, sa_family_t family)
+{
+	printf("%s: ", __func__);
+	expected_results[DROP_ERR_SKB_DATA]++;
+	do_test(type, family, NULL, DROP_ERR_SKB_DATA);
+	printf("OK\n");
+}
+
+static void test_err_sk_select_port(int type, sa_family_t family)
+{
+	struct cmd cmd = {
+		.reuseport_index = REUSEPORT_ARRAY_SIZE,
+		.pass_on_failure = 0,
+	};
+
+	printf("%s: ", __func__);
+	expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
+	do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
+	printf("OK\n");
+}
+
+static void test_pass(int type, sa_family_t family)
+{
+	struct cmd cmd;
+	int i;
+
+	printf("%s: ", __func__);
+	cmd.pass_on_failure = 0;
+	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+		expected_results[PASS]++;
+		cmd.reuseport_index = i;
+		do_test(type, family, &cmd, PASS);
+	}
+	printf("OK\n");
+}
+
+static void test_syncookie(int type, sa_family_t family)
+{
+	int err, tmp_index = 1;
+	struct cmd cmd = {
+		.reuseport_index = 0,
+		.pass_on_failure = 0,
+	};
+
+	if (type != SOCK_STREAM)
+		return;
+
+	printf("%s: ", __func__);
+	/*
+	 * +1 for TCP-SYN and
+	 * +1 for the TCP-ACK (ack the syncookie)
+	 */
+	expected_results[PASS] += 2;
+	enable_syncookie();
+	/*
+	 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
+	 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
+	 *          tmp_index_ovr_map
+	 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
+	 *          is from the cmd.reuseport_index
+	 */
+	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
+				  &tmp_index, BPF_ANY);
+	CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+	      "err:%d errno:%d\n", err, errno);
+	do_test(type, family, &cmd, PASS);
+	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
+				  &tmp_index);
+	CHECK(err == -1 || tmp_index != -1,
+	      "lookup_elem(tmp_index_ovr_map)",
+	      "err:%d errno:%d tmp_index:%d\n",
+	      err, errno, tmp_index);
+	disable_syncookie();
+	printf("OK\n");
+}
+
+static void test_pass_on_err(int type, sa_family_t family)
+{
+	struct cmd cmd = {
+		.reuseport_index = REUSEPORT_ARRAY_SIZE,
+		.pass_on_failure = 1,
+	};
+
+	printf("%s: ", __func__);
+	expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
+	do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
+	printf("OK\n");
+}
+
+static void prepare_sk_fds(int type, sa_family_t family, bool inany)
+{
+	const int first = REUSEPORT_ARRAY_SIZE - 1;
+	int i, err, optval = 1;
+	struct epoll_event ev;
+	socklen_t addrlen;
+
+	if (inany)
+		sa46_init_inany(&srv_sa, family);
+	else
+		sa46_init_loopback(&srv_sa, family);
+	addrlen = sizeof(srv_sa);
+
+	/*
+	 * The sk_fds[] is filled from the back such that the order
+	 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
+	 */
+	for (i = first; i >= 0; i--) {
+		sk_fds[i] = socket(family, type, 0);
+		CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+		      i, sk_fds[i], errno);
+		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
+				 &optval, sizeof(optval));
+		CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+		      "sk_fds[%d] err:%d errno:%d\n",
+		      i, err, errno);
+
+		if (i == first) {
+			err = setsockopt(sk_fds[i], SOL_SOCKET,
+					 SO_ATTACH_REUSEPORT_EBPF,
+					 &select_by_skb_data_prog,
+					 sizeof(select_by_skb_data_prog));
+			CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+			      "err:%d errno:%d\n", err, errno);
+		}
+
+		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
+		CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+		      i, err, errno);
+
+		if (type == SOCK_STREAM) {
+			err = listen(sk_fds[i], 10);
+			CHECK(err == -1, "listen()",
+			      "sk_fds[%d] err:%d errno:%d\n",
+			      i, err, errno);
+		}
+
+		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
+					  BPF_NOEXIST);
+		CHECK(err == -1, "update_elem(reuseport_array)",
+		      "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+
+		if (i == first) {
+			socklen_t addrlen = sizeof(srv_sa);
+
+			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
+					  &addrlen);
+			CHECK(err == -1, "getsockname()",
+			      "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+		}
+	}
+
+	epfd = epoll_create(1);
+	CHECK(epfd == -1, "epoll_create(1)",
+	      "epfd:%d errno:%d\n", epfd, errno);
+
+	ev.events = EPOLLIN;
+	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+		ev.data.u32 = i;
+		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
+		CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+	}
+}
+
+static void setup_per_test(int type, unsigned short family, bool inany)
+{
+	int ovr = -1, err;
+
+	prepare_sk_fds(type, family, inany);
+	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
+				  BPF_ANY);
+	CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+	      "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup_per_test(void)
+{
+	int i, err;
+
+	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
+		close(sk_fds[i]);
+	close(epfd);
+
+	err = bpf_map_delete_elem(outer_map, &index_zero);
+	CHECK(err == -1, "delete_elem(outer_map)",
+	      "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup(void)
+{
+	close(outer_map);
+	close(reuseport_array);
+	bpf_object__close(obj);
+}
+
+static void test_all(void)
+{
+	/* Extra SOCK_STREAM to test bind_inany==true */
+	const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
+	const char * const type_strings[] = { "TCP", "UDP", "TCP" };
+	const char * const family_strings[] = { "IPv6", "IPv4" };
+	const unsigned short families[] = { AF_INET6, AF_INET };
+	const bool bind_inany[] = { false, false, true };
+	int t, f, err;
+
+	for (f = 0; f < ARRAY_SIZE(families); f++) {
+		unsigned short family = families[f];
+
+		for (t = 0; t < ARRAY_SIZE(types); t++) {
+			bool inany = bind_inany[t];
+			int type = types[t];
+
+			printf("######## %s/%s %s ########\n",
+			       family_strings[f], type_strings[t],
+				inany ? " INANY  " : "LOOPBACK");
+
+			setup_per_test(type, family, inany);
+
+			test_err_inner_map(type, family);
+
+			/* Install reuseport_array to the outer_map */
+			err = bpf_map_update_elem(outer_map, &index_zero,
+						  &reuseport_array, BPF_ANY);
+			CHECK(err == -1, "update_elem(outer_map)",
+			      "err:%d errno:%d\n", err, errno);
+
+			test_err_skb_data(type, family);
+			test_err_sk_select_port(type, family);
+			test_pass(type, family);
+			test_syncookie(type, family);
+			test_pass_on_err(type, family);
+
+			cleanup_per_test();
+			printf("\n");
+		}
+	}
+}
+
+int main(int argc, const char **argv)
+{
+	create_maps();
+	prepare_bpf_obj();
+	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
+	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
+	enable_fastopen();
+	disable_syncookie();
+	atexit(restore_sysctls);
+
+	test_all();
+
+	cleanup();
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_common.h b/tools/testing/selftests/bpf/test_select_reuseport_common.h
new file mode 100644
index 000000000000..08eb2a9f145f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __TEST_SELECT_REUSEPORT_COMMON_H
+#define __TEST_SELECT_REUSEPORT_COMMON_H
+
+#include <linux/types.h>
+
+enum result {
+	DROP_ERR_INNER_MAP,
+	DROP_ERR_SKB_DATA,
+	DROP_ERR_SK_SELECT_REUSEPORT,
+	DROP_MISC,
+	PASS,
+	PASS_ERR_SK_SELECT_REUSEPORT,
+	NR_RESULTS,
+};
+
+struct cmd {
+	__u32 reuseport_index;
+	__u32 pass_on_failure;
+};
+
+struct data_check {
+	__u32 ip_protocol;
+	__u32 skb_addrs[8];
+	__u16 skb_ports[2];
+	__u16 eth_protocol;
+	__u8  bind_inany;
+	__u8  equal_check_end[0];
+
+	__u32 len;
+	__u32 hash;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
new file mode 100644
index 000000000000..5b54ec637ada
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+#include "test_select_reuseport_common.h"
+
+int _version SEC("version") = 1;
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+struct bpf_map_def SEC("maps") outer_map = {
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") result_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = NR_RESULTS,
+};
+
+struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") linum_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") data_check_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct data_check),
+	.max_entries = 1,
+};
+
+#define GOTO_DONE(_result) ({			\
+	result = (_result);			\
+	linum = __LINE__;			\
+	goto done;				\
+})
+
+SEC("select_by_skb_data")
+int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
+{
+	__u32 linum, index = 0, flags = 0, index_zero = 0;
+	__u32 *result_cnt, *linum_value;
+	struct data_check data_check = {};
+	struct cmd *cmd, cmd_copy;
+	void *data, *data_end;
+	void *reuseport_array;
+	enum result result;
+	int *index_ovr;
+	int err;
+
+	data = reuse_md->data;
+	data_end = reuse_md->data_end;
+	data_check.len = reuse_md->len;
+	data_check.eth_protocol = reuse_md->eth_protocol;
+	data_check.ip_protocol = reuse_md->ip_protocol;
+	data_check.hash = reuse_md->hash;
+	data_check.bind_inany = reuse_md->bind_inany;
+	if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
+		if (bpf_skb_load_bytes_relative(reuse_md,
+						offsetof(struct iphdr, saddr),
+						data_check.skb_addrs, 8,
+						BPF_HDR_START_NET))
+			GOTO_DONE(DROP_MISC);
+	} else {
+		if (bpf_skb_load_bytes_relative(reuse_md,
+						offsetof(struct ipv6hdr, saddr),
+						data_check.skb_addrs, 32,
+						BPF_HDR_START_NET))
+			GOTO_DONE(DROP_MISC);
+	}
+
+	/*
+	 * The ip_protocol could be a compile time decision
+	 * if the bpf_prog.o is dedicated to either TCP or
+	 * UDP.
+	 *
+	 * Otherwise, reuse_md->ip_protocol or
+	 * the protocol field in the iphdr can be used.
+	 */
+	if (data_check.ip_protocol == IPPROTO_TCP) {
+		struct tcphdr *th = data;
+
+		if (th + 1 > data_end)
+			GOTO_DONE(DROP_MISC);
+
+		data_check.skb_ports[0] = th->source;
+		data_check.skb_ports[1] = th->dest;
+
+		if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
+			GOTO_DONE(DROP_ERR_SKB_DATA);
+		if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
+				       sizeof(cmd_copy)))
+			GOTO_DONE(DROP_MISC);
+		cmd = &cmd_copy;
+	} else if (data_check.ip_protocol == IPPROTO_UDP) {
+		struct udphdr *uh = data;
+
+		if (uh + 1 > data_end)
+			GOTO_DONE(DROP_MISC);
+
+		data_check.skb_ports[0] = uh->source;
+		data_check.skb_ports[1] = uh->dest;
+
+		if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
+			GOTO_DONE(DROP_ERR_SKB_DATA);
+		if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
+			if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
+					       &cmd_copy, sizeof(cmd_copy)))
+				GOTO_DONE(DROP_MISC);
+			cmd = &cmd_copy;
+		} else {
+			cmd = data + sizeof(struct udphdr);
+		}
+	} else {
+		GOTO_DONE(DROP_MISC);
+	}
+
+	reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
+	if (!reuseport_array)
+		GOTO_DONE(DROP_ERR_INNER_MAP);
+
+	index = cmd->reuseport_index;
+	index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
+	if (!index_ovr)
+		GOTO_DONE(DROP_MISC);
+
+	if (*index_ovr != -1) {
+		index = *index_ovr;
+		*index_ovr = -1;
+	}
+	err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
+				      flags);
+	if (!err)
+		GOTO_DONE(PASS);
+
+	if (cmd->pass_on_failure)
+		GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
+	else
+		GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
+
+done:
+	result_cnt = bpf_map_lookup_elem(&result_map, &result);
+	if (!result_cnt)
+		return SK_DROP;
+
+	bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
+	bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
+
+	(*result_cnt)++;
+	return result < PASS ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
new file mode 100644
index 000000000000..e21cd736c196
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
+					void *data_end, __u16 eth_proto,
+					bool *ipv4)
+{
+	struct bpf_sock_tuple *result;
+	__u8 proto = 0;
+	__u64 ihl_len;
+
+	if (eth_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)(data + nh_off);
+
+		if (iph + 1 > data_end)
+			return NULL;
+		ihl_len = iph->ihl * 4;
+		proto = iph->protocol;
+		*ipv4 = true;
+		result = (struct bpf_sock_tuple *)&iph->saddr;
+	} else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off);
+
+		if (ip6h + 1 > data_end)
+			return NULL;
+		ihl_len = sizeof(*ip6h);
+		proto = ip6h->nexthdr;
+		*ipv4 = true;
+		result = (struct bpf_sock_tuple *)&ip6h->saddr;
+	}
+
+	if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP)
+		return NULL;
+
+	return result;
+}
+
+SEC("sk_lookup_success")
+int bpf_sk_lookup_test0(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	struct bpf_sock_tuple *tuple;
+	struct bpf_sock *sk;
+	size_t tuple_len;
+	bool ipv4;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4);
+	if (!tuple || tuple + sizeof *tuple > data_end)
+		return TC_ACT_SHOT;
+
+	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+	if (sk)
+		bpf_sk_release(sk);
+	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
+}
+
+SEC("sk_lookup_success_simple")
+int bpf_sk_lookup_test1(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	if (sk)
+		bpf_sk_release(sk);
+	return 0;
+}
+
+SEC("fail_use_after_free")
+int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+	__u32 family = 0;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	if (sk) {
+		bpf_sk_release(sk);
+		family = sk->family;
+	}
+	return family;
+}
+
+SEC("fail_modify_sk_pointer")
+int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+	__u32 family;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	if (sk) {
+		sk += 1;
+		bpf_sk_release(sk);
+	}
+	return 0;
+}
+
+SEC("fail_modify_sk_or_null_pointer")
+int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+	__u32 family;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	sk += 1;
+	if (sk)
+		bpf_sk_release(sk);
+	return 0;
+}
+
+SEC("fail_no_release")
+int bpf_sk_lookup_test2(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+
+	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	return 0;
+}
+
+SEC("fail_release_twice")
+int bpf_sk_lookup_test3(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	bpf_sk_release(sk);
+	bpf_sk_release(sk);
+	return 0;
+}
+
+SEC("fail_release_unchecked")
+int bpf_sk_lookup_test4(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+	bpf_sk_release(sk);
+	return 0;
+}
+
+void lookup_no_release(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
+}
+
+SEC("fail_no_release_subcall")
+int bpf_sk_lookup_test5(struct __sk_buff *skb)
+{
+	lookup_no_release(skb);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
new file mode 100755
index 000000000000..a9bc6f82abc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+
+set -eu
+
+wait_for_ip()
+{
+	local _i
+	echo -n "Wait for testing link-local IP to become available "
+	for _i in $(seq ${MAX_PING_TRIES}); do
+		echo -n "."
+		if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+			echo " OK"
+			return
+		fi
+		sleep 1
+	done
+	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+	exit 1
+}
+
+setup()
+{
+	# Create testing interfaces not to interfere with current environment.
+	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+	ip link set ${TEST_IF} up
+	ip link set ${TEST_IF_PEER} up
+
+	wait_for_ip
+
+	tc qdisc add dev ${TEST_IF} clsact
+	tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} \
+		sec ${BPF_PROG_SECTION} da
+
+	BPF_PROG_ID=$(tc filter show dev ${TEST_IF} egress | \
+			awk '/ id / {sub(/.* id /, "", $0); print($1)}')
+}
+
+cleanup()
+{
+	ip link del ${TEST_IF} 2>/dev/null || :
+	ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+	trap cleanup EXIT 2 3 6 15
+	setup
+	${PROG} ${TEST_IF} ${BPF_PROG_ID}
+}
+
+DIR=$(dirname $0)
+TEST_IF="test_cgid_1"
+TEST_IF_PEER="test_cgid_2"
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_SECTION="cgroup_id_logger"
+BPF_PROG_ID=0
+PROG="${DIR}/test_skb_cgroup_id_user"
+type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6"
+
+main
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
new file mode 100644
index 000000000000..68cf9829f5a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+#define NUM_CGROUP_LEVELS	4
+
+struct bpf_map_def SEC("maps") cgroup_ids = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = NUM_CGROUP_LEVELS,
+};
+
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+	__u64 id;
+
+	/* [1] &level passed to external function that may change it, it's
+	 *     incompatible with loop unroll.
+	 */
+	id = bpf_skb_ancestor_cgroup_id(skb, level);
+	bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
+}
+
+SEC("cgroup_id_logger")
+int log_cgroup_id(struct __sk_buff *skb)
+{
+	/* Loop unroll can't be used here due to [1]. Unrolling manually.
+	 * Number of calls should be in sync with NUM_CGROUP_LEVELS.
+	 */
+	log_nth_level(skb, 0);
+	log_nth_level(skb, 1);
+	log_nth_level(skb, 2);
+	log_nth_level(skb, 3);
+
+	return TC_ACT_OK;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
new file mode 100644
index 000000000000..c121cc59f314
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CGROUP_PATH		"/skb_cgroup_test"
+#define NUM_CGROUP_LEVELS	4
+
+/* RFC 4291, Section 2.7.1 */
+#define LINKLOCAL_MULTICAST	"ff02::1"
+
+static int mk_dst_addr(const char *ip, const char *iface,
+		       struct sockaddr_in6 *dst)
+{
+	memset(dst, 0, sizeof(*dst));
+
+	dst->sin6_family = AF_INET6;
+	dst->sin6_port = htons(1025);
+
+	if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
+		log_err("Invalid IPv6: %s", ip);
+		return -1;
+	}
+
+	dst->sin6_scope_id = if_nametoindex(iface);
+	if (!dst->sin6_scope_id) {
+		log_err("Failed to get index of iface: %s", iface);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int send_packet(const char *iface)
+{
+	struct sockaddr_in6 dst;
+	char msg[] = "msg";
+	int err = 0;
+	int fd = -1;
+
+	if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
+		goto err;
+
+	fd = socket(AF_INET6, SOCK_DGRAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create UDP socket");
+		goto err;
+	}
+
+	if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
+		   sizeof(dst)) == -1) {
+		log_err("Failed to send datagram");
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	if (fd >= 0)
+		close(fd);
+	return err;
+}
+
+int get_map_fd_by_prog_id(int prog_id)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 map_ids[1];
+	int prog_fd = -1;
+	int map_fd = -1;
+
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0) {
+		log_err("Failed to get fd by prog id %d", prog_id);
+		goto err;
+	}
+
+	info.nr_map_ids = 1;
+	info.map_ids = (__u64) (unsigned long) map_ids;
+
+	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+		log_err("Failed to get info by prog fd %d", prog_fd);
+		goto err;
+	}
+
+	if (!info.nr_map_ids) {
+		log_err("No maps found for prog fd %d", prog_fd);
+		goto err;
+	}
+
+	map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+	if (map_fd < 0)
+		log_err("Failed to get fd by map id %d", map_ids[0]);
+err:
+	if (prog_fd >= 0)
+		close(prog_fd);
+	return map_fd;
+}
+
+int check_ancestor_cgroup_ids(int prog_id)
+{
+	__u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
+	__u32 level;
+	int err = 0;
+	int map_fd;
+
+	expected_ids[0] = 0x100000001;	/* root cgroup */
+	expected_ids[1] = get_cgroup_id("");
+	expected_ids[2] = get_cgroup_id(CGROUP_PATH);
+	expected_ids[3] = 0; /* non-existent cgroup */
+
+	map_fd = get_map_fd_by_prog_id(prog_id);
+	if (map_fd < 0)
+		goto err;
+
+	for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
+		if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
+			log_err("Failed to lookup key %d", level);
+			goto err;
+		}
+		if (actual_ids[level] != expected_ids[level]) {
+			log_err("%llx (actual) != %llx (expected), level: %u\n",
+				actual_ids[level], expected_ids[level], level);
+			goto err;
+		}
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	if (map_fd >= 0)
+		close(map_fd);
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CGROUP_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CGROUP_PATH))
+		goto err;
+
+	if (send_packet(argv[1]))
+		goto err;
+
+	if (check_ancestor_cgroup_ids(atoi(argv[2])))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index f4d99fabc56d..b8ebe2f58074 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -14,10 +14,7 @@
 
 #include "cgroup_helpers.h"
 #include "bpf_rlimit.h"
-
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_util.h"
 
 #define CG_PATH		"/foo"
 #define MAX_INSNS	512
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2950f80ba7fb..aeeb76a54d63 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -1,12 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2018 Facebook
 
+#define _GNU_SOURCE
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include <arpa/inet.h>
+#include <netinet/in.h>
 #include <sys/types.h>
+#include <sys/select.h>
 #include <sys/socket.h>
 
 #include <linux/filter.h>
@@ -16,35 +20,463 @@
 
 #include "cgroup_helpers.h"
 #include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#ifndef ENOTSUPP
+# define ENOTSUPP 524
+#endif
 
 #define CG_PATH	"/foo"
 #define CONNECT4_PROG_PATH	"./connect4_prog.o"
 #define CONNECT6_PROG_PATH	"./connect6_prog.o"
+#define SENDMSG4_PROG_PATH	"./sendmsg4_prog.o"
+#define SENDMSG6_PROG_PATH	"./sendmsg6_prog.o"
 
 #define SERV4_IP		"192.168.1.254"
 #define SERV4_REWRITE_IP	"127.0.0.1"
+#define SRC4_IP			"172.16.0.1"
+#define SRC4_REWRITE_IP		"127.0.0.4"
 #define SERV4_PORT		4040
 #define SERV4_REWRITE_PORT	4444
 
 #define SERV6_IP		"face:b00c:1234:5678::abcd"
 #define SERV6_REWRITE_IP	"::1"
+#define SERV6_V4MAPPED_IP	"::ffff:192.168.0.4"
+#define SRC6_IP			"::1"
+#define SRC6_REWRITE_IP		"::6"
 #define SERV6_PORT		6060
 #define SERV6_REWRITE_PORT	6666
 
 #define INET_NTOP_BUF	40
 
-typedef int (*load_fn)(enum bpf_attach_type, const char *comment);
+struct sock_addr_test;
+
+typedef int (*load_fn)(const struct sock_addr_test *test);
 typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
 
-struct program {
-	enum bpf_attach_type type;
-	load_fn	loadfn;
-	int fd;
-	const char *name;
-	enum bpf_attach_type invalid_type;
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_addr_test {
+	const char *descr;
+	/* BPF prog properties */
+	load_fn loadfn;
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_attach_type attach_type;
+	/* Socket properties */
+	int domain;
+	int type;
+	/* IP:port pairs for BPF prog to override */
+	const char *requested_ip;
+	unsigned short requested_port;
+	const char *expected_ip;
+	unsigned short expected_port;
+	const char *expected_src_ip;
+	/* Expected test result */
+	enum {
+		LOAD_REJECT,
+		ATTACH_REJECT,
+		SYSCALL_EPERM,
+		SYSCALL_ENOTSUPP,
+		SUCCESS,
+	} expected_result;
 };
 
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static int bind4_prog_load(const struct sock_addr_test *test);
+static int bind6_prog_load(const struct sock_addr_test *test);
+static int connect4_prog_load(const struct sock_addr_test *test);
+static int connect6_prog_load(const struct sock_addr_test *test);
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
+
+static struct sock_addr_test tests[] = {
+	/* bind */
+	{
+		"bind4: load prog with wrong expected attach type",
+		bind4_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"bind4: attach prog with wrong attach type",
+		bind4_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"bind4: rewrite IP & TCP port in",
+		bind4_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"bind4: rewrite IP & UDP port in",
+		bind4_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"bind6: load prog with wrong expected attach type",
+		bind6_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"bind6: attach prog with wrong attach type",
+		bind6_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"bind6: rewrite IP & TCP port in",
+		bind6_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"bind6: rewrite IP & UDP port in",
+		bind6_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+
+	/* connect */
+	{
+		"connect4: load prog with wrong expected attach type",
+		connect4_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"connect4: attach prog with wrong attach type",
+		connect4_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"connect4: rewrite IP & TCP port",
+		connect4_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"connect4: rewrite IP & UDP port",
+		connect4_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"connect6: load prog with wrong expected attach type",
+		connect6_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET6,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"connect6: attach prog with wrong attach type",
+		connect6_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"connect6: rewrite IP & TCP port",
+		connect6_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET6,
+		SOCK_STREAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"connect6: rewrite IP & UDP port",
+		connect6_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+
+	/* sendmsg */
+	{
+		"sendmsg4: load prog with wrong expected attach type",
+		sendmsg4_rw_asm_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"sendmsg4: attach prog with wrong attach type",
+		sendmsg4_rw_asm_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"sendmsg4: rewrite IP & port (asm)",
+		sendmsg4_rw_asm_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg4: rewrite IP & port (C)",
+		sendmsg4_rw_c_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg4: deny call",
+		sendmsg_deny_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SYSCALL_EPERM,
+	},
+	{
+		"sendmsg6: load prog with wrong expected attach type",
+		sendmsg6_rw_asm_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"sendmsg6: attach prog with wrong attach type",
+		sendmsg6_rw_asm_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"sendmsg6: rewrite IP & port (asm)",
+		sendmsg6_rw_asm_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg6: rewrite IP & port (C)",
+		sendmsg6_rw_c_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg6: IPv4-mapped IPv6",
+		sendmsg6_rw_v4mapped_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SYSCALL_ENOTSUPP,
+	},
+	{
+		"sendmsg6: deny call",
+		sendmsg_deny_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SYSCALL_EPERM,
+	},
+};
 
 static int mk_sockaddr(int domain, const char *ip, unsigned short port,
 		       struct sockaddr *addr, socklen_t addr_len)
@@ -84,25 +516,23 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port,
 	return 0;
 }
 
-static int load_insns(enum bpf_attach_type attach_type,
-		      const struct bpf_insn *insns, size_t insns_cnt,
-		      const char *comment)
+static int load_insns(const struct sock_addr_test *test,
+		      const struct bpf_insn *insns, size_t insns_cnt)
 {
 	struct bpf_load_program_attr load_attr;
 	int ret;
 
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
-	load_attr.expected_attach_type = attach_type;
+	load_attr.expected_attach_type = test->expected_attach_type;
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = "GPL";
 
 	ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
-	if (ret < 0 && comment) {
-		log_err(">>> Loading %s program error.\n"
-			">>> Output from verifier:\n%s\n-------\n",
-			comment, bpf_log_buf);
+	if (ret < 0 && test->expected_result != LOAD_REJECT) {
+		log_err(">>> Loading program error.\n"
+			">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
 	}
 
 	return ret;
@@ -119,8 +549,7 @@ static int load_insns(enum bpf_attach_type attach_type,
  * to count jumps properly.
  */
 
-static int bind4_prog_load(enum bpf_attach_type attach_type,
-			   const char *comment)
+static int bind4_prog_load(const struct sock_addr_test *test)
 {
 	union {
 		uint8_t u4_addr8[4];
@@ -186,12 +615,10 @@ static int bind4_prog_load(enum bpf_attach_type attach_type,
 		BPF_EXIT_INSN(),
 	};
 
-	return load_insns(attach_type, insns,
-			  sizeof(insns) / sizeof(struct bpf_insn), comment);
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
 }
 
-static int bind6_prog_load(enum bpf_attach_type attach_type,
-			   const char *comment)
+static int bind6_prog_load(const struct sock_addr_test *test)
 {
 	struct sockaddr_in6 addr6_rw;
 	struct in6_addr ip6;
@@ -254,13 +681,10 @@ static int bind6_prog_load(enum bpf_attach_type attach_type,
 		BPF_EXIT_INSN(),
 	};
 
-	return load_insns(attach_type, insns,
-			  sizeof(insns) / sizeof(struct bpf_insn), comment);
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
 }
 
-static int connect_prog_load_path(const char *path,
-				  enum bpf_attach_type attach_type,
-				  const char *comment)
+static int load_path(const struct sock_addr_test *test, const char *path)
 {
 	struct bpf_prog_load_attr attr;
 	struct bpf_object *obj;
@@ -269,75 +693,218 @@ static int connect_prog_load_path(const char *path,
 	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
 	attr.file = path;
 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
-	attr.expected_attach_type = attach_type;
+	attr.expected_attach_type = test->expected_attach_type;
 
 	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
-		if (comment)
-			log_err(">>> Loading %s program at %s error.\n",
-				comment, path);
+		if (test->expected_result != LOAD_REJECT)
+			log_err(">>> Loading program (%s) error.\n", path);
 		return -1;
 	}
 
 	return prog_fd;
 }
 
-static int connect4_prog_load(enum bpf_attach_type attach_type,
-			      const char *comment)
+static int connect4_prog_load(const struct sock_addr_test *test)
 {
-	return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment);
+	return load_path(test, CONNECT4_PROG_PATH);
 }
 
-static int connect6_prog_load(enum bpf_attach_type attach_type,
-			      const char *comment)
+static int connect6_prog_load(const struct sock_addr_test *test)
 {
-	return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment);
+	return load_path(test, CONNECT6_PROG_PATH);
 }
 
-static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
 {
-	char addr_buf[INET_NTOP_BUF];
-	struct sockaddr_storage addr;
-	struct sockaddr_in6 *addr6;
-	struct sockaddr_in *addr4;
-	socklen_t addr_len;
-	unsigned short port;
-	void *nip;
-
-	addr_len = sizeof(struct sockaddr_storage);
-	memset(&addr, 0, addr_len);
-
-	if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) {
-		if (addr.ss_family == AF_INET) {
-			addr4 = (struct sockaddr_in *)&addr;
-			nip = (void *)&addr4->sin_addr;
-			port = ntohs(addr4->sin_port);
-		} else if (addr.ss_family == AF_INET6) {
-			addr6 = (struct sockaddr_in6 *)&addr;
-			nip = (void *)&addr6->sin6_addr;
-			port = ntohs(addr6->sin6_port);
-		} else {
-			return;
-		}
-		const char *addr_str =
-			inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF);
-		printf(fmt, addr_str ? addr_str : "??", port);
+	struct bpf_insn insns[] = {
+		/* return 0 */
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+	struct sockaddr_in dst4_rw_addr;
+	struct in_addr src4_rw_ip;
+
+	if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
+		log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
+		return -1;
 	}
+
+	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+			(struct sockaddr *)&dst4_rw_addr,
+			sizeof(dst4_rw_addr)) == -1)
+		return -1;
+
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
+
+		/*     sk.type == SOCK_DGRAM)  { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, type)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
+
+		/*      msg_src_ip4 = src4_rw_ip */
+		BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, msg_src_ip4)),
+
+		/*      user_ip4 = dst4_rw_addr.sin_addr */
+		BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+
+		/*      user_port = dst4_rw_addr.sin_port */
+		BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
 }
 
-static void print_local_ip_port(int sockfd, const char *fmt)
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
 {
-	print_ip_port(sockfd, getsockname, fmt);
+	return load_path(test, SENDMSG4_PROG_PATH);
 }
 
-static void print_remote_ip_port(int sockfd, const char *fmt)
+static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
+					 const char *rw_dst_ip)
 {
-	print_ip_port(sockfd, getpeername, fmt);
+	struct sockaddr_in6 dst6_rw_addr;
+	struct in6_addr src6_rw_ip;
+
+	if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
+		log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
+			(struct sockaddr *)&dst6_rw_addr,
+			sizeof(dst6_rw_addr)) == -1)
+		return -1;
+
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET6) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+#define STORE_IPV6_WORD_N(DST, SRC, N)					       \
+		BPF_MOV32_IMM(BPF_REG_7, SRC[N]),			       \
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
+			    offsetof(struct bpf_sock_addr, DST[N]))
+
+#define STORE_IPV6(DST, SRC)						       \
+		STORE_IPV6_WORD_N(DST, SRC, 0),				       \
+		STORE_IPV6_WORD_N(DST, SRC, 1),				       \
+		STORE_IPV6_WORD_N(DST, SRC, 2),				       \
+		STORE_IPV6_WORD_N(DST, SRC, 3)
+
+		STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
+		STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
+
+		/*      user_port = dst6_rw_addr.sin6_port */
+		BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
+}
+
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
+}
+
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, SENDMSG6_PROG_PATH);
+}
+
+static int cmp_addr(const struct sockaddr_storage *addr1,
+		    const struct sockaddr_storage *addr2, int cmp_port)
+{
+	const struct sockaddr_in *four1, *four2;
+	const struct sockaddr_in6 *six1, *six2;
+
+	if (addr1->ss_family != addr2->ss_family)
+		return -1;
+
+	if (addr1->ss_family == AF_INET) {
+		four1 = (const struct sockaddr_in *)addr1;
+		four2 = (const struct sockaddr_in *)addr2;
+		return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+			 four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+	} else if (addr1->ss_family == AF_INET6) {
+		six1 = (const struct sockaddr_in6 *)addr1;
+		six2 = (const struct sockaddr_in6 *)addr2;
+		return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+			 !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+				 sizeof(struct in6_addr)));
+	}
+
+	return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+			 const struct sockaddr_storage *addr2, int cmp_port)
+{
+	struct sockaddr_storage addr1;
+	socklen_t len1 = sizeof(addr1);
+
+	memset(&addr1, 0, len1);
+	if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+		return -1;
+
+	return cmp_addr(&addr1, addr2, cmp_port);
+}
+
+static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
+{
+	return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+	return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+	return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
 }
 
 static int start_server(int type, const struct sockaddr_storage *addr,
 			socklen_t addr_len)
 {
-
 	int fd;
 
 	fd = socket(addr->ss_family, type, 0);
@@ -358,8 +925,6 @@ static int start_server(int type, const struct sockaddr_storage *addr,
 		}
 	}
 
-	print_local_ip_port(fd, "\t   Actual: bind(%s, %d)\n");
-
 	goto out;
 close_out:
 	close(fd);
@@ -372,19 +937,19 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
 			     socklen_t addr_len)
 {
 	int domain;
-	int fd;
+	int fd = -1;
 
 	domain = addr->ss_family;
 
 	if (domain != AF_INET && domain != AF_INET6) {
 		log_err("Unsupported address family");
-		return -1;
+		goto err;
 	}
 
 	fd = socket(domain, type, 0);
 	if (fd == -1) {
-		log_err("Failed to creating client socket");
-		return -1;
+		log_err("Failed to create client socket");
+		goto err;
 	}
 
 	if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
@@ -392,198 +957,438 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
 		goto err;
 	}
 
-	print_remote_ip_port(fd, "\t   Actual: connect(%s, %d)");
-	print_local_ip_port(fd, " from (%s, %d)\n");
+	goto out;
+err:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+int init_pktinfo(int domain, struct cmsghdr *cmsg)
+{
+	struct in6_pktinfo *pktinfo6;
+	struct in_pktinfo *pktinfo4;
+
+	if (domain == AF_INET) {
+		cmsg->cmsg_level = SOL_IP;
+		cmsg->cmsg_type = IP_PKTINFO;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+		pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
+		memset(pktinfo4, 0, sizeof(struct in_pktinfo));
+		if (inet_pton(domain, SRC4_IP,
+			      (void *)&pktinfo4->ipi_spec_dst) != 1)
+			return -1;
+	} else if (domain == AF_INET6) {
+		cmsg->cmsg_level = SOL_IPV6;
+		cmsg->cmsg_type = IPV6_PKTINFO;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+		pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+		memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
+		if (inet_pton(domain, SRC6_IP,
+			      (void *)&pktinfo6->ipi6_addr) != 1)
+			return -1;
+	} else {
+		return -1;
+	}
 
 	return 0;
+}
+
+static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
+			     socklen_t addr_len, int set_cmsg, int flags,
+			     int *syscall_err)
+{
+	union {
+		char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+		struct cmsghdr align;
+	} control6;
+	union {
+		char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
+		struct cmsghdr align;
+	} control4;
+	struct msghdr hdr;
+	struct iovec iov;
+	char data = 'a';
+	int domain;
+	int fd = -1;
+
+	domain = addr->ss_family;
+
+	if (domain != AF_INET && domain != AF_INET6) {
+		log_err("Unsupported address family");
+		goto err;
+	}
+
+	fd = socket(domain, type, 0);
+	if (fd == -1) {
+		log_err("Failed to create client socket");
+		goto err;
+	}
+
+	memset(&iov, 0, sizeof(iov));
+	iov.iov_base = &data;
+	iov.iov_len = sizeof(data);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = (void *)addr;
+	hdr.msg_namelen = addr_len;
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+
+	if (set_cmsg) {
+		if (domain == AF_INET) {
+			hdr.msg_control = &control4;
+			hdr.msg_controllen = sizeof(control4.buf);
+		} else if (domain == AF_INET6) {
+			hdr.msg_control = &control6;
+			hdr.msg_controllen = sizeof(control6.buf);
+		}
+		if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
+			log_err("Fail to init pktinfo");
+			goto err;
+		}
+	}
+
+	if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
+		log_err("Fail to send message to server");
+		*syscall_err = errno;
+		goto err;
+	}
+
+	goto out;
 err:
 	close(fd);
-	return -1;
+	fd = -1;
+out:
+	return fd;
 }
 
-static void print_test_case_num(int domain, int type)
+static int fastconnect_to_server(const struct sockaddr_storage *addr,
+				 socklen_t addr_len)
 {
-	static int test_num;
+	int sendmsg_err;
 
-	printf("Test case #%d (%s/%s):\n", ++test_num,
-	       (domain == AF_INET ? "IPv4" :
-		domain == AF_INET6 ? "IPv6" :
-		"unknown_domain"),
-	       (type == SOCK_STREAM ? "TCP" :
-		type == SOCK_DGRAM ? "UDP" :
-		"unknown_type"));
+	return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
+				 MSG_FASTOPEN, &sendmsg_err);
 }
 
-static int run_test_case(int domain, int type, const char *ip,
-			 unsigned short port)
+static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
 {
-	struct sockaddr_storage addr;
-	socklen_t addr_len = sizeof(addr);
-	int servfd = -1;
-	int err = 0;
+	struct timeval tv;
+	struct msghdr hdr;
+	struct iovec iov;
+	char data[64];
+	fd_set rfds;
+
+	FD_ZERO(&rfds);
+	FD_SET(sockfd, &rfds);
 
-	print_test_case_num(domain, type);
+	tv.tv_sec = 2;
+	tv.tv_usec = 0;
 
-	if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
-			addr_len) == -1)
+	if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
+	    !FD_ISSET(sockfd, &rfds))
 		return -1;
 
-	printf("\tRequested: bind(%s, %d) ..\n", ip, port);
-	servfd = start_server(type, &addr, addr_len);
+	memset(&iov, 0, sizeof(iov));
+	iov.iov_base = data;
+	iov.iov_len = sizeof(data);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = src_addr;
+	hdr.msg_namelen = sizeof(struct sockaddr_storage);
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+
+	return recvmsg(sockfd, &hdr, 0);
+}
+
+static int init_addrs(const struct sock_addr_test *test,
+		      struct sockaddr_storage *requested_addr,
+		      struct sockaddr_storage *expected_addr,
+		      struct sockaddr_storage *expected_src_addr)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+	if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
+			(struct sockaddr *)expected_addr, addr_len) == -1)
+		goto err;
+
+	if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
+			(struct sockaddr *)requested_addr, addr_len) == -1)
+		goto err;
+
+	if (test->expected_src_ip &&
+	    mk_sockaddr(test->domain, test->expected_src_ip, 0,
+			(struct sockaddr *)expected_src_addr, addr_len) == -1)
+		goto err;
+
+	return 0;
+err:
+	return -1;
+}
+
+static int run_bind_test_case(const struct sock_addr_test *test)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	int clientfd = -1;
+	int servfd = -1;
+	int err = 0;
+
+	if (init_addrs(test, &requested_addr, &expected_addr, NULL))
+		goto err;
+
+	servfd = start_server(test->type, &requested_addr, addr_len);
 	if (servfd == -1)
 		goto err;
 
-	printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port);
-	if (connect_to_server(type, &addr, addr_len))
+	if (cmp_local_addr(servfd, &expected_addr))
+		goto err;
+
+	/* Try to connect to server just in case */
+	clientfd = connect_to_server(test->type, &expected_addr, addr_len);
+	if (clientfd == -1)
 		goto err;
 
 	goto out;
 err:
 	err = -1;
 out:
+	close(clientfd);
 	close(servfd);
 	return err;
 }
 
-static void close_progs_fds(struct program *progs, size_t prog_cnt)
+static int run_connect_test_case(const struct sock_addr_test *test)
 {
-	size_t i;
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage expected_src_addr;
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	int clientfd = -1;
+	int servfd = -1;
+	int err = 0;
 
-	for (i = 0; i < prog_cnt; ++i) {
-		close(progs[i].fd);
-		progs[i].fd = -1;
-	}
-}
+	if (init_addrs(test, &requested_addr, &expected_addr,
+		       &expected_src_addr))
+		goto err;
 
-static int load_and_attach_progs(int cgfd, struct program *progs,
-				 size_t prog_cnt)
-{
-	size_t i;
+	/* Prepare server to connect to */
+	servfd = start_server(test->type, &expected_addr, addr_len);
+	if (servfd == -1)
+		goto err;
 
-	for (i = 0; i < prog_cnt; ++i) {
-		printf("Load %s with invalid type (can pollute stderr) ",
-		       progs[i].name);
-		fflush(stdout);
-		progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
-		if (progs[i].fd != -1) {
-			log_err("Load with invalid type accepted for %s",
-				progs[i].name);
-			goto err;
-		}
-		printf("... REJECTED\n");
+	clientfd = connect_to_server(test->type, &requested_addr, addr_len);
+	if (clientfd == -1)
+		goto err;
+
+	/* Make sure src and dst addrs were overridden properly */
+	if (cmp_peer_addr(clientfd, &expected_addr))
+		goto err;
+
+	if (cmp_local_ip(clientfd, &expected_src_addr))
+		goto err;
 
-		printf("Load %s with valid type", progs[i].name);
-		progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
-		if (progs[i].fd == -1) {
-			log_err("Failed to load program %s", progs[i].name);
+	if (test->type == SOCK_STREAM) {
+		/* Test TCP Fast Open scenario */
+		clientfd = fastconnect_to_server(&requested_addr, addr_len);
+		if (clientfd == -1)
 			goto err;
-		}
-		printf(" ... OK\n");
 
-		printf("Attach %s with invalid type", progs[i].name);
-		if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
-				    BPF_F_ALLOW_OVERRIDE) != -1) {
-			log_err("Attach with invalid type accepted for %s",
-				progs[i].name);
+		/* Make sure src and dst addrs were overridden properly */
+		if (cmp_peer_addr(clientfd, &expected_addr))
 			goto err;
-		}
-		printf(" ... REJECTED\n");
 
-		printf("Attach %s with valid type", progs[i].name);
-		if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
-				    BPF_F_ALLOW_OVERRIDE) == -1) {
-			log_err("Failed to attach program %s", progs[i].name);
+		if (cmp_local_ip(clientfd, &expected_src_addr))
 			goto err;
-		}
-		printf(" ... OK\n");
 	}
 
-	return 0;
+	goto out;
 err:
-	close_progs_fds(progs, prog_cnt);
-	return -1;
+	err = -1;
+out:
+	close(clientfd);
+	close(servfd);
+	return err;
 }
 
-static int run_domain_test(int domain, int cgfd, struct program *progs,
-			   size_t prog_cnt, const char *ip, unsigned short port)
+static int run_sendmsg_test_case(const struct sock_addr_test *test)
 {
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage expected_src_addr;
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	struct sockaddr_storage real_src_addr;
+	int clientfd = -1;
+	int servfd = -1;
+	int set_cmsg;
 	int err = 0;
 
-	if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1)
+	if (test->type != SOCK_DGRAM)
 		goto err;
 
-	if (run_test_case(domain, SOCK_STREAM, ip, port) == -1)
+	if (init_addrs(test, &requested_addr, &expected_addr,
+		       &expected_src_addr))
 		goto err;
 
-	if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1)
+	/* Prepare server to sendmsg to */
+	servfd = start_server(test->type, &expected_addr, addr_len);
+	if (servfd == -1)
 		goto err;
 
+	for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) {
+		if (clientfd >= 0)
+			close(clientfd);
+
+		clientfd = sendmsg_to_server(test->type, &requested_addr,
+					     addr_len, set_cmsg, /*flags*/0,
+					     &err);
+		if (err)
+			goto out;
+		else if (clientfd == -1)
+			goto err;
+
+		/* Try to receive message on server instead of using
+		 * getpeername(2) on client socket, to check that client's
+		 * destination address was rewritten properly, since
+		 * getpeername(2) doesn't work with unconnected datagram
+		 * sockets.
+		 *
+		 * Get source address from recvmsg(2) as well to make sure
+		 * source was rewritten properly: getsockname(2) can't be used
+		 * since socket is unconnected and source defined for one
+		 * specific packet may differ from the one used by default and
+		 * returned by getsockname(2).
+		 */
+		if (recvmsg_from_client(servfd, &real_src_addr) == -1)
+			goto err;
+
+		if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0))
+			goto err;
+	}
+
 	goto out;
 err:
 	err = -1;
 out:
-	close_progs_fds(progs, prog_cnt);
+	close(clientfd);
+	close(servfd);
 	return err;
 }
 
-static int run_test(void)
+static int run_test_case(int cgfd, const struct sock_addr_test *test)
 {
-	size_t inet6_prog_cnt;
-	size_t inet_prog_cnt;
-	int cgfd = -1;
+	int progfd = -1;
 	int err = 0;
 
-	struct program inet6_progs[] = {
-		{BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
-		 BPF_CGROUP_INET4_BIND},
-		{BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6",
-		 BPF_CGROUP_INET4_CONNECT},
-	};
-	inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
-
-	struct program inet_progs[] = {
-		{BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
-		 BPF_CGROUP_INET6_BIND},
-		{BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
-		 BPF_CGROUP_INET6_CONNECT},
-	};
-	inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
+	printf("Test case: %s .. ", test->descr);
 
-	if (setup_cgroup_environment())
+	progfd = test->loadfn(test);
+	if (test->expected_result == LOAD_REJECT && progfd < 0)
+		goto out;
+	else if (test->expected_result == LOAD_REJECT || progfd < 0)
 		goto err;
 
-	cgfd = create_and_get_cgroup(CG_PATH);
-	if (!cgfd)
+	err = bpf_prog_attach(progfd, cgfd, test->attach_type,
+			      BPF_F_ALLOW_OVERRIDE);
+	if (test->expected_result == ATTACH_REJECT && err) {
+		err = 0; /* error was expected, reset it */
+		goto out;
+	} else if (test->expected_result == ATTACH_REJECT || err) {
 		goto err;
+	}
 
-	if (join_cgroup(CG_PATH))
+	switch (test->attach_type) {
+	case BPF_CGROUP_INET4_BIND:
+	case BPF_CGROUP_INET6_BIND:
+		err = run_bind_test_case(test);
+		break;
+	case BPF_CGROUP_INET4_CONNECT:
+	case BPF_CGROUP_INET6_CONNECT:
+		err = run_connect_test_case(test);
+		break;
+	case BPF_CGROUP_UDP4_SENDMSG:
+	case BPF_CGROUP_UDP6_SENDMSG:
+		err = run_sendmsg_test_case(test);
+		break;
+	default:
 		goto err;
+	}
 
-	if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP,
-			    SERV4_PORT) == -1)
-		goto err;
+	if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
+		err = 0; /* error was expected, reset it */
+		goto out;
+	}
+
+	if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
+		err = 0; /* error was expected, reset it */
+		goto out;
+	}
 
-	if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
-			    SERV6_IP, SERV6_PORT) == -1)
+	if (err || test->expected_result != SUCCESS)
 		goto err;
 
 	goto out;
 err:
 	err = -1;
 out:
-	close(cgfd);
-	cleanup_cgroup_environment();
-	printf(err ? "### FAIL\n" : "### SUCCESS\n");
+	/* Detaching w/o checking return code: best effort attempt. */
+	if (progfd != -1)
+		bpf_prog_detach(cgfd, test->attach_type);
+	close(progfd);
+	printf("[%s]\n", err ? "FAIL" : "PASS");
 	return err;
 }
 
+static int run_tests(int cgfd)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(cgfd, &tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
 int main(int argc, char **argv)
 {
+	int cgfd = -1;
+	int err = 0;
+
 	if (argc < 2) {
 		fprintf(stderr,
 			"%s has to be run via %s.sh. Skip direct run.\n",
 			argv[0], argv[0]);
-		exit(0);
+		exit(err);
 	}
-	return run_test();
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_tests(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
 }
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
index 9832a875a828..3b9fdb8094aa 100755
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -4,7 +4,8 @@ set -eu
 
 ping_once()
 {
-	ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+	type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
+	$PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
 }
 
 wait_for_ip()
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
new file mode 100644
index 000000000000..b6c2c605d8c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH			"/foo"
+#define SOCKET_COOKIE_PROG	"./socket_cookie_prog.o"
+
+static int start_server(void)
+{
+	struct sockaddr_in6 addr;
+	int fd;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create server socket");
+		goto out;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin6_family = AF_INET6;
+	addr.sin6_addr = in6addr_loopback;
+	addr.sin6_port = 0;
+
+	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		log_err("Failed to bind server socket");
+		goto close_out;
+	}
+
+	if (listen(fd, 128) == -1) {
+		log_err("Failed to listen on server socket");
+		goto close_out;
+	}
+
+	goto out;
+
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create client socket");
+		goto out;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto close_out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+		log_err("Fail to connect to server");
+		goto close_out;
+	}
+
+	goto out;
+
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int validate_map(struct bpf_map *map, int client_fd)
+{
+	__u32 cookie_expected_value;
+	struct sockaddr_in6 addr;
+	socklen_t len = sizeof(addr);
+	__u32 cookie_value;
+	__u64 cookie_key;
+	int err = 0;
+	int map_fd;
+
+	if (!map) {
+		log_err("Map not found in BPF object");
+		goto err;
+	}
+
+	map_fd = bpf_map__fd(map);
+
+	err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
+	if (err) {
+		log_err("Can't get cookie key from map");
+		goto out;
+	}
+
+	err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
+	if (err) {
+		log_err("Can't get cookie value from map");
+		goto out;
+	}
+
+	err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
+	if (err) {
+		log_err("Can't get client local addr");
+		goto out;
+	}
+
+	cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+	if (cookie_value != cookie_expected_value) {
+		log_err("Unexpected value in map: %x != %x", cookie_value,
+			cookie_expected_value);
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	return err;
+}
+
+static int run_test(int cgfd)
+{
+	enum bpf_attach_type attach_type;
+	struct bpf_prog_load_attr attr;
+	struct bpf_program *prog;
+	struct bpf_object *pobj;
+	const char *prog_name;
+	int server_fd = -1;
+	int client_fd = -1;
+	int prog_fd = -1;
+	int err = 0;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.file = SOCKET_COOKIE_PROG;
+	attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+
+	err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
+	if (err) {
+		log_err("Failed to load %s", attr.file);
+		goto out;
+	}
+
+	bpf_object__for_each_program(prog, pobj) {
+		prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+
+		if (libbpf_attach_type_by_name(prog_name, &attach_type)) {
+			log_err("Unexpected prog: %s", prog_name);
+			goto err;
+		}
+
+		err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
+				      BPF_F_ALLOW_OVERRIDE);
+		if (err) {
+			log_err("Failed to attach prog %s", prog_name);
+			goto out;
+		}
+	}
+
+	server_fd = start_server();
+	if (server_fd == -1)
+		goto err;
+
+	client_fd = connect_to_server(server_fd);
+	if (client_fd == -1)
+		goto err;
+
+	if (validate_map(bpf_map__next(NULL, pobj), client_fd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(client_fd);
+	close(server_fd);
+	bpf_object__close(pobj);
+	printf("%s\n", err ? "FAILED" : "PASSED");
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_test(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c
new file mode 100644
index 000000000000..e6755916442a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockhash_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#undef SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644
index 000000000000..622ade0a0957
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -0,0 +1,1731 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/tls.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+int running;
+static void running_handler(int a);
+
+#ifndef TCP_ULP
+# define TCP_ULP 31
+#endif
+#ifndef SOL_TLS
+# define SOL_TLS 282
+#endif
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define CG_PATH "/sockmap"
+
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_start_push;
+int txmsg_end_push;
+int txmsg_ingress;
+int txmsg_skb;
+int ktls;
+int peek_flag;
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"cgroup",	required_argument,	NULL, 'c' },
+	{"rate",	required_argument,	NULL, 'r' },
+	{"verbose",	no_argument,		NULL, 'v' },
+	{"iov_count",	required_argument,	NULL, 'i' },
+	{"length",	required_argument,	NULL, 'l' },
+	{"test",	required_argument,	NULL, 't' },
+	{"data_test",   no_argument,		NULL, 'd' },
+	{"txmsg",		no_argument,	&txmsg_pass,  1  },
+	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
+	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
+	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
+	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
+	{"txmsg_apply",	required_argument,	NULL, 'a'},
+	{"txmsg_cork",	required_argument,	NULL, 'k'},
+	{"txmsg_start", required_argument,	NULL, 's'},
+	{"txmsg_end",	required_argument,	NULL, 'e'},
+	{"txmsg_start_push", required_argument,	NULL, 'p'},
+	{"txmsg_end_push",   required_argument,	NULL, 'q'},
+	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
+	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{"ktls", no_argument,			&ktls, 1 },
+	{"peek", no_argument,			&peek_flag, 1 },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+	printf(" options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)\n",
+				*long_options[i].flag);
+		else
+			printf(" -%c\n", long_options[i].val);
+	}
+	printf("\n");
+}
+
+char *sock_to_string(int s)
+{
+	if (s == c1)
+		return "client1";
+	else if (s == c2)
+		return "client2";
+	else if (s == s1)
+		return "server1";
+	else if (s == s2)
+		return "server2";
+	else if (s == p1)
+		return "peer1";
+	else if (s == p2)
+		return "peer2";
+	else
+		return "unknown";
+}
+
+static int sockmap_init_ktls(int verbose, int s)
+{
+	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
+		.info = {
+			.version     = TLS_1_2_VERSION,
+			.cipher_type = TLS_CIPHER_AES_GCM_128,
+		},
+	};
+	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
+		.info = {
+			.version     = TLS_1_2_VERSION,
+			.cipher_type = TLS_CIPHER_AES_GCM_128,
+		},
+	};
+	int so_buf = 6553500;
+	int err;
+
+	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
+	if (err) {
+		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
+	if (err) {
+		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
+	if (err) {
+		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
+	if (err) {
+		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
+	if (err) {
+		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+
+	if (verbose)
+		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
+	return 0;
+}
+static int sockmap_init_sockets(int verbose)
+{
+	int i, err, one = 1;
+	struct sockaddr_in addr;
+	int *fds[4] = {&s1, &s2, &c1, &c2};
+
+	s1 = s2 = p1 = p2 = c1 = c2 = 0;
+
+	/* Init sockets */
+	for (i = 0; i < 4; i++) {
+		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (*fds[i] < 0) {
+			perror("socket s1 failed()");
+			return errno;
+		}
+	}
+
+	/* Allow reuse */
+	for (i = 0; i < 2; i++) {
+		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			perror("setsockopt failed()");
+			return errno;
+		}
+	}
+
+	/* Non-blocking sockets */
+	for (i = 0; i < 2; i++) {
+		err = ioctl(*fds[i], FIONBIO, (char *)&one);
+		if (err < 0) {
+			perror("ioctl s1 failed()");
+			return errno;
+		}
+	}
+
+	/* Bind server sockets */
+	memset(&addr, 0, sizeof(struct sockaddr_in));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	addr.sin_port = htons(S1_PORT);
+	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s2 failed()\n");
+		return errno;
+	}
+
+	/* Listen server sockets */
+	addr.sin_port = htons(S1_PORT);
+	err = listen(s1, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = listen(s2, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	/* Initiate Connect */
+	addr.sin_port = htons(S1_PORT);
+	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c2 failed()\n");
+		return errno;
+	} else if (err < 0) {
+		err = 0;
+	}
+
+	/* Accept Connecrtions */
+	p1 = accept(s1, NULL, NULL);
+	if (p1 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	p2 = accept(s2, NULL, NULL);
+	if (p2 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	if (verbose) {
+		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+			c1, s1, c2, s2);
+	}
+	return 0;
+}
+
+struct msg_stats {
+	size_t bytes_sent;
+	size_t bytes_recvd;
+	struct timespec start;
+	struct timespec end;
+};
+
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+	bool drop_expected;
+	int iov_count;
+	int iov_length;
+	int rate;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+			     struct msg_stats *s,
+			     struct sockmap_options *opt)
+{
+	bool drop = opt->drop_expected;
+	unsigned char k = 0;
+	FILE *file;
+	int i, fp;
+
+	file = fopen(".sendpage_tst.tmp", "w+");
+	for (i = 0; i < iov_length * cnt; i++, k++)
+		fwrite(&k, sizeof(char), 1, file);
+	fflush(file);
+	fseek(file, 0, SEEK_SET);
+	fclose(file);
+
+	fp = open(".sendpage_tst.tmp", O_RDONLY);
+	clock_gettime(CLOCK_MONOTONIC, &s->start);
+	for (i = 0; i < cnt; i++) {
+		int sent = sendfile(fd, fp, NULL, iov_length);
+
+		if (!drop && sent < 0) {
+			perror("send loop error:");
+			close(fp);
+			return sent;
+		} else if (drop && sent >= 0) {
+			printf("sendpage loop error expected: %i\n", sent);
+			close(fp);
+			return -EIO;
+		}
+
+		if (sent > 0)
+			s->bytes_sent += sent;
+	}
+	clock_gettime(CLOCK_MONOTONIC, &s->end);
+	close(fp);
+	return 0;
+}
+
+static void msg_free_iov(struct msghdr *msg)
+{
+	int i;
+
+	for (i = 0; i < msg->msg_iovlen; i++)
+		free(msg->msg_iov[i].iov_base);
+	free(msg->msg_iov);
+	msg->msg_iov = NULL;
+	msg->msg_iovlen = 0;
+}
+
+static int msg_alloc_iov(struct msghdr *msg,
+			 int iov_count, int iov_length,
+			 bool data, bool xmit)
+{
+	unsigned char k = 0;
+	struct iovec *iov;
+	int i;
+
+	iov = calloc(iov_count, sizeof(struct iovec));
+	if (!iov)
+		return errno;
+
+	for (i = 0; i < iov_count; i++) {
+		unsigned char *d = calloc(iov_length, sizeof(char));
+
+		if (!d) {
+			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+			goto unwind_iov;
+		}
+		iov[i].iov_base = d;
+		iov[i].iov_len = iov_length;
+
+		if (data && xmit) {
+			int j;
+
+			for (j = 0; j < iov_length; j++)
+				d[j] = k++;
+		}
+	}
+
+	msg->msg_iov = iov;
+	msg->msg_iovlen = iov_count;
+
+	return 0;
+unwind_iov:
+	for (i--; i >= 0 ; i--)
+		free(msg->msg_iov[i].iov_base);
+	return -ENOMEM;
+}
+
+static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
+{
+	int i, j, bytes_cnt = 0;
+	unsigned char k = 0;
+
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		unsigned char *d = msg->msg_iov[i].iov_base;
+
+		for (j = 0;
+		     j < msg->msg_iov[i].iov_len && size; j++) {
+			if (d[j] != k++) {
+				fprintf(stderr,
+					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+					i, j, d[j], k - 1, d[j+1], k);
+				return -EIO;
+			}
+			bytes_cnt++;
+			if (bytes_cnt == chunk_sz) {
+				k = 0;
+				bytes_cnt = 0;
+			}
+			size--;
+		}
+	}
+	return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+		    struct msg_stats *s, bool tx,
+		    struct sockmap_options *opt)
+{
+	struct msghdr msg = {0}, msg_peek = {0};
+	int err, i, flags = MSG_NOSIGNAL;
+	bool drop = opt->drop_expected;
+	bool data = opt->data_test;
+
+	err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
+	if (err)
+		goto out_errno;
+	if (peek_flag) {
+		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
+		if (err)
+			goto out_errno;
+	}
+
+	if (tx) {
+		clock_gettime(CLOCK_MONOTONIC, &s->start);
+		for (i = 0; i < cnt; i++) {
+			int sent = sendmsg(fd, &msg, flags);
+
+			if (!drop && sent < 0) {
+				perror("send loop error:");
+				goto out_errno;
+			} else if (drop && sent >= 0) {
+				printf("send loop error expected: %i\n", sent);
+				errno = -EIO;
+				goto out_errno;
+			}
+			if (sent > 0)
+				s->bytes_sent += sent;
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	} else {
+		int slct, recvp = 0, recv, max_fd = fd;
+		int fd_flags = O_NONBLOCK;
+		struct timeval timeout;
+		float total_bytes;
+		fd_set w;
+
+		fcntl(fd, fd_flags);
+		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+		if (err < 0)
+			perror("recv start time: ");
+		while (s->bytes_recvd < total_bytes) {
+			if (txmsg_cork) {
+				timeout.tv_sec = 0;
+				timeout.tv_usec = 300000;
+			} else {
+				timeout.tv_sec = 1;
+				timeout.tv_usec = 0;
+			}
+
+			/* FD sets */
+			FD_ZERO(&w);
+			FD_SET(fd, &w);
+
+			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+			if (slct == -1) {
+				perror("select()");
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			} else if (!slct) {
+				if (opt->verbose)
+					fprintf(stderr, "unexpected timeout\n");
+				errno = -EIO;
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			}
+
+			errno = 0;
+			if (peek_flag) {
+				flags |= MSG_PEEK;
+				recvp = recvmsg(fd, &msg_peek, flags);
+				if (recvp < 0) {
+					if (errno != EWOULDBLOCK) {
+						clock_gettime(CLOCK_MONOTONIC, &s->end);
+						goto out_errno;
+					}
+				}
+				flags = 0;
+			}
+
+			recv = recvmsg(fd, &msg, flags);
+			if (recv < 0) {
+				if (errno != EWOULDBLOCK) {
+					clock_gettime(CLOCK_MONOTONIC, &s->end);
+					perror("recv failed()\n");
+					goto out_errno;
+				}
+			}
+
+			s->bytes_recvd += recv;
+
+			if (data) {
+				int chunk_sz = opt->sendpage ?
+						iov_length * cnt :
+						iov_length * iov_count;
+
+				errno = msg_verify_data(&msg, recv, chunk_sz);
+				if (errno) {
+					perror("data verify msg failed\n");
+					goto out_errno;
+				}
+				if (recvp) {
+					errno = msg_verify_data(&msg_peek,
+								recvp,
+								chunk_sz);
+					if (errno) {
+						perror("data verify msg_peek failed\n");
+						goto out_errno;
+					}
+				}
+			}
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	}
+
+	msg_free_iov(&msg);
+	msg_free_iov(&msg_peek);
+	return err;
+out_errno:
+	msg_free_iov(&msg);
+	msg_free_iov(&msg_peek);
+	return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(struct sockmap_options *opt)
+{
+	float sent_Bps = 0, recvd_Bps = 0;
+	int rx_fd, txpid, rxpid, err = 0;
+	struct msg_stats s = {0};
+	int iov_count = opt->iov_count;
+	int iov_buf = opt->iov_length;
+	int rx_status, tx_status;
+	int cnt = opt->rate;
+
+	errno = 0;
+
+	if (opt->base)
+		rx_fd = p1;
+	else
+		rx_fd = p2;
+
+	if (ktls) {
+		/* Redirecting into non-TLS socket which sends into a TLS
+		 * socket is not a valid test. So in this case lets not
+		 * enable kTLS but still run the test.
+		 */
+		if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+			err = sockmap_init_ktls(opt->verbose, rx_fd);
+			if (err)
+				return err;
+		}
+		err = sockmap_init_ktls(opt->verbose, c1);
+		if (err)
+			return err;
+	}
+
+	rxpid = fork();
+	if (rxpid == 0) {
+		if (opt->drop_expected)
+			exit(0);
+
+		if (opt->sendpage)
+			iov_count = 1;
+		err = msg_loop(rx_fd, iov_count, iov_buf,
+			       cnt, &s, false, opt);
+		if (err && opt->verbose)
+			fprintf(stderr,
+				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
+				peek_flag ? "(peek_msg)" : "");
+		if (err && txmsg_cork)
+			err = 0;
+		exit(err ? 1 : 0);
+	} else if (rxpid == -1) {
+		perror("msg_loop_rx: ");
+		return errno;
+	}
+
+	txpid = fork();
+	if (txpid == 0) {
+		if (opt->sendpage)
+			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+		else
+			err = msg_loop(c1, iov_count, iov_buf,
+				       cnt, &s, true, opt);
+
+		if (err)
+			fprintf(stderr,
+				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		exit(err ? 1 : 0);
+	} else if (txpid == -1) {
+		perror("msg_loop_tx: ");
+		return errno;
+	}
+
+	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
+	assert(waitpid(txpid, &tx_status, 0) == txpid);
+	if (WIFEXITED(rx_status)) {
+		err = WEXITSTATUS(rx_status);
+		if (err) {
+			fprintf(stderr, "rx thread exited with err %d. ", err);
+			goto out;
+		}
+	}
+	if (WIFEXITED(tx_status)) {
+		err = WEXITSTATUS(tx_status);
+		if (err)
+			fprintf(stderr, "tx thread exited with err %d. ", err);
+	}
+out:
+	return err;
+}
+
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+	struct timeval timeout;
+	char buf[1024] = {0};
+	int sc;
+
+	timeout.tv_sec = 10;
+	timeout.tv_usec = 0;
+
+	/* Ping/Pong data from client to server */
+	sc = send(c1, buf, sizeof(buf), 0);
+	if (sc < 0) {
+		perror("send failed()\n");
+		return sc;
+	}
+
+	do {
+		int s, rc, i, max_fd = p2;
+		fd_set w;
+
+		/* FD sets */
+		FD_ZERO(&w);
+		FD_SET(c1, &w);
+		FD_SET(c2, &w);
+		FD_SET(p1, &w);
+		FD_SET(p2, &w);
+
+		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+		if (s == -1) {
+			perror("select()");
+			break;
+		} else if (!s) {
+			fprintf(stderr, "unexpected timeout\n");
+			break;
+		}
+
+		for (i = 0; i <= max_fd && s > 0; ++i) {
+			if (!FD_ISSET(i, &w))
+				continue;
+
+			s--;
+
+			rc = recv(i, buf, sizeof(buf), 0);
+			if (rc < 0) {
+				if (errno != EWOULDBLOCK) {
+					perror("recv failed()\n");
+					return rc;
+				}
+			}
+
+			if (rc == 0) {
+				close(i);
+				break;
+			}
+
+			sc = send(i, buf, rc, 0);
+			if (sc < 0) {
+				perror("send failed()\n");
+				return sc;
+			}
+		}
+
+		if (rate)
+			sleep(rate);
+
+		if (opt->verbose) {
+			printf(".");
+			fflush(stdout);
+
+		}
+	} while (running);
+
+	return 0;
+}
+
+enum {
+	PING_PONG,
+	SENDMSG,
+	BASE,
+	BASE_SENDPAGE,
+	SENDPAGE,
+};
+
+static int run_options(struct sockmap_options *options, int cg_fd,  int test)
+{
+	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+	/* If base test skip BPF setup */
+	if (test == BASE || test == BASE_SENDPAGE)
+		goto run;
+
+	/* Attach programs to sockmap */
+	err = bpf_prog_attach(prog_fd[0], map_fd[0],
+				BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		fprintf(stderr,
+			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+			prog_fd[0], map_fd[0], err, strerror(errno));
+		return err;
+	}
+
+	err = bpf_prog_attach(prog_fd[1], map_fd[0],
+				BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+	/* Attach to cgroups */
+	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+run:
+	err = sockmap_init_sockets(options->verbose);
+	if (err) {
+		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+		goto out;
+	}
+
+	/* Attach txmsg program to sockmap */
+	if (txmsg_pass)
+		tx_prog_fd = prog_fd[3];
+	else if (txmsg_noisy)
+		tx_prog_fd = prog_fd[4];
+	else if (txmsg_redir)
+		tx_prog_fd = prog_fd[5];
+	else if (txmsg_redir_noisy)
+		tx_prog_fd = prog_fd[6];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[9];
+	/* apply and cork must be last */
+	else if (txmsg_apply)
+		tx_prog_fd = prog_fd[7];
+	else if (txmsg_cork)
+		tx_prog_fd = prog_fd[8];
+	else
+		tx_prog_fd = 0;
+
+	if (tx_prog_fd) {
+		int redir_fd, i = 0;
+
+		err = bpf_prog_attach(tx_prog_fd,
+				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_redir || txmsg_redir_noisy)
+			redir_fd = c2;
+		else
+			redir_fd = c1;
+
+		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_apply) {
+			err = bpf_map_update_elem(map_fd[3],
+						  &i, &txmsg_apply, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_cork) {
+			err = bpf_map_update_elem(map_fd[4],
+						  &i, &txmsg_cork, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_start) {
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_end) {
+			i = 1;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_start_push) {
+			i = 2;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start_push, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_end_push) {
+			i = 3;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end_push, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
+					txmsg_end_push, i, err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_ingress) {
+			int in = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+			i = 1;
+			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 2;
+			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+
+		if (txmsg_skb) {
+			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+					p2 : p1;
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[7],
+						  &i, &ingress, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 3;
+			err = bpf_map_update_elem(map_fd[0],
+						  &i, &skb_fd, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+	}
+
+	if (txmsg_drop)
+		options->drop_expected = true;
+
+	if (test == PING_PONG)
+		err = forever_ping_pong(options->rate, options);
+	else if (test == SENDMSG) {
+		options->base = false;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == SENDPAGE) {
+		options->base = false;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else if (test == BASE) {
+		options->base = true;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == BASE_SENDPAGE) {
+		options->base = true;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else
+		fprintf(stderr, "unknown test\n");
+out:
+	/* Detatch and zero all the maps */
+	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	if (tx_prog_fd >= 0)
+		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+	for (i = 0; i < 8; i++) {
+		key = next_key = 0;
+		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+			key = next_key;
+		}
+	}
+
+	close(s1);
+	close(s2);
+	close(p1);
+	close(p2);
+	close(c1);
+	close(c2);
+	return err;
+}
+
+static char *test_to_str(int test)
+{
+	switch (test) {
+	case SENDMSG:
+		return "sendmsg";
+	case SENDPAGE:
+		return "sendpage";
+	}
+	return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+	char tstr[OPTSTRING];
+
+	memset(options, 0, OPTSTRING);
+
+	if (txmsg_pass)
+		strncat(options, "pass,", OPTSTRING);
+	if (txmsg_noisy)
+		strncat(options, "pass_noisy,", OPTSTRING);
+	if (txmsg_redir)
+		strncat(options, "redir,", OPTSTRING);
+	if (txmsg_redir_noisy)
+		strncat(options, "redir_noisy,", OPTSTRING);
+	if (txmsg_drop)
+		strncat(options, "drop,", OPTSTRING);
+	if (txmsg_apply) {
+		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_cork) {
+		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_start) {
+		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_end) {
+		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_ingress)
+		strncat(options, "ingress,", OPTSTRING);
+	if (txmsg_skb)
+		strncat(options, "skb,", OPTSTRING);
+	if (ktls)
+		strncat(options, "ktls,", OPTSTRING);
+	if (peek_flag)
+		strncat(options, "peek,", OPTSTRING);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+	char *options = calloc(OPTSTRING, sizeof(char));
+	int err;
+
+	if (test == SENDPAGE)
+		opt->sendpage = true;
+	else
+		opt->sendpage = false;
+
+	if (txmsg_drop)
+		opt->drop_expected = true;
+	else
+		opt->drop_expected = false;
+
+	test_options(options);
+
+	fprintf(stdout,
+		"[TEST %i]: (%i, %i, %i, %s, %s): ",
+		test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+		test_to_str(test), options);
+	fflush(stdout);
+	err = run_options(opt, cgrp, test);
+	fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+	test_cnt++;
+	!err ? passed++ : failed++;
+	free(options);
+	return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+	int err = __test_exec(cgrp, SENDMSG, opt);
+
+	if (err)
+		goto out;
+
+	err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+	return err;
+}
+
+static int test_loop(int cgrp)
+{
+	struct sockmap_options opt;
+
+	int err, i, l, r;
+
+	opt.verbose = 0;
+	opt.base = false;
+	opt.sendpage = false;
+	opt.data_test = false;
+	opt.drop_expected = false;
+	opt.iov_count = 0;
+	opt.iov_length = 0;
+	opt.rate = 0;
+
+	r = 1;
+	for (i = 1; i < 100; i += 33) {
+		for (l = 1; l < 100; l += 33) {
+			opt.rate = r;
+			opt.iov_count = i;
+			opt.iov_length = l;
+			err = test_exec(cgrp, &opt);
+			if (err)
+				goto out;
+		}
+	}
+	sched_yield();
+out:
+	return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+	int err;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_ingress = txmsg_skb = 0;
+
+	txmsg_pass = 1;
+	err = test_loop(cgrp);
+	txmsg_pass = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	if (err)
+		goto out;
+
+	txmsg_drop = 1;
+	err = test_loop(cgrp);
+	txmsg_drop = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	txmsg_ingress = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	txmsg_ingress = 0;
+	if (err)
+		goto out;
+out:
+	txmsg_pass = 0;
+	txmsg_redir = 0;
+	txmsg_drop = 0;
+	return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+	int err;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1024;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1024;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1;
+	opt->rate = 512;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 256;
+	opt->iov_count = 1024;
+	opt->rate = 2;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->rate = 100;
+	opt->iov_count = 1;
+	opt->iov_length = 5;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+out:
+	sched_yield();
+	return err;
+}
+
+static int test_mixed(int cgrp)
+{
+	struct sockmap_options opt = {0};
+	int err;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_start = txmsg_end = 0;
+	txmsg_start_push = txmsg_end_push = 0;
+
+	/* Test small and large iov_count values with pass/redir/apply/cork */
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_cork = 4096;
+	txmsg_apply = 4096;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 0;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_cork = 4096;
+	txmsg_apply = 4096;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+out:
+	return err;
+}
+
+static int test_start_end(int cgrp)
+{
+	struct sockmap_options opt = {0};
+	int err, i;
+
+	/* Test basic start/end with lots of iov_count and iov_lengths */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	txmsg_start_push = 1;
+	txmsg_end_push = 2;
+	err = test_txmsg(cgrp);
+	if (err)
+		goto out;
+
+	/* Test start/end with cork */
+	opt.rate = 16;
+	opt.iov_count = 1;
+	opt.iov_length = 100;
+	txmsg_cork = 1600;
+
+	for (i = 99; i <= 1600; i += 500) {
+		txmsg_start = 0;
+		txmsg_end = i;
+		txmsg_start_push = 0;
+		txmsg_end_push = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Test start/end with cork but pull data in middle */
+	for (i = 199; i <= 1600; i += 500) {
+		txmsg_start = 100;
+		txmsg_end = i;
+		txmsg_start_push = 100;
+		txmsg_end_push = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Test start/end with cork pulling last sg entry */
+	txmsg_start = 1500;
+	txmsg_end = 1600;
+	txmsg_start_push = 1500;
+	txmsg_end_push = 1600;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end pull of single byte in last page */
+	txmsg_start = 1111;
+	txmsg_end = 1112;
+	txmsg_start_push = 1111;
+	txmsg_end_push = 1112;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with end < start */
+	txmsg_start = 1111;
+	txmsg_end = 0;
+	txmsg_start_push = 1111;
+	txmsg_end_push = 0;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with end > data */
+	txmsg_start = 0;
+	txmsg_end = 1601;
+	txmsg_start_push = 0;
+	txmsg_end_push = 1601;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with start > data */
+	txmsg_start = 1601;
+	txmsg_end = 1600;
+	txmsg_start_push = 1601;
+	txmsg_end_push = 1600;
+	err = test_exec(cgrp, &opt);
+
+out:
+	txmsg_start = 0;
+	txmsg_end = 0;
+	sched_yield();
+	return err;
+}
+
+char *map_names[] = {
+	"sock_map",
+	"sock_map_txmsg",
+	"sock_map_redir",
+	"sock_apply_bytes",
+	"sock_cork_bytes",
+	"sock_bytes",
+	"sock_redir_flags",
+	"sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+	BPF_SK_SKB_STREAM_PARSER,
+	BPF_SK_SKB_STREAM_VERDICT,
+	BPF_CGROUP_SOCK_OPS,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SOCK_OPS,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+};
+
+static int populate_progs(char *bpf_file)
+{
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int i = 0;
+	long err;
+
+	obj = bpf_object__open(bpf_file);
+	err = libbpf_get_error(obj);
+	if (err) {
+		char err_buf[256];
+
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		printf("Unable to load eBPF objects in file '%s' : %s\n",
+		       bpf_file, err_buf);
+		return -1;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		bpf_program__set_type(prog, prog_type[i]);
+		bpf_program__set_expected_attach_type(prog,
+						      prog_attach_type[i]);
+		i++;
+	}
+
+	i = bpf_object__load(obj);
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		prog_fd[i] = bpf_program__fd(prog);
+		i++;
+	}
+
+	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+		map_fd[i] = bpf_map__fd(maps[i]);
+		if (map_fd[i] < 0) {
+			fprintf(stderr, "load_bpf_file: (%i) %s\n",
+				map_fd[i], strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int __test_suite(int cg_fd, char *bpf_file)
+{
+	int err, cleanup = cg_fd;
+
+	err = populate_progs(bpf_file);
+	if (err < 0) {
+		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+		return err;
+	}
+
+	if (cg_fd < 0) {
+		if (setup_cgroup_environment()) {
+			fprintf(stderr, "ERROR: cgroup env failed\n");
+			return -EINVAL;
+		}
+
+		cg_fd = create_and_get_cgroup(CG_PATH);
+		if (cg_fd < 0) {
+			fprintf(stderr,
+				"ERROR: (%i) open cg path failed: %s\n",
+				cg_fd, optarg);
+			return cg_fd;
+		}
+
+		if (join_cgroup(CG_PATH)) {
+			fprintf(stderr, "ERROR: failed to join cgroup\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Tests basic commands and APIs with range of iov values */
+	txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
+	err = test_txmsg(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests interesting combinations of APIs used together */
+	err = test_mixed(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests pull_data API using start/end API */
+	err = test_start_end(cg_fd);
+	if (err)
+		goto out;
+
+out:
+	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+	if (cleanup < 0) {
+		cleanup_cgroup_environment();
+		close(cg_fd);
+	}
+	return err;
+}
+
+static int test_suite(int cg_fd)
+{
+	int err;
+
+	err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
+	if (err)
+		goto out;
+	err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
+out:
+	if (cg_fd > -1)
+		close(cg_fd);
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	int iov_count = 1, length = 1024, rate = 1;
+	struct sockmap_options options = {0};
+	int opt, longindex, err, cg_fd = 0;
+	char *bpf_file = BPF_SOCKMAP_FILENAME;
+	int test = PING_PONG;
+
+	if (argc < 2)
+		return test_suite(-1);
+
+	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 's':
+			txmsg_start = atoi(optarg);
+			break;
+		case 'e':
+			txmsg_end = atoi(optarg);
+			break;
+		case 'p':
+			txmsg_start_push = atoi(optarg);
+			break;
+		case 'q':
+			txmsg_end_push = atoi(optarg);
+			break;
+		case 'a':
+			txmsg_apply = atoi(optarg);
+			break;
+		case 'k':
+			txmsg_cork = atoi(optarg);
+			break;
+		case 'c':
+			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+			if (cg_fd < 0) {
+				fprintf(stderr,
+					"ERROR: (%i) open cg path failed: %s\n",
+					cg_fd, optarg);
+				return cg_fd;
+			}
+			break;
+		case 'r':
+			rate = atoi(optarg);
+			break;
+		case 'v':
+			options.verbose = 1;
+			break;
+		case 'i':
+			iov_count = atoi(optarg);
+			break;
+		case 'l':
+			length = atoi(optarg);
+			break;
+		case 'd':
+			options.data_test = true;
+			break;
+		case 't':
+			if (strcmp(optarg, "ping") == 0) {
+				test = PING_PONG;
+			} else if (strcmp(optarg, "sendmsg") == 0) {
+				test = SENDMSG;
+			} else if (strcmp(optarg, "base") == 0) {
+				test = BASE;
+			} else if (strcmp(optarg, "base_sendpage") == 0) {
+				test = BASE_SENDPAGE;
+			} else if (strcmp(optarg, "sendpage") == 0) {
+				test = SENDPAGE;
+			} else {
+				usage(argv);
+				return -1;
+			}
+			break;
+		case 0:
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return -1;
+		}
+	}
+
+	if (argc <= 3 && cg_fd)
+		return test_suite(cg_fd);
+
+	if (!cg_fd) {
+		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+			argv[0]);
+		return -1;
+	}
+
+	err = populate_progs(bpf_file);
+	if (err) {
+		fprintf(stderr, "populate program: (%s) %s\n",
+			bpf_file, strerror(errno));
+		return 1;
+	}
+	running = 1;
+
+	/* catch SIGINT */
+	signal(SIGINT, running_handler);
+
+	options.iov_count = iov_count;
+	options.iov_length = length;
+	options.rate = rate;
+
+	err = run_options(&options, cg_fd, test);
+	close(cg_fd);
+	return err;
+}
+
+void running_handler(int a)
+{
+	running = 0;
+}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644
index 000000000000..677b2ed1cc1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#define SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
new file mode 100644
index 000000000000..14b8bbac004f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ *    client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") sock_map = {
+	.type = TEST_MAP_TYPE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+	.type = TEST_MAP_TYPE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+	.type = TEST_MAP_TYPE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 4
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
+
+	if (lport == 10000)
+		ret = 10;
+	else
+		ret = 1;
+
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+#ifdef SOCKMAP
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+#else
+	return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
+#endif
+
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+	__u32 lport, rport;
+	int op, err = 0, index, key, ret;
+
+
+	op = (int) skops->op;
+
+	switch (op) {
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (lport == 10000) {
+			ret = 1;
+#ifdef SOCKMAP
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+#else
+			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+						   BPF_NOEXIST);
+#endif
+			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (bpf_ntohl(rport) == 10001) {
+			ret = 10;
+#ifdef SOCKMAP
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+#else
+			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+						   BPF_NOEXIST);
+#endif
+			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1, two = 2, three = 3;
+	int *start, *end, *start_push, *end_push;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+	return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+	int zero = 0, one = 1, two = 2, three = 3;
+	int *start, *end, *start_push, *end_push;
+	int *bytes, len1, len2 = 0, len3;
+	int err1 = -1, err2 = -1;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end) {
+		int err;
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push) {
+		int err;
+
+		bpf_printk("sk_msg2: push(%i:%i)\n",
+			   start_push ? *start_push : 0,
+			   end_push ? *end_push : 0);
+		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+		if (err)
+			bpf_printk("sk_msg2: push_data err %i\n", err);
+		len3 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length push_update %i->%i\n",
+			   len2 ? len2 : len1, len3);
+	}
+
+	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+		   len1, err1, err2);
+	return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+	int *bytes, *start, *end, *start_push, *end_push, *f;
+	int zero = 0, one = 1, two = 2, three = 3, key = 0;
+	__u64 flags = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+#ifdef SOCKMAP
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3;
+	int *bytes, *start, *end, *start_push, *end_push, *f;
+	int err1 = 0, err2 = 0, key = 0;
+	__u64 flags = 0;
+
+		int err;
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end) {
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push) {
+		bpf_printk("sk_msg4: push(%i:%i)\n",
+			   start_push ? *start_push : 0,
+			   end_push ? *end_push : 0);
+		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+		if (err)
+			bpf_printk("sk_msg4: push_data err %i\n",
+				   err);
+		len3 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length push_update %i->%i\n",
+			   len2 ? len2 : len1, len3);
+	}
+
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+#ifdef SOCKMAP
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+	bpf_printk("sk_msg3: err %i\n", err);
+	return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes) {
+		ret = bpf_msg_apply_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	} else {
+		return SK_DROP;
+	}
+	return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes) {
+		if (((__u64)data_end - (__u64)data) >= *bytes)
+			return SK_PASS;
+		ret = bpf_msg_cork_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	}
+	return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+	int *bytes, *start, *end, *start_push, *end_push;
+	int zero = 0, one = 1, two = 2, three = 3;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+
+	return SK_DROP;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c
new file mode 100644
index 000000000000..31c3880e6da0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stack_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_STACK
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
index b755bd783ce5..d86c281e957f 100644
--- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
 	.type = BPF_MAP_TYPE_HASH,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u32),
-	.max_entries = 10000,
+	.max_entries = 16384,
 };
 
 struct bpf_map_def SEC("maps") stackmap = {
@@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = {
 	.map_flags = BPF_F_STACK_BUILD_ID,
 };
 
+struct bpf_map_def SEC("maps") stack_amap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct bpf_stack_build_id)
+		* PERF_MAX_STACK_DEPTH,
+	.max_entries = 128,
+};
+
 /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
 struct random_urandom_args {
 	unsigned long long pad;
@@ -42,7 +50,10 @@ struct random_urandom_args {
 SEC("tracepoint/random/urandom_read")
 int oncpu(struct random_urandom_args *args)
 {
+	__u32 max_len = sizeof(struct bpf_stack_build_id)
+			* PERF_MAX_STACK_DEPTH;
 	__u32 key = 0, val = 0, *value_p;
+	void *stack_p;
 
 	value_p = bpf_map_lookup_elem(&control_map, &key);
 	if (value_p && *value_p)
@@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args)
 
 	/* The size of stackmap and stackid_hmap should be the same */
 	key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
-	if ((int)key >= 0)
+	if ((int)key >= 0) {
 		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p)
+			bpf_get_stack(args, stack_p, max_len,
+				      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	}
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
index 76d85c5d08bd..af111af7ca1a 100644
--- a/tools/testing/selftests/bpf/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
 	.type = BPF_MAP_TYPE_HASH,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u32),
-	.max_entries = 10000,
+	.max_entries = 16384,
 };
 
 struct bpf_map_def SEC("maps") stackmap = {
 	.type = BPF_MAP_TYPE_STACK_TRACE,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
-	.max_entries = 10000,
+	.max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 16384,
 };
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
@@ -44,7 +51,9 @@ struct sched_switch_args {
 SEC("tracepoint/sched/sched_switch")
 int oncpu(struct sched_switch_args *ctx)
 {
+	__u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
 	__u32 key = 0, val = 0, *value_p;
+	void *stack_p;
 
 	value_p = bpf_map_lookup_elem(&control_map, &key);
 	if (value_p && *value_p)
@@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx)
 
 	/* The size of stackmap and stackid_hmap should be the same */
 	key = bpf_get_stackid(ctx, &stackmap, 0);
-	if ((int)key >= 0)
+	if ((int)key >= 0) {
 		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p)
+			bpf_get_stack(ctx, stack_p, max_len, 0);
+	}
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 2fe43289943c..7bcfa6207005 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -12,5 +12,6 @@ struct tcpbpf_globals {
 	__u32 good_cb_test_rv;
 	__u64 bytes_received;
 	__u64 bytes_acked;
+	__u32 num_listen;
 };
 #endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
index 3e645ee41ed5..74f73b33a7b0 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -5,6 +5,7 @@
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
@@ -17,6 +18,13 @@ struct bpf_map_def SEC("maps") global_map = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(struct tcpbpf_globals),
+	.max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") sockopt_results = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(int),
 	.max_entries = 2,
 };
 
@@ -45,11 +53,14 @@ int _version SEC("version") = 1;
 SEC("sockops")
 int bpf_testcb(struct bpf_sock_ops *skops)
 {
-	int rv = -1;
-	int bad_call_rv = 0;
+	char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+	struct tcphdr *thdr;
 	int good_call_rv = 0;
-	int op;
+	int bad_call_rv = 0;
+	int save_syn = 1;
+	int rv = -1;
 	int v = 0;
+	int op;
 
 	op = (int) skops->op;
 
@@ -82,6 +93,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		v = 0xff;
 		rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
 				    sizeof(v));
+		if (skops->family == AF_INET6) {
+			v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN,
+					   header, (sizeof(struct ipv6hdr) +
+						    sizeof(struct tcphdr)));
+			if (!v) {
+				int offset = sizeof(struct ipv6hdr);
+
+				thdr = (struct tcphdr *)(header + offset);
+				v = thdr->syn;
+				__u32 key = 1;
+
+				bpf_map_update_elem(&sockopt_results, &key, &v,
+						    BPF_ANY);
+			}
+		}
 		break;
 	case BPF_SOCK_OPS_RTO_CB:
 		break;
@@ -96,15 +122,28 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 			if (!gp)
 				break;
 			g = *gp;
-			g.total_retrans = skops->total_retrans;
-			g.data_segs_in = skops->data_segs_in;
-			g.data_segs_out = skops->data_segs_out;
-			g.bytes_received = skops->bytes_received;
-			g.bytes_acked = skops->bytes_acked;
+			if (skops->args[0] == BPF_TCP_LISTEN) {
+				g.num_listen++;
+			} else {
+				g.total_retrans = skops->total_retrans;
+				g.data_segs_in = skops->data_segs_in;
+				g.data_segs_out = skops->data_segs_out;
+				g.bytes_received = skops->bytes_received;
+				g.bytes_acked = skops->bytes_acked;
+			}
 			bpf_map_update_elem(&global_map, &key, &g,
 					    BPF_ANY);
 		}
 		break;
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
+				   &save_syn, sizeof(save_syn));
+		/* Update global map w/ result of setsock opt */
+		__u32 key = 0;
+
+		bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+		break;
 	default:
 		rv = -1;
 	}
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 84ab5163c828..e6eebda7d112 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -1,27 +1,79 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
-#include <signal.h>
 #include <string.h>
-#include <assert.h>
-#include <linux/perf_event.h>
-#include <linux/ptrace.h>
 #include <linux/bpf.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
 #include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
-#include "bpf_util.h"
+
 #include "bpf_rlimit.h"
-#include <linux/perf_event.h>
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
 #include "test_tcpbpf.h"
 
+#define EXPECT_EQ(expected, actual, fmt)			\
+	do {							\
+		if ((expected) != (actual)) {			\
+			printf("  Value of: " #actual "\n"	\
+			       "    Actual: %" fmt "\n"		\
+			       "  Expected: %" fmt "\n",	\
+			       (actual), (expected));		\
+			goto err;				\
+		}						\
+	} while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+	__u32 expected_events;
+
+	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+			   (1 << BPF_SOCK_OPS_RWND_INIT) |
+			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+			   (1 << BPF_SOCK_OPS_STATE_CB) |
+			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+	EXPECT_EQ(501ULL, result->bytes_received, "llu");
+	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+	EXPECT_EQ(1, result->data_segs_in, PRIu32);
+	EXPECT_EQ(1, result->data_segs_out, PRIu32);
+	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+	EXPECT_EQ(1, result->num_listen, PRIu32);
+
+	return 0;
+err:
+	return -1;
+}
+
+int verify_sockopt_result(int sock_map_fd)
+{
+	__u32 key = 0;
+	int res;
+	int rv;
+
+	/* check setsockopt for SAVE_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(0, res, "d");
+	key = 1;
+	/* check getsockopt for SAVED_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(1, res, "d");
+	return 0;
+err:
+	return -1;
+}
+
 static int bpf_find_map(const char *test, struct bpf_object *obj,
 			const char *name)
 {
@@ -35,42 +87,28 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 	return bpf_map__fd(map);
 }
 
-#define SYSTEM(CMD)						\
-	do {							\
-		if (system(CMD)) {				\
-			printf("system(%s) FAILS!\n", CMD);	\
-		}						\
-	} while (0)
-
 int main(int argc, char **argv)
 {
 	const char *file = "test_tcpbpf_kern.o";
+	int prog_fd, map_fd, sock_map_fd;
 	struct tcpbpf_globals g = {0};
-	int cg_fd, prog_fd, map_fd;
-	bool debug_flag = false;
+	const char *cg_path = "/foo";
 	int error = EXIT_FAILURE;
 	struct bpf_object *obj;
-	char cmd[100], *dir;
-	struct stat buffer;
+	int cg_fd = -1;
 	__u32 key = 0;
-	int pid;
 	int rv;
 
-	if (argc > 1 && strcmp(argv[1], "-d") == 0)
-		debug_flag = true;
+	if (setup_cgroup_environment())
+		goto err;
 
-	dir = "/tmp/cgroupv2/foo";
+	cg_fd = create_and_get_cgroup(cg_path);
+	if (!cg_fd)
+		goto err;
 
-	if (stat(dir, &buffer) != 0) {
-		SYSTEM("mkdir -p /tmp/cgroupv2");
-		SYSTEM("mount -t cgroup2 none /tmp/cgroupv2");
-		SYSTEM("mkdir -p /tmp/cgroupv2/foo");
-	}
-	pid = (int) getpid();
-	sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid);
-	SYSTEM(cmd);
+	if (join_cgroup(cg_path))
+		goto err;
 
-	cg_fd = open(dir, O_DIRECTORY, O_RDONLY);
 	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
 		printf("FAILED: load_bpf_file failed for: %s\n", file);
 		goto err;
@@ -83,46 +121,40 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
-	SYSTEM("./tcp_server.py");
+	if (system("./tcp_server.py")) {
+		printf("FAILED: TCP server\n");
+		goto err;
+	}
 
 	map_fd = bpf_find_map(__func__, obj, "global_map");
 	if (map_fd < 0)
 		goto err;
 
+	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
+	if (sock_map_fd < 0)
+		goto err;
+
 	rv = bpf_map_lookup_elem(map_fd, &key, &g);
 	if (rv != 0) {
 		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
 		goto err;
 	}
 
-	if (g.bytes_received != 501 || g.bytes_acked != 1002 ||
-	    g.data_segs_in != 1 || g.data_segs_out != 1 ||
-	    (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 ||
-		g.good_cb_test_rv != 0) {
+	if (verify_result(&g)) {
 		printf("FAILED: Wrong stats\n");
-		if (debug_flag) {
-			printf("\n");
-			printf("bytes_received: %d (expecting 501)\n",
-			       (int)g.bytes_received);
-			printf("bytes_acked:    %d (expecting 1002)\n",
-			       (int)g.bytes_acked);
-			printf("data_segs_in:   %d (expecting 1)\n",
-			       g.data_segs_in);
-			printf("data_segs_out:  %d (expecting 1)\n",
-			       g.data_segs_out);
-			printf("event_map:      0x%x (at least 0x47e)\n",
-			       g.event_map);
-			printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n",
-			       g.bad_cb_test_rv);
-			printf("good_cb_test_rv:0x%x (expecting 0)\n",
-			       g.good_cb_test_rv);
-		}
 		goto err;
 	}
+
+	if (verify_sockopt_result(sock_map_fd)) {
+		printf("FAILED: Wrong sockopt stats\n");
+		goto err;
+	}
+
 	printf("PASSED!\n");
 	error = 0;
 err:
 	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+	close(cg_fd);
+	cleanup_cgroup_environment();
 	return error;
-
 }
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000000..546aee3e9fb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,731 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+#   The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+#     root namespace   |     at_ns0 namespace
+#                      |
+#      -----------     |     -----------
+#      | tnl dev |     |     | tnl dev |  (overlay network)
+#      -----------     |     -----------
+#      metadata-mode   |     native-mode
+#       with bpf       |
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |  (underlay network)
+#      ----------    peer    ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start by namespace creation, device configuration,
+# then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'
+# from root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+#    with remote_ip=172.16.1.200 and others.
+# 3) Outer tunnel header is prepended and route the packet to veth1's egress
+# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
+# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
+# 6) Forward the packet to the overlay tnl dev
+
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip link set dev veth1 up mtu 1500
+	ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE key 2 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+		local ::11 remote ::22
+
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV fc80::200/24
+	ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 3
+	fi
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 7
+	fi
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+	# Set static ARP entry here because iptables set-mark works
+	# on L3 packet, as a result not applying to ARP packets,
+	# causing errors at get_tunnel_{key/opt}.
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 4789 gbp remote 172.16.1.200
+	ip netns exec at_ns0 \
+		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external gbp dstport 4789
+	ip link set dev $DEV address 52:54:00:d9:02:00 up
+	ip addr add dev $DEV 10.1.1.200/24
+	arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+	#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+	ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	#ip -4 addr del 172.16.1.200 dev veth1
+	ip -6 addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external dstport 4789
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 6081 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE dstport 6081 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 \
+		remote ::22     # geneve has no local option
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+test_gre()
+{
+	TYPE=gretap
+	DEV_NS=gretap00
+	DEV=gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_gre_tunnel
+	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+	TYPE=ip6gre
+	DEV_NS=ip6gre00
+	DEV=ip6gre11
+	ret=0
+
+	check $TYPE
+	config_device
+	# reuse the ip6gretap function
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+	TYPE=ip6gretap
+	DEV_NS=ip6gretap00
+	DEV=ip6gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+	TYPE=erspan
+	DEV_NS=erspan00
+	DEV=erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_erspan_tunnel $1
+	attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+	TYPE=ip6erspan
+	DEV_NS=ip6erspan00
+	DEV=ip6erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6erspan_tunnel $1
+	attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+	ping6 $PING_ARG ::11
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=vxlan00
+	DEV=vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_vxlan_tunnel
+	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=ip6vxlan00
+	DEV=ip6vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6vxlan_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+	TYPE=geneve
+	DEV_NS=geneve00
+	DEV=geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_geneve_tunnel
+	attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+	TYPE=geneve
+	DEV_NS=ip6geneve00
+	DEV=ip6geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6geneve_tunnel
+	attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+	TYPE=ipip
+	DEV_NS=ipip00
+	DEV=ipip11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ipip6tnl00
+	DEV=ipip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+	auth=0x$(printf '1%.0s' {1..40})
+	enc=0x$(printf '2%.0s' {1..32})
+	spi_in_to_out=0x1
+	spi_out_to_in=0x2
+	# at_ns0 namespace
+	# at_ns0 -> root
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+			spi $spi_in_to_out reqid 1 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+			spi $spi_out_to_in reqid 2 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip netns exec at_ns0 \
+		ip addr add dev veth0 10.1.1.100/32
+	ip netns exec at_ns0 \
+		ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+			src 10.1.1.100
+
+	# root namespace
+	# at_ns0 -> root
+	ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+		spi $spi_in_to_out reqid 1 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+		spi $spi_out_to_in reqid 2 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip addr add dev veth1 10.1.1.200/32
+	ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+	config_device
+	> /sys/kernel/debug/tracing/trace
+	setup_xfrm_tunnel
+	tc qdisc add dev veth1 clsact
+	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+		sec xfrm_get_state
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	sleep 1
+	grep "reqid 1" /sys/kernel/debug/tracing/trace
+	check_err $?
+	grep "spi 0x1" /sys/kernel/debug/tracing/trace
+	check_err $?
+	grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+		echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+		return 1
+	fi
+	echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+	DEV=$1
+	SET=$2
+	GET=$3
+	tc qdisc add dev $DEV clsact
+	tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+	tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+	ip netns delete at_ns0 2> /dev/null
+	ip link del veth1 2> /dev/null
+	ip link del ipip11 2> /dev/null
+	ip link del ipip6tnl11 2> /dev/null
+	ip link del gretap11 2> /dev/null
+	ip link del ip6gre11 2> /dev/null
+	ip link del ip6gretap11 2> /dev/null
+	ip link del vxlan11 2> /dev/null
+	ip link del ip6vxlan11 2> /dev/null
+	ip link del geneve11 2> /dev/null
+	ip link del ip6geneve11 2> /dev/null
+	ip link del erspan11 2> /dev/null
+	ip link del ip6erspan11 2> /dev/null
+	ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
+	ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
+	ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
+	ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
+}
+
+cleanup_exit()
+{
+	echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+	cleanup
+	exit 0
+}
+
+check()
+{
+	ip link help 2>&1 | grep -q "\s$1\s"
+	if [ $? -ne 0 ];then
+		echo "SKIP $1: iproute2 not support"
+	cleanup
+	return 1
+	fi
+}
+
+enable_debug()
+{
+	echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+bpf_tunnel_test()
+{
+	echo "Testing GRE tunnel..."
+	test_gre
+	echo "Testing IP6GRE tunnel..."
+	test_ip6gre
+	echo "Testing IP6GRETAP tunnel..."
+	test_ip6gretap
+	echo "Testing ERSPAN tunnel..."
+	test_erspan v2
+	echo "Testing IP6ERSPAN tunnel..."
+	test_ip6erspan v2
+	echo "Testing VXLAN tunnel..."
+	test_vxlan
+	echo "Testing IP6VXLAN tunnel..."
+	test_ip6vxlan
+	echo "Testing GENEVE tunnel..."
+	test_geneve
+	echo "Testing IP6GENEVE tunnel..."
+	test_ip6geneve
+	echo "Testing IPIP tunnel..."
+	test_ipip
+	echo "Testing IPIP6 tunnel..."
+	test_ipip6
+	echo "Testing IPSec tunnel..."
+	test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000000..504df69c83df
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define ERROR(ret) do {\
+		char fmt[] = "ERROR line:%d ret:%d\n";\
+		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+	} while (0)
+
+int _version SEC("version") = 1;
+
+struct geneve_opt {
+	__be16	opt_class;
+	__u8	type;
+	__u8	length:5;
+	__u8	r3:1;
+	__u8	r2:1;
+	__u8	r1:1;
+	__u8	opt_data[8]; /* hard-coded to 8 byte */
+};
+
+struct vxlan_metadata {
+	__u32     gbp;
+};
+
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "key %d remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+	key.tunnel_label = 0xabcde;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+				     BPF_F_SEQ_NUMBER);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+	md.version = 1;
+	md.u.index = bpf_htonl(123);
+#else
+	__u8 direction = 1;
+	__u8 hwid = 7;
+
+	md.version = 2;
+	md.u.md2.dir = direction;
+	md.u.md2.hwid = hwid & 0xf;
+	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	__u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11);
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+	md.u.index = bpf_htonl(123);
+	md.version = 1;
+#else
+	__u8 direction = 0;
+	__u8 hwid = 17;
+
+	md.version = 2;
+	md.u.md2.dir = direction;
+	md.u.md2.hwid = hwid & 0xf;
+	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	__u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.gbp);
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+	int ret, ret2;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	return TC_ACT_OK;
+}
+
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (tcp->dest == bpf_htons(5200))
+			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+		else if (tcp->dest == bpf_htons(5201))
+			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+		else
+			return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+			 bpf_htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct ipv6hdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+		key.remote_ipv6[3] = bpf_htonl(1);
+	} else {
+		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
+			ERROR(iph->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == bpf_htons(5200)) {
+			key.remote_ipv6[3] = bpf_htonl(1);
+		} else if (tcp->dest == bpf_htons(5201)) {
+			key.remote_ipv6[3] = bpf_htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+			 bpf_htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+	struct bpf_xfrm_state x;
+	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+	int ret;
+
+	ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+	if (ret < 0)
+		return TC_ACT_OK;
+
+	bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+			 bpf_ntohl(x.remote_ipv4));
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index fd7de7eb329e..f8eac4a544f4 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2017 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -41,15 +42,13 @@
 # endif
 #endif
 #include "bpf_rlimit.h"
+#include "bpf_rand.h"
+#include "bpf_util.h"
 #include "../../../include/linux/filter.h"
 
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-#define MAX_INSNS	512
+#define MAX_INSNS	BPF_MAXINSNS
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	4
+#define MAX_NR_MAPS	13
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
 
@@ -62,13 +61,22 @@ static bool unpriv_disabled = false;
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
-	int fixup_map1[MAX_FIXUPS];
-	int fixup_map2[MAX_FIXUPS];
-	int fixup_prog[MAX_FIXUPS];
+	int fixup_map_hash_8b[MAX_FIXUPS];
+	int fixup_map_hash_48b[MAX_FIXUPS];
+	int fixup_map_hash_16b[MAX_FIXUPS];
+	int fixup_map_array_48b[MAX_FIXUPS];
+	int fixup_map_sockmap[MAX_FIXUPS];
+	int fixup_map_sockhash[MAX_FIXUPS];
+	int fixup_map_xskmap[MAX_FIXUPS];
+	int fixup_map_stacktrace[MAX_FIXUPS];
+	int fixup_prog1[MAX_FIXUPS];
+	int fixup_prog2[MAX_FIXUPS];
 	int fixup_map_in_map[MAX_FIXUPS];
+	int fixup_cgroup_storage[MAX_FIXUPS];
+	int fixup_percpu_cgroup_storage[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
-	uint32_t retval;
+	uint32_t retval, retval_unpriv;
 	enum {
 		UNDEF,
 		ACCEPT,
@@ -76,6 +84,8 @@ struct bpf_test {
 	} result, result_unpriv;
 	enum bpf_prog_type prog_type;
 	uint8_t flags;
+	__u8 data[TEST_DATA_LEN];
+	void (*fill_helper)(struct bpf_test *self);
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -88,6 +98,109 @@ struct test_val {
 	int foo[MAX_ENTRIES];
 };
 
+struct other_val {
+	long long foo;
+	long long bar;
+};
+
+static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+{
+	/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+#define PUSH_CNT 51
+	unsigned int len = BPF_MAXINSNS;
+	struct bpf_insn *insn = self->insns;
+	int i = 0, j, k = 0;
+
+	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+loop:
+	for (j = 0; j < PUSH_CNT; j++) {
+		insn[i++] = BPF_LD_ABS(BPF_B, 0);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		i++;
+		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+		insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
+		insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 BPF_FUNC_skb_vlan_push),
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		i++;
+	}
+
+	for (j = 0; j < PUSH_CNT; j++) {
+		insn[i++] = BPF_LD_ABS(BPF_B, 0);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		i++;
+		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 BPF_FUNC_skb_vlan_pop),
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		i++;
+	}
+	if (++k < 5)
+		goto loop;
+
+	for (; i < len - 1; i++)
+		insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+	insn[len - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->insns;
+	unsigned int len = BPF_MAXINSNS;
+	int i = 0;
+
+	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	insn[i++] = BPF_LD_ABS(BPF_B, 0);
+	insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2);
+	i++;
+	while (i < len - 1)
+		insn[i++] = BPF_LD_ABS(BPF_B, 1);
+	insn[i] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->insns;
+	uint64_t res = 0;
+	int i = 0;
+
+	insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+	while (i < self->retval) {
+		uint64_t val = bpf_semi_rand_get();
+		struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+		res ^= val;
+		insn[i++] = tmp[0];
+		insn[i++] = tmp[1];
+		insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+	}
+	insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+	insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+	insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+	insn[i] = BPF_EXIT_INSN();
+	res ^= (res >> 32);
+	self->retval = (uint32_t)res;
+}
+
+/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
+#define BPF_SK_LOOKUP							\
+	/* struct bpf_sock_tuple tuple = {} */				\
+	BPF_MOV64_IMM(BPF_REG_2, 0),					\
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),			\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48),		\
+	/* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */	\
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),				\
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),				\
+	BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),	\
+	BPF_MOV64_IMM(BPF_REG_4, 0),					\
+	BPF_MOV64_IMM(BPF_REG_5, 0),					\
+	BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+
 static struct bpf_test tests[] = {
 	{
 		"add+sub+mul",
@@ -767,7 +880,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 2 },
+		.fixup_map_hash_8b = { 2 },
 		.errstr = "invalid indirect read from stack",
 		.result = REJECT,
 	},
@@ -1001,7 +1114,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 invalid mem access 'map_value_or_null'",
 		.result = REJECT,
 	},
@@ -1018,7 +1131,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "misaligned value access",
 		.result = REJECT,
 		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -1038,7 +1151,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 invalid mem access",
 		.errstr_unpriv = "R0 leaks addr",
 		.result = REJECT,
@@ -1128,7 +1241,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_delete_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 24 },
+		.fixup_map_hash_8b = { 24 },
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -1282,7 +1395,7 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, pkt_type)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1305,7 +1418,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -12),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1329,7 +1442,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -13),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1597,6 +1710,121 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SK_SKB,
 	},
 	{
+		"valid access family in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, family)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access remote_ip4 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access local_ip4 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access remote_port in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access local_port in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access remote_ip6 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access local_ip6 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"invalid 64B read of family in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, family)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"invalid read past end of SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_port) + 4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"invalid read offset in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, family) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
 		"direct packet read for SK_MSG",
 		.insns = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
@@ -2351,7 +2579,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr_unpriv = "R4 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2368,7 +2596,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "invalid indirect read from stack off -8+0 size 8",
 		.result = REJECT,
 	},
@@ -2503,6 +2731,137 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"unpriv: spill/fill of different pointers stx - ctx and sock",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb == NULL) *target = sock; */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* else *target = skb; */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* struct __sk_buff *skb = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* skb->mark = 42; */
+			BPF_MOV64_IMM(BPF_REG_3, 42),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+				    offsetof(struct __sk_buff, mark)),
+			/* if (sk) bpf_sk_release(sk) */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "type=ctx expected=sock",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx - leak sock",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb == NULL) *target = sock; */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* else *target = skb; */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* struct __sk_buff *skb = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* skb->mark = 42; */
+			BPF_MOV64_IMM(BPF_REG_3, 42),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		//.errstr = "same insn cannot be used with different pointers",
+		.errstr = "Unreleased reference",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx - sock and ctx (read)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb) *target = skb */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* else *target = sock */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* struct bpf_sock *sk = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+				BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					    offsetof(struct bpf_sock, mark)),
+				BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "same insn cannot be used with different pointers",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx - sock and ctx (write)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb) *target = skb */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* else *target = sock */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* struct bpf_sock *sk = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* if (sk) sk->mark = 42; bpf_sk_release(sk); */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+				BPF_MOV64_IMM(BPF_REG_3, 42),
+				BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+					    offsetof(struct bpf_sock, mark)),
+				BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		//.errstr = "same insn cannot be used with different pointers",
+		.errstr = "cannot write into socket",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"unpriv: spill/fill of different pointers ldx",
 		.insns = {
 			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
@@ -2539,7 +2898,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2565,7 +2924,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 1 },
+		.fixup_prog1 = { 1 },
 		.errstr_unpriv = "R3 leaks addr into helper",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2579,7 +2938,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2652,7 +3011,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 1 },
+		.fixup_prog1 = { 1 },
 		.result = ACCEPT,
 		.retval = 42,
 	},
@@ -2666,7 +3025,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 1 },
+		.fixup_prog1 = { 1 },
 		.result = ACCEPT,
 		.retval = 41,
 	},
@@ -2680,7 +3039,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 1 },
+		.fixup_prog1 = { 1 },
 		.result = ACCEPT,
 		.retval = 1,
 	},
@@ -2694,7 +3053,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 2),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 1 },
+		.fixup_prog1 = { 1 },
 		.result = ACCEPT,
 		.retval = 2,
 	},
@@ -2708,7 +3067,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 2),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 1 },
+		.fixup_prog1 = { 1 },
 		.result = ACCEPT,
 		.retval = 2,
 	},
@@ -2722,9 +3081,11 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 2),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_prog = { 2 },
+		.fixup_prog1 = { 2 },
 		.result = ACCEPT,
 		.retval = 42,
+		/* Verifier rewrite for unpriv skips tail call here. */
+		.retval_unpriv = 2,
 	},
 	{
 		"stack pointer arithmetic",
@@ -3071,7 +3432,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "BPF_ST stores into R1 context is not allowed",
+		.errstr = "BPF_ST stores into R1 ctx is not allowed",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3083,7 +3444,7 @@ static struct bpf_test tests[] = {
 				     BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.errstr = "BPF_XADD stores into R1 ctx is not allowed",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3433,7 +3794,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.errstr = "R3 pointer arithmetic on pkt_end",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3718,7 +4079,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.result_unpriv = ACCEPT,
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -3734,7 +4095,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -3762,7 +4123,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 11 },
+		.fixup_map_hash_8b = { 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -3784,7 +4145,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -3806,7 +4167,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -3829,7 +4190,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3844,7 +4205,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -3872,7 +4233,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 11 },
+		.fixup_map_hash_8b = { 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3894,7 +4255,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -3916,7 +4277,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4187,6 +4548,85 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"prevent map lookup in sockmap",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_sockmap = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+	},
+	{
+		"prevent map lookup in sockhash",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_sockhash = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+	},
+	{
+		"prevent map lookup in xskmap",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_xskmap = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"prevent map lookup in stack trace",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_stacktrace = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"prevent map lookup in prog array",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog2 = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem",
+	},
+	{
 		"valid map access into an array with a constant",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -4200,7 +4640,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4222,7 +4662,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4246,7 +4686,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4274,7 +4714,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4294,7 +4734,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=48 size=8",
 		.result = REJECT,
 	},
@@ -4315,7 +4755,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 min value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4337,7 +4777,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4362,7 +4802,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
@@ -4389,7 +4829,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 		.result_unpriv = REJECT,
@@ -4419,12 +4859,417 @@ static struct bpf_test tests[] = {
 				    offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3, 11 },
+		.fixup_map_hash_48b = { 3, 11 },
 		.errstr = "R0 pointer += pointer",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
+		"direct packet read test#1 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_type)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, queue_mapping)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, protocol)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_present)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "invalid bpf_context access off=76 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#2 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_tci)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_proto)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff,
+					     ingress_ifindex)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#3 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#4 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of tc_classid for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of data_meta for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_meta)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of flow_keys for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, flow_keys)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid write access to napi_id for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"valid cgroup storage access",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_8b = { 1 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "fd 1 is not pointing to valid bpf_map",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=256 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 7),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.errstr_unpriv = "R2 leaks addr into helper function",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"valid per-cpu cgroup storage access",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_8b = { 1 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "fd 1 is not pointing to valid bpf_map",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=256 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 7),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.errstr_unpriv = "R2 leaks addr into helper function",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
 		"multiple registers share map_lookup_elem result",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_1, 10),
@@ -4439,7 +5284,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4460,8 +5305,8 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4481,8 +5326,8 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4502,8 +5347,8 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4528,7 +5373,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = REJECT,
 		.errstr = "R4 !read_ok",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -4556,7 +5401,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4577,7 +5422,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT,
 		.errstr_unpriv = "R0 leaks addr",
@@ -4769,6 +5614,24 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
 	},
 	{
+		"make headroom for LWT_XMIT",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+			/* split for s390 to succeed */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 42),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
+	{
 		"invalid access of tc_classid for LWT_IN",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
@@ -4809,11 +5672,11 @@ static struct bpf_test tests[] = {
 				      offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 2 },
+		.fixup_map_hash_8b = { 2 },
 		.errstr_unpriv = "R2 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = REJECT,
-		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.errstr = "BPF_XADD stores into R1 ctx is not allowed",
 	},
 	{
 		"leak pointer into ctx 2",
@@ -4828,7 +5691,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R10 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = REJECT,
-		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.errstr = "BPF_XADD stores into R1 ctx is not allowed",
 	},
 	{
 		"leak pointer into ctx 3",
@@ -4839,7 +5702,7 @@ static struct bpf_test tests[] = {
 				      offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.errstr_unpriv = "R2 leaks addr into ctx",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4861,7 +5724,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr_unpriv = "R6 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4881,7 +5744,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4900,7 +5763,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4918,7 +5781,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -4938,7 +5801,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=56",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -4958,7 +5821,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -4982,7 +5845,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5003,7 +5866,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5023,7 +5886,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5047,7 +5910,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5069,7 +5932,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5091,7 +5954,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5116,7 +5979,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5138,7 +6001,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5158,7 +6021,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5183,7 +6046,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5206,7 +6069,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5229,7 +6092,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5255,7 +6118,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5278,7 +6141,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5300,7 +6163,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5322,7 +6185,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 unbounded memory access",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5348,7 +6211,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=45",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5372,7 +6235,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5395,7 +6258,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 unbounded memory access",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5419,7 +6282,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5442,7 +6305,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 unbounded memory access",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5467,7 +6330,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5491,7 +6354,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5515,7 +6378,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5540,7 +6403,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5564,7 +6427,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5588,12 +6451,513 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
 	{
+		"map access: known scalar += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += known scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: unknown scalar += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += unknown scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 pointer += pointer prohibited",
+	},
+	{
+		"map access: known scalar -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R1 tried to subtract pointer from scalar",
+	},
+	{
+		"map access: value_ptr -= known scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 min value is outside of the array range",
+	},
+	{
+		"map access: value_ptr -= known scalar, 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_IMM(BPF_REG_1, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: unknown scalar -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R1 tried to subtract pointer from scalar",
+	},
+	{
+		"map access: value_ptr -= unknown scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 min value is negative",
+	},
+	{
+		"map access: value_ptr -= unknown scalar, 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+		.errstr_unpriv = "R0 pointer -= pointer prohibited",
+	},
+	{
+		"map lookup helper access to map",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 8 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map update helper access to map",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 10 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map update helper access to map: wrong size",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_8b = { 3 },
+		.fixup_map_hash_16b = { 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=0 size=16",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      offsetof(struct other_val, bar)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 9 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm): out-of-bound 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      sizeof(struct other_val) - 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 9 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm): out-of-bound 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 9 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				      offsetof(struct other_val, bar)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 10 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg): out-of-bound 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				      sizeof(struct other_val) - 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg): out-of-bound 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, -4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				    offsetof(struct other_val, bar), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 11 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable): no max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 10 },
+		.result = REJECT,
+		.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable): wrong max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				    offsetof(struct other_val, bar) + 1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_16b = { 3, 11 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=9 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
 		"map element value is preserved across register spilling",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5610,7 +6974,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -5631,7 +6995,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -5648,7 +7012,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R1 !read_ok",
 		.errstr = "R1 !read_ok",
 		.result = REJECT,
@@ -5682,7 +7046,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -5710,7 +7074,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -5729,7 +7093,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 bitwise operator &= on pointer",
 		.result = REJECT,
 	},
@@ -5746,7 +7110,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 32-bit pointer arithmetic prohibited",
 		.result = REJECT,
 	},
@@ -5763,7 +7127,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 pointer arithmetic with /= operator",
 		.result = REJECT,
 	},
@@ -5780,7 +7144,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "invalid mem access 'inv'",
 		.result = REJECT,
@@ -5804,7 +7168,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
 	},
@@ -5827,7 +7191,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6073,7 +7437,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6099,7 +7463,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=49",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6127,7 +7491,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6154,7 +7518,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6226,7 +7590,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -6251,7 +7615,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -6274,7 +7638,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -6355,7 +7719,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6376,7 +7740,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6396,7 +7760,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6471,7 +7835,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -6501,7 +7865,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -6522,7 +7886,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_map_lookup_elem),
-			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map_in_map = { 3 },
@@ -6545,11 +7909,11 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_map_lookup_elem),
-			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map_in_map = { 3 },
-		.errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+		.errstr = "R1 pointer arithmetic on map_ptr prohibited",
 		.result = REJECT,
 	},
 	{
@@ -6567,7 +7931,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_map_lookup_elem),
-			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map_in_map = { 3 },
@@ -6854,7 +8218,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -6878,7 +8242,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -6904,7 +8268,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -6929,7 +8293,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -6953,7 +8317,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -6977,7 +8341,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7023,7 +8387,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7048,7 +8412,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7074,7 +8438,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7099,7 +8463,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7126,7 +8490,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7152,7 +8516,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7181,7 +8545,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7211,8 +8575,8 @@ static struct bpf_test tests[] = {
 			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R0 invalid mem access 'inv'",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -7239,7 +8603,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 		.result_unpriv = REJECT,
@@ -7266,7 +8630,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 	},
@@ -7291,7 +8655,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 	},
@@ -7317,7 +8681,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -7342,7 +8706,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 4294967295",
 		.result = REJECT
 	},
@@ -7368,7 +8732,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 min value is outside of the array range",
 		.result = REJECT
 	},
@@ -7392,7 +8756,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "value_size=8 off=1073741825",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -7417,7 +8781,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "value 1073741823",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -7453,7 +8817,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -7492,7 +8856,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		/* not actually fully unbounded, but the bound is very high */
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
@@ -7535,7 +8899,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		/* not actually fully unbounded, but the bound is very high */
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
@@ -7564,7 +8928,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -7591,7 +8955,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT
 	},
@@ -7621,7 +8985,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
 	},
@@ -7641,7 +9005,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 2147483646",
 		.result = REJECT
 	},
@@ -7663,7 +9027,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "pointer offset 1073741822",
 		.result = REJECT
 	},
@@ -7684,7 +9048,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "pointer offset -1073741822",
 		.result = REJECT
 	},
@@ -7706,7 +9070,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 1000000000000",
 		.result = REJECT
 	},
@@ -7726,7 +9090,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
@@ -7747,7 +9111,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
@@ -7815,7 +9179,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.errstr = "variable stack read R2",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
@@ -7896,7 +9260,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
@@ -8190,7 +9554,7 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, mark)),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not",
+		.errstr = "dereference of modified ctx ptr",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -8223,7 +9587,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.errstr = "R3 pointer arithmetic on pkt_end",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -8242,7 +9606,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.errstr = "R3 pointer arithmetic on pkt_end",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -9183,7 +10547,7 @@ static struct bpf_test tests[] = {
 		"check deducing bounds from const, 5",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
 			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
 			BPF_EXIT_INSN(),
 		},
@@ -9430,7 +10794,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 16 },
+		.fixup_map_hash_8b = { 16 },
 		.result = REJECT,
 		.errstr = "R0 min value is outside of the array range",
 	},
@@ -10381,7 +11745,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 0 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = ACCEPT,
 	},
 	{
@@ -10436,7 +11800,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 1 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = ACCEPT,
 	},
 	{
@@ -10491,7 +11855,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 1 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = REJECT,
 		.errstr = "invalid read from stack off -16+0 size 8",
 	},
@@ -10563,7 +11927,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
 	},
@@ -10635,7 +11999,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = ACCEPT,
 	},
 	{
@@ -10706,7 +12070,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -8),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
 	},
@@ -10778,7 +12142,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = ACCEPT,
 	},
 	{
@@ -10849,7 +12213,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "R0 invalid mem access 'inv'",
 	},
@@ -11194,7 +12558,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 13 },
+		.fixup_map_hash_8b = { 13 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -11221,12 +12585,118 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 6 },
+		.fixup_map_hash_48b = { 6 },
 		.errstr = "invalid indirect read from stack off -8+0 size 8",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
 	{
+		"calls: two calls returning different map pointers for lookup (hash, array)",
+		.insns = {
+			/* main prog */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+			BPF_CALL_REL(11),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_CALL_REL(12),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			/* subprog 1 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* subprog 2 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map_hash_48b = { 13 },
+		.fixup_map_array_48b = { 16 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"calls: two calls returning different map pointers for lookup (hash, map in map)",
+		.insns = {
+			/* main prog */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+			BPF_CALL_REL(11),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_CALL_REL(12),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			/* subprog 1 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* subprog 2 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map_in_map = { 16 },
+		.fixup_map_array_48b = { 13 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'map_ptr'",
+	},
+	{
+		"cond: two branches returning different map pointers for lookup (tail, tail)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 5 },
+		.fixup_prog2 = { 2 },
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "tail_call abusing map_ptr",
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"cond: two branches returning same map pointers for lookup (tail, tail)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog2 = { 2, 5 },
+		.result_unpriv = ACCEPT,
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
 		"search pruning: all branches should be verified (nop operation)",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -11248,7 +12718,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R6 invalid mem access 'inv'",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -11272,7 +12742,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "invalid read from stack off -16+0 size 8",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -11394,7 +12864,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = REJECT,
 		.errstr = "misaligned value access off",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -11420,9 +12890,1059 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "BPF_XADD stores into R2 packet",
+		.errstr = "BPF_XADD stores into R2 pkt is not allowed",
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
+	{
+		"xadd/w check whether src/dst got mangled, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 3,
+	},
+	{
+		"xadd/w check whether src/dst got mangled, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 3,
+	},
+	{
+		"bpf_get_stack return R0 within range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
+			BPF_MOV64_IMM(BPF_REG_4, 256),
+			BPF_EMIT_CALL(BPF_FUNC_get_stack),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
+			BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_EMIT_CALL(BPF_FUNC_get_stack),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_48b = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"ld_abs: invalid op 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_DW, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "unknown opcode",
+	},
+	{
+		"ld_abs: invalid op 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 256),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_IND(BPF_DW, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "unknown opcode",
+	},
+	{
+		"ld_abs: nmap reduced",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_H, 12),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28),
+			BPF_LD_ABS(BPF_H, 12),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26),
+			BPF_MOV32_IMM(BPF_REG_0, 18),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64),
+			BPF_LD_IND(BPF_W, BPF_REG_7, 14),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60),
+			BPF_MOV32_IMM(BPF_REG_0, 280971478),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60),
+			BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15),
+			BPF_LD_ABS(BPF_H, 12),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13),
+			BPF_MOV32_IMM(BPF_REG_0, 22),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+			BPF_LD_IND(BPF_H, BPF_REG_7, 14),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52),
+			BPF_MOV32_IMM(BPF_REG_0, 17366),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52),
+			BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV32_IMM(BPF_REG_0, 256),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0,
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+			0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 256,
+	},
+	{
+		"ld_abs: div + abs, test 1",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+			BPF_LD_ABS(BPF_B, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+			BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 10,
+	},
+	{
+		"ld_abs: div + abs, test 2",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+			BPF_LD_ABS(BPF_B, 128),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+			BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"ld_abs: div + abs, test 3",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"ld_abs: div + abs, test 4",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_B, 256),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"ld_abs: vlan + abs, test 1",
+		.insns = { },
+		.data = {
+			0x34,
+		},
+		.fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0xbef,
+	},
+	{
+		"ld_abs: vlan + abs, test 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_6, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_vlan_push),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			0x34,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"ld_abs: jump around ld_abs",
+		.insns = { },
+		.data = {
+			10, 11,
+		},
+		.fill_helper = bpf_fill_jump_around_ld_abs,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 10,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 1",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 4090,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 2",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2047,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 3",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 511,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 4",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 5,
+	},
+	{
+		"pass unmodified ctx pointer to helper",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_update),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: leak potential reference",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: leak potential reference on stack",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: leak potential reference on stack 2",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: zero potential reference",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: copy and zero potential references",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: release reference without check",
+		.insns = {
+			BPF_SK_LOOKUP,
+			/* reference in r0 may be NULL */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=sock_or_null expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: release reference",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: release reference 2",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: release reference twice",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=inv expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: release reference twice inside branch",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=inv expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: alloc, check, free in one subbranch",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+			/* if (offsetof(skb, mark) > data_len) exit; */
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_SK_LOOKUP,
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
+			/* Leak reference in R0 */
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: alloc, check, free in both subbranches",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+			/* if (offsetof(skb, mark) > data_len) exit; */
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_SK_LOOKUP,
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking in call: free reference in subprog",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"pass modified ctx pointer to helper, 1",
+		.insns = {
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_update),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"pass modified ctx pointer to helper, 2",
+		.insns = {
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_socket_cookie),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.errstr_unpriv = "dereference of modified ctx ptr",
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"pass modified ctx pointer to helper, 3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_update),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "variable ctx access var_off=(0x0; 0x4)",
+	},
+	{
+		"mov64 src == dst",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_2),
+			// Check bounds are OK
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"mov64 src != dst",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+			// Check bounds are OK
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking in call: free reference in subprog and outside",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=inv expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking in call: alloc & leak reference in subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
+			BPF_SK_LOOKUP,
+			/* spill unchecked sk_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking in call: alloc in subprog, release outside",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_SK_LOOKUP,
+			BPF_EXIT_INSN(), /* return sk */
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = POINTER_VALUE,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking in call: sk_ptr leak into caller stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			/* spill unchecked sk_ptr into stack of caller */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_SK_LOOKUP,
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking in call: sk_ptr spill into caller stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			/* spill unchecked sk_ptr into stack of caller */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			/* now the sk_ptr is verified, free the reference */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_SK_LOOKUP,
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: allow LD_ABS",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: forbid LD_ABS while holding reference",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: allow LD_IND",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"reference tracking: forbid LD_IND while holding reference",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: check reference or tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			/* if (sk) bpf_sk_release() */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
+			/* bpf_tail_call() */
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 17 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: release reference then tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			/* if (sk) bpf_sk_release() */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			/* bpf_tail_call() */
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 18 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: leak possible reference over tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			/* Look up socket and store in REG_6 */
+			BPF_SK_LOOKUP,
+			/* bpf_tail_call() */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* if (sk) bpf_sk_release() */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 16 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "tail_call would lead to reference leak",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: leak checked reference over tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			/* Look up socket and store in REG_6 */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			/* if (!sk) goto end */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			/* bpf_tail_call() */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 17 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "tail_call would lead to reference leak",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: mangle and release sock_or_null",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R1 pointer arithmetic on sock_or_null prohibited",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: mangle and release sock",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R1 pointer arithmetic on sock prohibited",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: access member",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: write to member",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LD_IMM64(BPF_REG_2, 42),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
+				    offsetof(struct bpf_sock, mark)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "cannot write into socket",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: invalid 64-bit access of member",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid bpf_sock access off=0 size=8",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: access after release",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "!read_ok",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: direct access for lookup",
+		.insns = {
+			/* Check that the packet is at least 64B long */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+			/* sk = sk_lookup_tcp(ctx, skb->data, ...) */
+			BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: ctx read at start of subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: cross frame pruning",
+		.insns = {
+			/* r8 = !!random();
+			 * call pruner()
+			 * if (r8)
+			 *     do something bad;
+			 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_prandom_u32),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+		.result_unpriv = REJECT,
+		.errstr = "!read_ok",
+		.result = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -11435,30 +13955,31 @@ static int probe_filter_length(const struct bpf_insn *fp)
 	return len + 1;
 }
 
-static int create_map(uint32_t size_value, uint32_t max_elem)
+static int create_map(uint32_t type, uint32_t size_key,
+		      uint32_t size_value, uint32_t max_elem)
 {
 	int fd;
 
-	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
-			    size_value, max_elem, BPF_F_NO_PREALLOC);
+	fd = bpf_create_map(type, size_key, size_value, max_elem,
+			    type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
 	if (fd < 0)
 		printf("Failed to create hash map '%s'!\n", strerror(errno));
 
 	return fd;
 }
 
-static int create_prog_dummy1(void)
+static int create_prog_dummy1(enum bpf_prog_type prog_type)
 {
 	struct bpf_insn prog[] = {
 		BPF_MOV64_IMM(BPF_REG_0, 42),
 		BPF_EXIT_INSN(),
 	};
 
-	return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+	return bpf_load_program(prog_type, prog,
 				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
 }
 
-static int create_prog_dummy2(int mfd, int idx)
+static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx)
 {
 	struct bpf_insn prog[] = {
 		BPF_MOV64_IMM(BPF_REG_3, idx),
@@ -11469,24 +13990,25 @@ static int create_prog_dummy2(int mfd, int idx)
 		BPF_EXIT_INSN(),
 	};
 
-	return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+	return bpf_load_program(prog_type, prog,
 				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
 }
 
-static int create_prog_array(void)
+static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
+			     int p1key)
 {
-	int p1key = 0, p2key = 1;
+	int p2key = 1;
 	int mfd, p1fd, p2fd;
 
 	mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
-			     sizeof(int), 4, 0);
+			     sizeof(int), max_elem, 0);
 	if (mfd < 0) {
 		printf("Failed to create prog array '%s'!\n", strerror(errno));
 		return -1;
 	}
 
-	p1fd = create_prog_dummy1();
-	p2fd = create_prog_dummy2(mfd, p2key);
+	p1fd = create_prog_dummy1(prog_type);
+	p2fd = create_prog_dummy2(prog_type, mfd, p2key);
 	if (p1fd < 0 || p2fd < 0)
 		goto out;
 	if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
@@ -11526,83 +14048,217 @@ static int create_map_in_map(void)
 	return outer_map_fd;
 }
 
-static char bpf_vlog[32768];
+static int create_cgroup_storage(bool percpu)
+{
+	enum bpf_map_type type = percpu ? BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE :
+		BPF_MAP_TYPE_CGROUP_STORAGE;
+	int fd;
 
-static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
-			  int *map_fds)
+	fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
+			    TEST_DATA_LEN, 0, 0);
+	if (fd < 0)
+		printf("Failed to create cgroup storage '%s'!\n",
+		       strerror(errno));
+
+	return fd;
+}
+
+static char bpf_vlog[UINT_MAX >> 8];
+
+static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
+			  struct bpf_insn *prog, int *map_fds)
 {
-	int *fixup_map1 = test->fixup_map1;
-	int *fixup_map2 = test->fixup_map2;
-	int *fixup_prog = test->fixup_prog;
+	int *fixup_map_hash_8b = test->fixup_map_hash_8b;
+	int *fixup_map_hash_48b = test->fixup_map_hash_48b;
+	int *fixup_map_hash_16b = test->fixup_map_hash_16b;
+	int *fixup_map_array_48b = test->fixup_map_array_48b;
+	int *fixup_map_sockmap = test->fixup_map_sockmap;
+	int *fixup_map_sockhash = test->fixup_map_sockhash;
+	int *fixup_map_xskmap = test->fixup_map_xskmap;
+	int *fixup_map_stacktrace = test->fixup_map_stacktrace;
+	int *fixup_prog1 = test->fixup_prog1;
+	int *fixup_prog2 = test->fixup_prog2;
 	int *fixup_map_in_map = test->fixup_map_in_map;
+	int *fixup_cgroup_storage = test->fixup_cgroup_storage;
+	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
+
+	if (test->fill_helper)
+		test->fill_helper(test);
 
 	/* Allocating HTs with 1 elem is fine here, since we only test
 	 * for verifier and not do a runtime lookup, so the only thing
 	 * that really matters is value size in this case.
 	 */
-	if (*fixup_map1) {
-		map_fds[0] = create_map(sizeof(long long), 1);
+	if (*fixup_map_hash_8b) {
+		map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+					sizeof(long long), 1);
+		do {
+			prog[*fixup_map_hash_8b].imm = map_fds[0];
+			fixup_map_hash_8b++;
+		} while (*fixup_map_hash_8b);
+	}
+
+	if (*fixup_map_hash_48b) {
+		map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+					sizeof(struct test_val), 1);
+		do {
+			prog[*fixup_map_hash_48b].imm = map_fds[1];
+			fixup_map_hash_48b++;
+		} while (*fixup_map_hash_48b);
+	}
+
+	if (*fixup_map_hash_16b) {
+		map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+					sizeof(struct other_val), 1);
 		do {
-			prog[*fixup_map1].imm = map_fds[0];
-			fixup_map1++;
-		} while (*fixup_map1);
+			prog[*fixup_map_hash_16b].imm = map_fds[2];
+			fixup_map_hash_16b++;
+		} while (*fixup_map_hash_16b);
 	}
 
-	if (*fixup_map2) {
-		map_fds[1] = create_map(sizeof(struct test_val), 1);
+	if (*fixup_map_array_48b) {
+		map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+					sizeof(struct test_val), 1);
 		do {
-			prog[*fixup_map2].imm = map_fds[1];
-			fixup_map2++;
-		} while (*fixup_map2);
+			prog[*fixup_map_array_48b].imm = map_fds[3];
+			fixup_map_array_48b++;
+		} while (*fixup_map_array_48b);
 	}
 
-	if (*fixup_prog) {
-		map_fds[2] = create_prog_array();
+	if (*fixup_prog1) {
+		map_fds[4] = create_prog_array(prog_type, 4, 0);
 		do {
-			prog[*fixup_prog].imm = map_fds[2];
-			fixup_prog++;
-		} while (*fixup_prog);
+			prog[*fixup_prog1].imm = map_fds[4];
+			fixup_prog1++;
+		} while (*fixup_prog1);
+	}
+
+	if (*fixup_prog2) {
+		map_fds[5] = create_prog_array(prog_type, 8, 7);
+		do {
+			prog[*fixup_prog2].imm = map_fds[5];
+			fixup_prog2++;
+		} while (*fixup_prog2);
 	}
 
 	if (*fixup_map_in_map) {
-		map_fds[3] = create_map_in_map();
+		map_fds[6] = create_map_in_map();
 		do {
-			prog[*fixup_map_in_map].imm = map_fds[3];
+			prog[*fixup_map_in_map].imm = map_fds[6];
 			fixup_map_in_map++;
 		} while (*fixup_map_in_map);
 	}
+
+	if (*fixup_cgroup_storage) {
+		map_fds[7] = create_cgroup_storage(false);
+		do {
+			prog[*fixup_cgroup_storage].imm = map_fds[7];
+			fixup_cgroup_storage++;
+		} while (*fixup_cgroup_storage);
+	}
+
+	if (*fixup_percpu_cgroup_storage) {
+		map_fds[8] = create_cgroup_storage(true);
+		do {
+			prog[*fixup_percpu_cgroup_storage].imm = map_fds[8];
+			fixup_percpu_cgroup_storage++;
+		} while (*fixup_percpu_cgroup_storage);
+	}
+	if (*fixup_map_sockmap) {
+		map_fds[9] = create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_sockmap].imm = map_fds[9];
+			fixup_map_sockmap++;
+		} while (*fixup_map_sockmap);
+	}
+	if (*fixup_map_sockhash) {
+		map_fds[10] = create_map(BPF_MAP_TYPE_SOCKHASH, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_sockhash].imm = map_fds[10];
+			fixup_map_sockhash++;
+		} while (*fixup_map_sockhash);
+	}
+	if (*fixup_map_xskmap) {
+		map_fds[11] = create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_xskmap].imm = map_fds[11];
+			fixup_map_xskmap++;
+		} while (*fixup_map_xskmap);
+	}
+	if (*fixup_map_stacktrace) {
+		map_fds[12] = create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(u32),
+					 sizeof(u64), 1);
+		do {
+			prog[*fixup_map_stacktrace].imm = map_fds[12];
+			fixup_map_stacktrace++;
+		} while (*fixup_map_stacktrace);
+	}
+}
+
+static int set_admin(bool admin)
+{
+	cap_t caps;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return -1;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+				admin ? CAP_SET : CAP_CLEAR)) {
+		perror("cap_set_flag");
+		goto out;
+	}
+	if (cap_set_proc(caps)) {
+		perror("cap_set_proc");
+		goto out;
+	}
+	ret = 0;
+out:
+	if (cap_free(caps))
+		perror("cap_free");
+	return ret;
 }
 
 static void do_test_single(struct bpf_test *test, bool unpriv,
 			   int *passes, int *errors)
 {
 	int fd_prog, expected_ret, reject_from_alignment;
+	int prog_len, prog_type = test->prog_type;
 	struct bpf_insn *prog = test->insns;
-	int prog_len = probe_filter_length(prog);
-	char data_in[TEST_DATA_LEN] = {};
-	int prog_type = test->prog_type;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
+	uint32_t expected_val;
 	uint32_t retval;
 	int i, err;
 
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
 
-	do_test_fixup(test, prog, map_fds);
+	if (!prog_type)
+		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	do_test_fixup(test, prog_type, prog, map_fds);
+	prog_len = probe_filter_length(prog);
 
-	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+	fd_prog = bpf_verify_program(prog_type, prog, prog_len,
+				     test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
 				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
 	expected_err = unpriv && test->errstr_unpriv ?
 		       test->errstr_unpriv : test->errstr;
+	expected_val = unpriv && test->retval_unpriv ?
+		       test->retval_unpriv : test->retval;
 
 	reject_from_alignment = fd_prog < 0 &&
 				(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
-				strstr(bpf_vlog, "Unknown alignment.");
+				strstr(bpf_vlog, "misaligned");
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	if (reject_from_alignment) {
 		printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n",
@@ -11629,15 +14285,23 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	}
 
 	if (fd_prog >= 0) {
-		err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in),
-					NULL, NULL, &retval, NULL);
+		__u8 tmp[TEST_DATA_LEN << 2];
+		__u32 size_tmp = sizeof(tmp);
+
+		if (unpriv)
+			set_admin(true);
+		err = bpf_prog_test_run(fd_prog, 1, test->data,
+					sizeof(test->data), tmp, &size_tmp,
+					&retval, NULL);
+		if (unpriv)
+			set_admin(false);
 		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
 			printf("Unexpected bpf_prog_test_run error\n");
 			goto fail_log;
 		}
-		if (!err && retval != test->retval &&
-		    test->retval != POINTER_VALUE) {
-			printf("FAIL retval %d != %d\n", retval, test->retval);
+		if (!err && retval != expected_val &&
+		    expected_val != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, expected_val);
 			goto fail_log;
 		}
 	}
@@ -11680,33 +14344,6 @@ static bool is_admin(void)
 	return (sysadmin == CAP_SET);
 }
 
-static int set_admin(bool admin)
-{
-	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
-	int ret = -1;
-
-	caps = cap_get_proc();
-	if (!caps) {
-		perror("cap_get_proc");
-		return -1;
-	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
-				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
-		goto out;
-	}
-	if (cap_set_proc(caps)) {
-		perror("cap_set_proc");
-		goto out;
-	}
-	ret = 0;
-out:
-	if (cap_free(caps))
-		perror("cap_free");
-	return ret;
-}
-
 static void get_unpriv_disabled()
 {
 	char buf[2];
@@ -11723,6 +14360,13 @@ static void get_unpriv_disabled()
 	fclose(fd);
 }
 
+static bool test_as_unpriv(struct bpf_test *test)
+{
+	return !test->prog_type ||
+	       test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
+	       test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
 	int i, passes = 0, errors = 0, skips = 0;
@@ -11733,10 +14377,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 		/* Program types that are not supported by non-root we
 		 * skip right away.
 		 */
-		if (!test->prog_type && unpriv_disabled) {
+		if (test_as_unpriv(test) && unpriv_disabled) {
 			printf("#%d/u %s SKIP\n", i, test->descr);
 			skips++;
-		} else if (!test->prog_type) {
+		} else if (test_as_unpriv(test)) {
 			if (!unpriv)
 				set_admin(false);
 			printf("#%d/u %s ", i, test->descr);
@@ -11788,5 +14432,6 @@ int main(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
+	bpf_semi_rand_init();
 	return do_test(unpriv, from, to);
 }
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/test_xdp_vlan.c
new file mode 100644
index 000000000000..365a7d2d9f5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *  Copyright(c) 2018 Jesper Dangaard Brouer.
+ *
+ * XDP/TC VLAN manipulation example
+ *
+ * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
+ * else the VLAN tags are NOT inlined in the packet payload:
+ *
+ *  # ethtool -K ixgbe2 rxvlan off
+ *
+ * Verify setting:
+ *  # ethtool -k ixgbe2 | grep rx-vlan-offload
+ *  rx-vlan-offload: off
+ *
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here
+ *
+ *	struct vlan_hdr - vlan header
+ *	@h_vlan_TCI: priority and VLAN ID
+ *	@h_vlan_encapsulated_proto: packet type ID or len
+ */
+struct _vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT		13
+#define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
+#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
+#define VLAN_N_VID		4096
+
+struct parse_pkt {
+	__u16 l3_proto;
+	__u16 l3_offset;
+	__u16 vlan_outer;
+	__u16 vlan_inner;
+	__u8  vlan_outer_offset;
+	__u8  vlan_inner_offset;
+};
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline
+bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
+{
+	__u16 eth_type;
+	__u8 offset;
+
+	offset = sizeof(*eth);
+	/* Make sure packet is large enough for parsing eth + 2 VLAN headers */
+	if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
+		return false;
+
+	eth_type = eth->h_proto;
+
+	/* Handle outer VLAN tag */
+	if (eth_type == bpf_htons(ETH_P_8021Q)
+	    || eth_type == bpf_htons(ETH_P_8021AD)) {
+		struct _vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		pkt->vlan_outer_offset = offset;
+		pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+				& VLAN_VID_MASK;
+		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
+		offset += sizeof(*vlan_hdr);
+	}
+
+	/* Handle inner (double) VLAN tag */
+	if (eth_type == bpf_htons(ETH_P_8021Q)
+	    || eth_type == bpf_htons(ETH_P_8021AD)) {
+		struct _vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		pkt->vlan_inner_offset = offset;
+		pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+				& VLAN_VID_MASK;
+		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
+		offset += sizeof(*vlan_hdr);
+	}
+
+	pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
+	pkt->l3_offset = offset;
+
+	return true;
+}
+
+/* Hint, VLANs are choosen to hit network-byte-order issues */
+#define TESTVLAN 4011 /* 0xFAB */
+// #define TO_VLAN  4000 /* 0xFA0 (hint 0xOA0 = 160) */
+
+SEC("xdp_drop_vlan_4011")
+int  xdp_prognum0(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 };
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Drop specific VLAN ID example */
+	if (pkt.vlan_outer == TESTVLAN)
+		return XDP_ABORTED;
+	/*
+	 * Using XDP_ABORTED makes it possible to record this event,
+	 * via tracepoint xdp:xdp_exception like:
+	 *  # perf record -a -e xdp:xdp_exception
+	 *  # perf script
+	 */
+	return XDP_PASS;
+}
+/*
+Commands to setup VLAN on Linux to test packets gets dropped:
+
+ export ROOTDEV=ixgbe2
+ export VLANID=4011
+ ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
+ ip link set dev  $ROOTDEV.$VLANID up
+
+ ip link set dev $ROOTDEV mtu 1508
+ ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
+
+Load prog with ip tool:
+
+ ip link set $ROOTDEV xdp off
+ ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
+
+*/
+
+/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
+#define TO_VLAN	0
+
+SEC("xdp_vlan_change")
+int  xdp_prognum1(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 };
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Change specific VLAN ID */
+	if (pkt.vlan_outer == TESTVLAN) {
+		struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
+
+		/* Modifying VLAN, preserve top 4 bits */
+		vlan_hdr->h_vlan_TCI =
+			bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+				  | TO_VLAN);
+	}
+
+	return XDP_PASS;
+}
+
+/*
+ * Show XDP+TC can cooperate, on creating a VLAN rewriter.
+ * 1. Create a XDP prog that can "pop"/remove a VLAN header.
+ * 2. Create a TC-bpf prog that egress can add a VLAN header.
+ */
+
+#ifndef ETH_ALEN /* Ethernet MAC address length */
+#define ETH_ALEN	6	/* bytes */
+#endif
+#define VLAN_HDR_SZ	4	/* bytes */
+
+SEC("xdp_vlan_remove_outer")
+int  xdp_prognum2(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 };
+	char *dest;
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Skip packet if no outer VLAN was detected */
+	if (pkt.vlan_outer_offset == 0)
+		return XDP_PASS;
+
+	/* Moving Ethernet header, dest overlap with src, memmove handle this */
+	dest = data;
+	dest+= VLAN_HDR_SZ;
+	/*
+	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
+	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
+	 */
+	__builtin_memmove(dest, data, ETH_ALEN * 2);
+	/* Note: LLVM built-in memmove inlining require size to be constant */
+
+	/* Move start of packet header seen by Linux kernel stack */
+	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
+
+	return XDP_PASS;
+}
+
+static __always_inline
+void shift_mac_4bytes_16bit(void *data)
+{
+	__u16 *p = data;
+
+	p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
+	p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
+	p[5] = p[3];
+	p[4] = p[2];
+	p[3] = p[1];
+	p[2] = p[0];
+}
+
+static __always_inline
+void shift_mac_4bytes_32bit(void *data)
+{
+	__u32 *p = data;
+
+	/* Assuming VLAN hdr present. The 4 bytes in p[3] that gets
+	 * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
+	 * The vlan_hdr->h_vlan_encapsulated_proto take over role as
+	 * ethhdr->h_proto.
+	 */
+	p[3] = p[2];
+	p[2] = p[1];
+	p[1] = p[0];
+}
+
+SEC("xdp_vlan_remove_outer2")
+int  xdp_prognum3(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct ethhdr *orig_eth = data;
+	struct parse_pkt pkt = { 0 };
+
+	if (!parse_eth_frame(orig_eth, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Skip packet if no outer VLAN was detected */
+	if (pkt.vlan_outer_offset == 0)
+		return XDP_PASS;
+
+	/* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
+	shift_mac_4bytes_32bit(data);
+
+	/* Move start of packet header seen by Linux kernel stack */
+	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
+
+	return XDP_PASS;
+}
+
+/*=====================================
+ *  BELOW: TC-hook based ebpf programs
+ * ====================================
+ * The TC-clsact eBPF programs (currently) need to be attach via TC commands
+ */
+
+SEC("tc_vlan_push")
+int _tc_progA(struct __sk_buff *ctx)
+{
+	bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
+
+	return TC_ACT_OK;
+}
+/*
+Commands to setup TC to use above bpf prog:
+
+export ROOTDEV=ixgbe2
+export FILE=xdp_vlan01_kern.o
+
+# Re-attach clsact to clear/flush existing role
+tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
+tc qdisc add dev $ROOTDEV clsact
+
+# Attach BPF prog EGRESS
+tc filter add dev $ROOTDEV egress \
+  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+tc filter show dev $ROOTDEV egress
+*/
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
new file mode 100755
index 000000000000..51a3a31d1aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+TESTNAME=xdp_vlan
+
+usage() {
+  echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
+  echo ""
+  echo "Usage: $0 [-vfh]"
+  echo "  -v | --verbose : Verbose"
+  echo "  --flush        : Flush before starting (e.g. after --interactive)"
+  echo "  --interactive  : Keep netns setup running after test-run"
+  echo ""
+}
+
+cleanup()
+{
+	local status=$?
+
+	if [ "$status" = "0" ]; then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+
+	if [ -n "$INTERACTIVE" ]; then
+		echo "Namespace setup still active explore with:"
+		echo " ip netns exec ns1 bash"
+		echo " ip netns exec ns2 bash"
+		exit $status
+	fi
+
+	set +e
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+}
+
+# Using external program "getopt" to get --long-options
+OPTIONS=$(getopt -o hvfi: \
+    --long verbose,flush,help,interactive,debug -- "$@")
+if (( $? != 0 )); then
+    usage
+    echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
+    exit 2
+fi
+eval set -- "$OPTIONS"
+
+##  --- Parse command line arguments / parameters ---
+while true; do
+	case "$1" in
+	    -v | --verbose)
+		export VERBOSE=yes
+		shift
+		;;
+	    -i | --interactive | --debug )
+		INTERACTIVE=yes
+		shift
+		;;
+	    -f | --flush )
+		cleanup
+		shift
+		;;
+	    -- )
+		shift
+		break
+		;;
+	    -h | --help )
+		usage;
+		echo "selftests: $TESTNAME [SKIP] usage help info requested"
+		exit 0
+		;;
+	    * )
+		shift
+		break
+		;;
+	esac
+done
+
+if [ "$EUID" -ne 0 ]; then
+	echo "selftests: $TESTNAME [FAILED] need root privileges"
+	exit 1
+fi
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+	echo "selftests: $TESTNAME [SKIP] need ip xdp support"
+	exit 0
+fi
+
+# Interactive mode likely require us to cleanup netns
+if [ -n "$INTERACTIVE" ]; then
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+fi
+
+# Exit on failure
+set -e
+
+# Some shell-tools dependencies
+which ip > /dev/null
+which tc > /dev/null
+which ethtool > /dev/null
+
+# Make rest of shell verbose, showing comments as doc/info
+if [ -n "$VERBOSE" ]; then
+    set -v
+fi
+
+# Create two namespaces
+ip netns add ns1
+ip netns add ns2
+
+# Run cleanup if failing or on kill
+trap cleanup 0 2 3 6 9
+
+# Create veth pair
+ip link add veth1 type veth peer name veth2
+
+# Move veth1 and veth2 into the respective namespaces
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+# NOTICE: XDP require VLAN header inside packet payload
+#  - Thus, disable VLAN offloading driver features
+#  - For veth REMEMBER TX side VLAN-offload
+#
+# Disable rx-vlan-offload (mostly needed on ns1)
+ip netns exec ns1 ethtool -K veth1 rxvlan off
+ip netns exec ns2 ethtool -K veth2 rxvlan off
+#
+# Disable tx-vlan-offload (mostly needed on ns2)
+ip netns exec ns2 ethtool -K veth2 txvlan off
+ip netns exec ns1 ethtool -K veth1 txvlan off
+
+export IPADDR1=100.64.41.1
+export IPADDR2=100.64.41.2
+
+# In ns1/veth1 add IP-addr on plain net_device
+ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ns1 ip link set veth1 up
+
+# In ns2/veth2 create VLAN device
+export VLAN=4011
+export DEVNS2=veth2
+ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ns2 ip link set $DEVNS2 up
+ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+
+# Bringup lo in netns (to avoids confusing people using --interactive)
+ip netns exec ns1 ip link set lo up
+ip netns exec ns2 ip link set lo up
+
+# At this point, the hosts cannot reach each-other,
+# because ns2 are using VLAN tags on the packets.
+
+ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Okay ping fails"'
+
+
+# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
+# ----------------------------------------------------------------------
+# In ns1: ingress use XDP to remove VLAN tags
+export DEVNS1=veth1
+export FILE=test_xdp_vlan.o
+
+# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+export XDP_PROG=xdp_vlan_change
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# In ns1: egress use TC to add back VLAN tag 4011
+#  (del cmd)
+#  tc qdisc del dev $DEVNS1 clsact 2> /dev/null
+#
+ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
+ip netns exec ns1 tc filter add dev $DEVNS1 egress \
+  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+# Now the namespaces can reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
+
+# Second test: Replace xdp prog, that fully remove vlan header
+#
+# Catch kernel bug for generic-XDP, that does didn't allow us to
+# remove a VLAN header, because skb->protocol still contain VLAN
+# ETH_P_8021Q indication, and this cause overwriting of our changes.
+#
+export XDP_PROG=xdp_vlan_remove_outer2
+ip netns exec ns1 ip link set $DEVNS1 xdp off
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# Now the namespaces should still be able reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
new file mode 100644
index 000000000000..4cdb63bf0521
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include "trace_helpers.h"
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+	FILE *f = fopen("/proc/kallsyms", "r");
+	char func[256], buf[256];
+	char symbol;
+	void *addr;
+	int i = 0;
+
+	if (!f)
+		return -ENOENT;
+
+	while (!feof(f)) {
+		if (!fgets(buf, sizeof(buf), f))
+			break;
+		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+			break;
+		if (!addr)
+			continue;
+		syms[i].addr = (long) addr;
+		syms[i].name = strdup(func);
+		i++;
+	}
+	fclose(f);
+	sym_cnt = i;
+	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+	return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+	int start = 0, end = sym_cnt;
+	int result;
+
+	while (start < end) {
+		size_t mid = start + (end - start) / 2;
+
+		result = key - syms[mid].addr;
+		if (result < 0)
+			end = mid;
+		else if (result > 0)
+			start = mid + 1;
+		else
+			return &syms[mid];
+	}
+
+	if (start >= 1 && syms[start - 1].addr < key &&
+	    key < syms[start].addr)
+		/* valid ksym */
+		return &syms[start - 1];
+
+	/* out of range. return _stext */
+	return &syms[0];
+}
+
+long ksym_get_addr(const char *name)
+{
+	int i;
+
+	for (i = 0; i < sym_cnt; i++) {
+		if (strcmp(syms[i].name, name) == 0)
+			return syms[i].addr;
+	}
+
+	return 0;
+}
+
+static int page_size;
+static int page_cnt = 8;
+static struct perf_event_mmap_page *header;
+
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header)
+{
+	void *base;
+	int mmap_size;
+
+	page_size = getpagesize();
+	mmap_size = page_size * (page_cnt + 1);
+
+	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		printf("mmap err\n");
+		return -1;
+	}
+
+	*header = base;
+	return 0;
+}
+
+int perf_event_mmap(int fd)
+{
+	return perf_event_mmap_header(fd, &header);
+}
+
+static int perf_event_poll(int fd)
+{
+	struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+	return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+	struct perf_event_header header;
+	__u32 size;
+	char data[];
+};
+
+static enum bpf_perf_event_ret
+bpf_perf_event_print(struct perf_event_header *hdr, void *private_data)
+{
+	struct perf_event_sample *e = (struct perf_event_sample *)hdr;
+	perf_event_print_fn fn = private_data;
+	int ret;
+
+	if (e->header.type == PERF_RECORD_SAMPLE) {
+		ret = fn(e->data, e->size);
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			return ret;
+	} else if (e->header.type == PERF_RECORD_LOST) {
+		struct {
+			struct perf_event_header header;
+			__u64 id;
+			__u64 lost;
+		} *lost = (void *) e;
+		printf("lost %lld events\n", lost->lost);
+	} else {
+		printf("unknown event type=%d size=%d\n",
+		       e->header.type, e->header.size);
+	}
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+int perf_event_poller(int fd, perf_event_print_fn output_fn)
+{
+	enum bpf_perf_event_ret ret;
+	void *buf = NULL;
+	size_t len = 0;
+
+	for (;;) {
+		perf_event_poll(fd);
+		ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
+						 page_size, &buf, &len,
+						 bpf_perf_event_print,
+						 output_fn);
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			break;
+	}
+	free(buf);
+
+	return ret;
+}
+
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+			    int num_fds, perf_event_print_fn output_fn)
+{
+	enum bpf_perf_event_ret ret;
+	struct pollfd *pfds;
+	void *buf = NULL;
+	size_t len = 0;
+	int i;
+
+	pfds = calloc(num_fds, sizeof(*pfds));
+	if (!pfds)
+		return LIBBPF_PERF_EVENT_ERROR;
+
+	for (i = 0; i < num_fds; i++) {
+		pfds[i].fd = fds[i];
+		pfds[i].events = POLLIN;
+	}
+
+	for (;;) {
+		poll(pfds, num_fds, 1000);
+		for (i = 0; i < num_fds; i++) {
+			if (!pfds[i].revents)
+				continue;
+
+			ret = bpf_perf_event_read_simple(headers[i],
+							 page_cnt * page_size,
+							 page_size, &buf, &len,
+							 bpf_perf_event_print,
+							 output_fn);
+			if (ret != LIBBPF_PERF_EVENT_CONT)
+				break;
+		}
+	}
+	free(buf);
+	free(pfds);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
new file mode 100644
index 000000000000..18924f23db1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_HELPER_H
+#define __TRACE_HELPER_H
+
+#include <libbpf.h>
+#include <linux/perf_event.h>
+
+struct ksym {
+	long addr;
+	char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
+
+typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
+
+int perf_event_mmap(int fd);
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header);
+/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
+int perf_event_poller(int fd, perf_event_print_fn output_fn);
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+			    int num_fds, perf_event_print_fn output_fn);
+#endif
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index 4acfdebf36fa..9de8b7cb4e6d 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -6,15 +6,21 @@
 #include <stdlib.h>
 
 #define BUF_SIZE 256
-int main(void)
+
+int main(int argc, char *argv[])
 {
 	int fd = open("/dev/urandom", O_RDONLY);
 	int i;
 	char buf[BUF_SIZE];
+	int count = 4;
 
 	if (fd < 0)
 		return 1;
-	for (i = 0; i < 4; ++i)
+
+	if (argc == 2)
+		count = atoi(argv[1]);
+
+	for (i = 0; i < count; ++i)
 		read(fd, buf, BUF_SIZE);
 
 	close(fd);
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
new file mode 100755
index 000000000000..ffcd3953f94c
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_addr.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# add private ipv4 and ipv6 addresses to loopback
+
+readonly V6_INNER='100::a/128'
+readonly V4_INNER='192.168.0.1/32'
+
+if getopts ":s" opt; then
+  readonly SIT_DEV_NAME='sixtofourtest0'
+  readonly V6_SIT='2::/64'
+  readonly V4_SIT='172.17.0.1/32'
+  shift
+fi
+
+fail() {
+  echo "error: $*" 1>&2
+  exit 1
+}
+
+setup() {
+  ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
+  ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
+
+  if [[ -n "${V6_SIT}" ]]; then
+    ip link add "${SIT_DEV_NAME}" type sit remote any local any \
+	    || fail 'failed to add sit'
+    ip link set dev "${SIT_DEV_NAME}" up \
+	    || fail 'failed to bring sit device up'
+    ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
+	    || fail 'failed to setup v6 SIT address'
+    ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
+	    || fail 'failed to setup v4 SIT address'
+  fi
+
+  sleep 2	# avoid race causing bind to fail
+}
+
+cleanup() {
+  if [[ -n "${V6_SIT}" ]]; then
+    ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
+    ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
+    ip link del "${SIT_DEV_NAME}"
+  fi
+
+  ip -4 addr del "${V4_INNER}" dev lo
+  ip -6 addr del "${V6_INNER}" dev lo
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
new file mode 100755
index 000000000000..e24949ed3a20
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_tunnels.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# setup tunnels for flow dissection test
+
+readonly SUFFIX="test_$(mktemp -u XXXX)"
+CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
+
+setup() {
+  ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
+  ip link add "gre_${SUFFIX}" type gre ${CONFIG}
+  ip link add "sit_${SUFFIX}" type sit ${CONFIG}
+
+  echo "tunnels before test:"
+  ip tunnel show
+
+  ip link set "ipip_${SUFFIX}" up
+  ip link set "gre_${SUFFIX}" up
+  ip link set "sit_${SUFFIX}" up
+}
+
+
+cleanup() {
+  ip tunnel del "ipip_${SUFFIX}"
+  ip tunnel del "gre_${SUFFIX}"
+  ip tunnel del "sit_${SUFFIX}"
+
+  echo "tunnels after test:"
+  ip tunnel show
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 3fece06e9f64..f82dcc1f8841 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -143,10 +143,14 @@ void suspend(void)
 	int err;
 	struct itimerspec spec = {};
 
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
+
 	power_state_fd = open("/sys/power/state", O_RDWR);
 	if (power_state_fd < 0)
 		ksft_exit_fail_msg(
-			"open(\"/sys/power/state\") failed (is this test running as root?)\n");
+			"open(\"/sys/power/state\") failed %s)\n",
+			strerror(errno));
 
 	timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
 	if (timerfd < 0)
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
new file mode 100644
index 000000000000..adacda50a4b2
--- /dev/null
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -0,0 +1,2 @@
+test_memcontrol
+test_core
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 000000000000..23fbaa4a9630
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+all:
+
+TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_core
+
+include ../lib.mk
+
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_core: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 000000000000..14c9fe284806
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,371 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cgroup_util.h"
+
+static ssize_t read_text(const char *path, char *buf, size_t max_len)
+{
+	ssize_t len;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	len = read(fd, buf, max_len - 1);
+	if (len < 0)
+		goto out;
+
+	buf[len] = 0;
+out:
+	close(fd);
+	return len;
+}
+
+static ssize_t write_text(const char *path, char *buf, ssize_t len)
+{
+	int fd;
+
+	fd = open(path, O_WRONLY | O_APPEND);
+	if (fd < 0)
+		return fd;
+
+	len = write(fd, buf, len);
+	if (len < 0) {
+		close(fd);
+		return len;
+	}
+
+	close(fd);
+
+	return len;
+}
+
+char *cg_name(const char *root, const char *name)
+{
+	size_t len = strlen(root) + strlen(name) + 2;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s", root, name);
+
+	return ret;
+}
+
+char *cg_name_indexed(const char *root, const char *name, int index)
+{
+	size_t len = strlen(root) + strlen(name) + 10;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s_%d", root, name, index);
+
+	return ret;
+}
+
+int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+	if (read_text(path, buf, len) >= 0)
+		return 0;
+
+	return -1;
+}
+
+int cg_read_strcmp(const char *cgroup, const char *control,
+		   const char *expected)
+{
+	size_t size;
+	char *buf;
+	int ret;
+
+	/* Handle the case of comparing against empty string */
+	if (!expected)
+		size = 32;
+	else
+		size = strlen(expected) + 1;
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+
+	if (cg_read(cgroup, control, buf, size)) {
+		free(buf);
+		return -1;
+	}
+
+	ret = strcmp(expected, buf);
+	free(buf);
+	return ret;
+}
+
+int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
+{
+	char buf[PAGE_SIZE];
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	return strstr(buf, needle) ? 0 : -1;
+}
+
+long cg_read_long(const char *cgroup, const char *control)
+{
+	char buf[128];
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	return atol(buf);
+}
+
+long cg_read_key_long(const char *cgroup, const char *control, const char *key)
+{
+	char buf[PAGE_SIZE];
+	char *ptr;
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	ptr = strstr(buf, key);
+	if (!ptr)
+		return -1;
+
+	return atol(ptr + strlen(key));
+}
+
+int cg_write(const char *cgroup, const char *control, char *buf)
+{
+	char path[PATH_MAX];
+	ssize_t len = strlen(buf);
+
+	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+	if (write_text(path, buf, len) == len)
+		return 0;
+
+	return -1;
+}
+
+int cg_find_unified_root(char *root, size_t len)
+{
+	char buf[10 * PAGE_SIZE];
+	char *fs, *mount, *type;
+	const char delim[] = "\n\t ";
+
+	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	/*
+	 * Example:
+	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
+	 */
+	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
+		mount = strtok(NULL, delim);
+		type = strtok(NULL, delim);
+		strtok(NULL, delim);
+		strtok(NULL, delim);
+		strtok(NULL, delim);
+
+		if (strcmp(fs, "cgroup") == 0 &&
+		    strcmp(type, "cgroup2") == 0) {
+			strncpy(root, mount, len);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+int cg_create(const char *cgroup)
+{
+	return mkdir(cgroup, 0644);
+}
+
+static int cg_killall(const char *cgroup)
+{
+	char buf[PAGE_SIZE];
+	char *ptr = buf;
+
+	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+		return -1;
+
+	while (ptr < buf + sizeof(buf)) {
+		int pid = strtol(ptr, &ptr, 10);
+
+		if (pid == 0)
+			break;
+		if (*ptr)
+			ptr++;
+		else
+			break;
+		if (kill(pid, SIGKILL))
+			return -1;
+	}
+
+	return 0;
+}
+
+int cg_destroy(const char *cgroup)
+{
+	int ret;
+
+retry:
+	ret = rmdir(cgroup);
+	if (ret && errno == EBUSY) {
+		ret = cg_killall(cgroup);
+		if (ret)
+			return ret;
+		usleep(100);
+		goto retry;
+	}
+
+	if (ret && errno == ENOENT)
+		ret = 0;
+
+	return ret;
+}
+
+int cg_enter_current(const char *cgroup)
+{
+	char pidbuf[64];
+
+	snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
+	return cg_write(cgroup, "cgroup.procs", pidbuf);
+}
+
+int cg_run(const char *cgroup,
+	   int (*fn)(const char *cgroup, void *arg),
+	   void *arg)
+{
+	int pid, retcode;
+
+	pid = fork();
+	if (pid < 0) {
+		return pid;
+	} else if (pid == 0) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%d", getpid());
+		if (cg_write(cgroup, "cgroup.procs", buf))
+			exit(EXIT_FAILURE);
+		exit(fn(cgroup, arg));
+	} else {
+		waitpid(pid, &retcode, 0);
+		if (WIFEXITED(retcode))
+			return WEXITSTATUS(retcode);
+		else
+			return -1;
+	}
+}
+
+int cg_run_nowait(const char *cgroup,
+		  int (*fn)(const char *cgroup, void *arg),
+		  void *arg)
+{
+	int pid;
+
+	pid = fork();
+	if (pid == 0) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%d", getpid());
+		if (cg_write(cgroup, "cgroup.procs", buf))
+			exit(EXIT_FAILURE);
+		exit(fn(cgroup, arg));
+	}
+
+	return pid;
+}
+
+int get_temp_fd(void)
+{
+	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+	char buf[PAGE_SIZE];
+	struct stat st;
+	int i;
+
+	if (fstat(fd, &st))
+		goto cleanup;
+
+	size += st.st_size;
+
+	if (ftruncate(fd, size))
+		goto cleanup;
+
+	for (i = 0; i < size; i += sizeof(buf))
+		read(fd, buf, sizeof(buf));
+
+	return 0;
+
+cleanup:
+	return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+	size_t size = (unsigned long)arg;
+	char *buf, *ptr;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	free(buf);
+	return 0;
+}
+
+int is_swap_enabled(void)
+{
+	char buf[PAGE_SIZE];
+	const char delim[] = "\n";
+	int cnt = 0;
+	char *line;
+
+	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+		cnt++;
+
+	return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+	char path[PATH_MAX];
+	int fd, len;
+
+	sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+	fd = open(path, O_WRONLY | O_APPEND);
+	if (fd < 0)
+		return fd;
+
+	len = dprintf(fd, "%d", score);
+	if (len < 0) {
+		close(fd);
+		return len;
+	}
+
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 000000000000..9ac8b7958f83
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#define PAGE_SIZE 4096
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define MB(x) (x << 20)
+
+/*
+ * Checks if two given values differ by less than err% of their sum.
+ */
+static inline int values_close(long a, long b, int err)
+{
+	return abs(a - b) <= (a + b) / 100 * err;
+}
+
+extern int cg_find_unified_root(char *root, size_t len);
+extern char *cg_name(const char *root, const char *name);
+extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern int cg_create(const char *cgroup);
+extern int cg_destroy(const char *cgroup);
+extern int cg_read(const char *cgroup, const char *control,
+		   char *buf, size_t len);
+extern int cg_read_strcmp(const char *cgroup, const char *control,
+			  const char *expected);
+extern int cg_read_strstr(const char *cgroup, const char *control,
+			  const char *needle);
+extern long cg_read_long(const char *cgroup, const char *control);
+long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_run(const char *cgroup,
+		  int (*fn)(const char *cgroup, void *arg),
+		  void *arg);
+extern int cg_enter_current(const char *cgroup);
+extern int cg_run_nowait(const char *cgroup,
+			 int (*fn)(const char *cgroup, void *arg),
+			 void *arg);
+extern int get_temp_fd(void);
+extern int alloc_pagecache(int fd, size_t size);
+extern int alloc_anon(const char *cgroup, void *arg);
+extern int is_swap_enabled(void);
+extern int set_oom_adj_score(int pid, int score);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
new file mode 100644
index 000000000000..be59f9c34ea2
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/limits.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * A(0) - B(0) - C(1)
+ *        \ D(0)
+ *
+ * A, B and C's "populated" fields would be 1 while D's 0.
+ * test that after the one process in C is moved to root,
+ * A,B and C's "populated" fields would flip to "0" and file
+ * modified events will be generated on the
+ * "cgroup.events" files of both cgroups.
+ */
+static int test_cgcore_populated(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg_test_a = NULL, *cg_test_b = NULL;
+	char *cg_test_c = NULL, *cg_test_d = NULL;
+
+	cg_test_a = cg_name(root, "cg_test_a");
+	cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
+	cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
+	cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");
+
+	if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
+		goto cleanup;
+
+	if (cg_create(cg_test_a))
+		goto cleanup;
+
+	if (cg_create(cg_test_b))
+		goto cleanup;
+
+	if (cg_create(cg_test_c))
+		goto cleanup;
+
+	if (cg_create(cg_test_d))
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_c))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_enter_current(root))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cg_test_d)
+		cg_destroy(cg_test_d);
+	if (cg_test_c)
+		cg_destroy(cg_test_c);
+	if (cg_test_b)
+		cg_destroy(cg_test_b);
+	if (cg_test_a)
+		cg_destroy(cg_test_a);
+	free(cg_test_d);
+	free(cg_test_c);
+	free(cg_test_b);
+	free(cg_test_a);
+	return ret;
+}
+
+/*
+ * A (domain threaded) - B (threaded) - C (domain)
+ *
+ * test that C can't be used until it is turned into a
+ * threaded cgroup.  "cgroup.type" file will report "domain (invalid)" in
+ * these cases. Operations which fail due to invalid topology use
+ * EOPNOTSUPP as the errno.
+ */
+static int test_cgcore_invalid_domain(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *grandparent = NULL, *parent = NULL, *child = NULL;
+
+	grandparent = cg_name(root, "cg_test_grandparent");
+	parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
+	child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
+	if (!parent || !child || !grandparent)
+		goto cleanup;
+
+	if (cg_create(grandparent))
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
+		goto cleanup;
+
+	if (!cg_enter_current(child))
+		goto cleanup;
+
+	if (errno != EOPNOTSUPP)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	if (grandparent)
+		cg_destroy(grandparent);
+	free(child);
+	free(parent);
+	free(grandparent);
+	return ret;
+}
+
+/*
+ * Test that when a child becomes threaded
+ * the parent type becomes domain threaded.
+ */
+static int test_cgcore_parent_becomes_threaded(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(child, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+
+}
+
+/*
+ * Test that there's no internal process constrain on threaded cgroups.
+ * You can add threads/processes on a parent with a controller enabled.
+ */
+static int test_cgcore_no_internal_process_constraint_on_threads(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+	    cg_read_strstr(root, "cgroup.subtree_control", "cpu")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_write(child, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+
+	if (cg_enter_current(parent))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	cg_enter_current(root);
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Test that you can't enable a controller on a child if it's not enabled
+ * on the parent.
+ */
+static int test_cgcore_top_down_constraint_enable(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (!cg_write(child, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Test that you can't disable a controller on a parent
+ * if it's enabled in a child.
+ */
+static int test_cgcore_top_down_constraint_disable(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(child, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Test internal process constraint.
+ * You can't add a pid to a domain parent if a controller is enabled.
+ */
+static int test_cgcore_internal_process_constraint(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (!cg_enter_current(parent))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+#define T(x) { x, #x }
+struct corecg_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_cgcore_internal_process_constraint),
+	T(test_cgcore_top_down_constraint_enable),
+	T(test_cgcore_top_down_constraint_disable),
+	T(test_cgcore_no_internal_process_constraint_on_threads),
+	T(test_cgcore_parent_becomes_threaded),
+	T(test_cgcore_invalid_domain),
+	T(test_cgcore_populated),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 000000000000..28d321ba311b
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1220 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <linux/oom.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the memory controller.
+ */
+static int test_memcg_subtree_control(const char *root)
+{
+	char *parent, *child, *parent2, *child2;
+	int ret = KSFT_FAIL;
+	char buf[PAGE_SIZE];
+
+	/* Create two nested cgroups with the memory controller enabled */
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
+		goto cleanup;
+
+	/* Create two nested cgroups without enabling memory controller */
+	parent2 = cg_name(root, "memcg_test_1");
+	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
+	if (!parent2 || !child2)
+		goto cleanup;
+
+	if (cg_create(parent2))
+		goto cleanup;
+
+	if (cg_create(child2))
+		goto cleanup;
+
+	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
+		goto cleanup;
+
+	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(child);
+	cg_destroy(parent);
+	free(parent);
+	free(child);
+
+	cg_destroy(child2);
+	cg_destroy(parent2);
+	free(parent2);
+	free(child2);
+
+	return ret;
+}
+
+static int alloc_anon_50M_check(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	char *buf, *ptr;
+	long anon, current;
+	int ret = -1;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current < size)
+		goto cleanup;
+
+	if (!values_close(size, current, 3))
+		goto cleanup;
+
+	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
+	if (anon < 0)
+		goto cleanup;
+
+	if (!values_close(anon, current, 3))
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(buf);
+	return ret;
+}
+
+static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	int ret = -1;
+	long current, file;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		return -1;
+
+	if (alloc_pagecache(fd, size))
+		goto cleanup;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current < size)
+		goto cleanup;
+
+	file = cg_read_key_long(cgroup, "memory.stat", "file ");
+	if (file < 0)
+		goto cleanup;
+
+	if (!values_close(file, current, 10))
+		goto cleanup;
+
+	ret = 0;
+
+cleanup:
+	close(fd);
+	return ret;
+}
+
+/*
+ * This test create a memory cgroup, allocates
+ * some anonymous memory and some pagecache
+ * and check memory.current and some memory.stat values.
+ */
+static int test_memcg_current(const char *root)
+{
+	int ret = KSFT_FAIL;
+	long current;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current != 0)
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon_50M_check, NULL))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+static int alloc_pagecache_50M(const char *cgroup, void *arg)
+{
+	int fd = (long)arg;
+
+	return alloc_pagecache(fd, MB(50));
+}
+
+static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
+{
+	int fd = (long)arg;
+	int ppid = getppid();
+
+	if (alloc_pagecache(fd, MB(50)))
+		return -1;
+
+	while (getppid() == ppid)
+		sleep(1);
+
+	return 0;
+}
+
+static int alloc_anon_noexit(const char *cgroup, void *arg)
+{
+	int ppid = getppid();
+
+	if (alloc_anon(cgroup, arg))
+		return -1;
+
+	while (getppid() == ppid)
+		sleep(1);
+
+	return 0;
+}
+
+/*
+ * Wait until processes are killed asynchronously by the OOM killer
+ * If we exceed a timeout, fail.
+ */
+static int cg_test_proc_killed(const char *cgroup)
+{
+	int limit;
+
+	for (limit = 10; limit > 0; limit--) {
+		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
+			return 0;
+
+		usleep(100000);
+	}
+	return -1;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A       memory.min = 50M,  memory.max = 200M
+ * A/B     memory.min = 50M,  memory.current = 50M
+ * A/B/C   memory.min = 75M,  memory.current = 50M
+ * A/B/D   memory.min = 25M,  memory.current = 50M
+ * A/B/E   memory.min = 500M, memory.current = 0
+ * A/B/F   memory.min = 0,    memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates a significant
+ * memory pressure in it.
+ *
+ * A/B    memory.current ~= 50M
+ * A/B/C  memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available, and checks
+ * checks that memory.min protects pagecache even
+ * in this case.
+ */
+static int test_memcg_min(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent[3] = {NULL};
+	char *children[4] = {NULL};
+	long c[4];
+	int i, attempts;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	parent[0] = cg_name(root, "memcg_test_0");
+	if (!parent[0])
+		goto cleanup;
+
+	parent[1] = cg_name(parent[0], "memcg_test_1");
+	if (!parent[1])
+		goto cleanup;
+
+	parent[2] = cg_name(parent[0], "memcg_test_2");
+	if (!parent[2])
+		goto cleanup;
+
+	if (cg_create(parent[0]))
+		goto cleanup;
+
+	if (cg_read_long(parent[0], "memory.min")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.max", "200M"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_create(parent[1]))
+		goto cleanup;
+
+	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(parent[2]))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+		if (!children[i])
+			goto cleanup;
+
+		if (cg_create(children[i]))
+			goto cleanup;
+
+		if (i == 2)
+			continue;
+
+		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
+			      (void *)(long)fd);
+	}
+
+	if (cg_write(parent[0], "memory.min", "50M"))
+		goto cleanup;
+	if (cg_write(parent[1], "memory.min", "50M"))
+		goto cleanup;
+	if (cg_write(children[0], "memory.min", "75M"))
+		goto cleanup;
+	if (cg_write(children[1], "memory.min", "25M"))
+		goto cleanup;
+	if (cg_write(children[2], "memory.min", "500M"))
+		goto cleanup;
+	if (cg_write(children[3], "memory.min", "0"))
+		goto cleanup;
+
+	attempts = 0;
+	while (!values_close(cg_read_long(parent[1], "memory.current"),
+			     MB(150), 3)) {
+		if (attempts++ > 5)
+			break;
+		sleep(1);
+	}
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		c[i] = cg_read_long(children[i], "memory.current");
+
+	if (!values_close(c[0], MB(33), 10))
+		goto cleanup;
+
+	if (!values_close(c[1], MB(17), 10))
+		goto cleanup;
+
+	if (!values_close(c[2], 0, 1))
+		goto cleanup;
+
+	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+		if (!children[i])
+			continue;
+
+		cg_destroy(children[i]);
+		free(children[i]);
+	}
+
+	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+		if (!parent[i])
+			continue;
+
+		cg_destroy(parent[i]);
+		free(parent[i]);
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A       memory.low = 50M,  memory.max = 200M
+ * A/B     memory.low = 50M,  memory.current = 50M
+ * A/B/C   memory.low = 75M,  memory.current = 50M
+ * A/B/D   memory.low = 25M,  memory.current = 50M
+ * A/B/E   memory.low = 500M, memory.current = 0
+ * A/B/F   memory.low = 0,    memory.current = 50M
+ *
+ * Usages are pagecache.
+ * Then it creates A/G an creates a significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B    memory.current ~= 50M
+ * A/B/   memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available,
+ * and checks low and oom events in memory.events.
+ */
+static int test_memcg_low(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent[3] = {NULL};
+	char *children[4] = {NULL};
+	long low, oom;
+	long c[4];
+	int i;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	parent[0] = cg_name(root, "memcg_test_0");
+	if (!parent[0])
+		goto cleanup;
+
+	parent[1] = cg_name(parent[0], "memcg_test_1");
+	if (!parent[1])
+		goto cleanup;
+
+	parent[2] = cg_name(parent[0], "memcg_test_2");
+	if (!parent[2])
+		goto cleanup;
+
+	if (cg_create(parent[0]))
+		goto cleanup;
+
+	if (cg_read_long(parent[0], "memory.low"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.max", "200M"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_create(parent[1]))
+		goto cleanup;
+
+	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(parent[2]))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+		if (!children[i])
+			goto cleanup;
+
+		if (cg_create(children[i]))
+			goto cleanup;
+
+		if (i == 2)
+			continue;
+
+		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
+			goto cleanup;
+	}
+
+	if (cg_write(parent[0], "memory.low", "50M"))
+		goto cleanup;
+	if (cg_write(parent[1], "memory.low", "50M"))
+		goto cleanup;
+	if (cg_write(children[0], "memory.low", "75M"))
+		goto cleanup;
+	if (cg_write(children[1], "memory.low", "25M"))
+		goto cleanup;
+	if (cg_write(children[2], "memory.low", "500M"))
+		goto cleanup;
+	if (cg_write(children[3], "memory.low", "0"))
+		goto cleanup;
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		c[i] = cg_read_long(children[i], "memory.current");
+
+	if (!values_close(c[0], MB(33), 10))
+		goto cleanup;
+
+	if (!values_close(c[1], MB(17), 10))
+		goto cleanup;
+
+	if (!values_close(c[2], 0, 1))
+		goto cleanup;
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
+		fprintf(stderr,
+			"memory.low prevents from allocating anon memory\n");
+		goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		oom = cg_read_key_long(children[i], "memory.events", "oom ");
+		low = cg_read_key_long(children[i], "memory.events", "low ");
+
+		if (oom)
+			goto cleanup;
+		if (i < 2 && low <= 0)
+			goto cleanup;
+		if (i >= 2 && low)
+			goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+		if (!children[i])
+			continue;
+
+		cg_destroy(children[i]);
+		free(children[i]);
+	}
+
+	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+		if (!parent[i])
+			continue;
+
+		cg_destroy(parent[i]);
+		free(parent[i]);
+	}
+	close(fd);
+	return ret;
+}
+
+static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	int ret = -1;
+	long current;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		return -1;
+
+	if (alloc_pagecache(fd, size))
+		goto cleanup;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current <= MB(29) || current > MB(30))
+		goto cleanup;
+
+	ret = 0;
+
+cleanup:
+	close(fd);
+	return ret;
+
+}
+
+/*
+ * This test checks that memory.high limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_high(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long high;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.high", "30M"))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+		goto cleanup;
+
+	high = cg_read_key_long(memcg, "memory.events", "high ");
+	if (high <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test checks that memory.max limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long current, max;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	/* Should be killed by OOM killer */
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current > MB(30) || !current)
+		goto cleanup;
+
+	max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (max <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
+{
+	long mem_max = (long)arg;
+	size_t size = MB(50);
+	char *buf, *ptr;
+	long mem_current, swap_current;
+	int ret = -1;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	mem_current = cg_read_long(cgroup, "memory.current");
+	if (!mem_current || !values_close(mem_current, mem_max, 3))
+		goto cleanup;
+
+	swap_current = cg_read_long(cgroup, "memory.swap.current");
+	if (!swap_current ||
+	    !values_close(mem_current + swap_current, size, 3))
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(buf);
+	return ret;
+}
+
+/*
+ * This test checks that memory.swap.max limits the amount of
+ * anonymous memory which can be swapped out.
+ */
+static int test_memcg_swap_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long max;
+
+	if (!is_swap_enabled())
+		return KSFT_SKIP;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_long(memcg, "memory.swap.current")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	/* Should be killed by OOM killer */
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
+		goto cleanup;
+
+	max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (max <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM. Then it checks for oom and oom_kill events in
+ * memory.events.
+ */
+static int test_memcg_oom_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+struct tcp_server_args {
+	unsigned short port;
+	int ctl[2];
+};
+
+static int tcp_server(const char *cgroup, void *arg)
+{
+	struct tcp_server_args *srv_args = arg;
+	struct sockaddr_in6 saddr = { 0 };
+	socklen_t slen = sizeof(saddr);
+	int sk, client_sk, ctl_fd, yes = 1, ret = -1;
+
+	close(srv_args->ctl[0]);
+	ctl_fd = srv_args->ctl[1];
+
+	saddr.sin6_family = AF_INET6;
+	saddr.sin6_addr = in6addr_any;
+	saddr.sin6_port = htons(srv_args->port);
+
+	sk = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sk < 0)
+		return ret;
+
+	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
+		goto cleanup;
+
+	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
+		write(ctl_fd, &errno, sizeof(errno));
+		goto cleanup;
+	}
+
+	if (listen(sk, 1))
+		goto cleanup;
+
+	ret = 0;
+	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	client_sk = accept(sk, NULL, NULL);
+	if (client_sk < 0)
+		goto cleanup;
+
+	ret = -1;
+	for (;;) {
+		uint8_t buf[0x100000];
+
+		if (write(client_sk, buf, sizeof(buf)) <= 0) {
+			if (errno == ECONNRESET)
+				ret = 0;
+			break;
+		}
+	}
+
+	close(client_sk);
+
+cleanup:
+	close(sk);
+	return ret;
+}
+
+static int tcp_client(const char *cgroup, unsigned short port)
+{
+	const char server[] = "localhost";
+	struct addrinfo *ai;
+	char servport[6];
+	int retries = 0x10; /* nice round number */
+	int sk, ret;
+
+	snprintf(servport, sizeof(servport), "%hd", port);
+	ret = getaddrinfo(server, servport, NULL, &ai);
+	if (ret)
+		return ret;
+
+	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+	if (sk < 0)
+		goto free_ainfo;
+
+	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
+	if (ret < 0)
+		goto close_sk;
+
+	ret = KSFT_FAIL;
+	while (retries--) {
+		uint8_t buf[0x100000];
+		long current, sock;
+
+		if (read(sk, buf, sizeof(buf)) <= 0)
+			goto close_sk;
+
+		current = cg_read_long(cgroup, "memory.current");
+		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
+
+		if (current < 0 || sock < 0)
+			goto close_sk;
+
+		if (current < sock)
+			goto close_sk;
+
+		if (values_close(current, sock, 10)) {
+			ret = KSFT_PASS;
+			break;
+		}
+	}
+
+close_sk:
+	close(sk);
+free_ainfo:
+	freeaddrinfo(ai);
+	return ret;
+}
+
+/*
+ * This test checks socket memory accounting.
+ * The test forks a TCP server listens on a random port between 1000
+ * and 61000. Once it gets a client connection, it starts writing to
+ * its socket.
+ * The TCP client interleaves reads from the socket with check whether
+ * memory.current and memory.stat.sock are similar.
+ */
+static int test_memcg_sock(const char *root)
+{
+	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
+	unsigned short port;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	while (bind_retries--) {
+		struct tcp_server_args args;
+
+		if (pipe(args.ctl))
+			goto cleanup;
+
+		port = args.port = 1000 + rand() % 60000;
+
+		pid = cg_run_nowait(memcg, tcp_server, &args);
+		if (pid < 0)
+			goto cleanup;
+
+		close(args.ctl[1]);
+		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
+			goto cleanup;
+		close(args.ctl[0]);
+
+		if (!err)
+			break;
+		if (err != EADDRINUSE)
+			goto cleanup;
+
+		waitpid(pid, NULL, 0);
+	}
+
+	if (err == EADDRINUSE) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (tcp_client(memcg, port) != KSFT_PASS)
+		goto cleanup;
+
+	waitpid(pid, &err, 0);
+	if (WEXITSTATUS(err))
+		goto cleanup;
+
+	if (cg_read_long(memcg, "memory.current") < 0)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.stat", "sock "))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the leaf (but not the parent) were killed.
+ */
+static int test_memcg_oom_group_leaf_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent, *child;
+
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(child, "memory.max", "50M"))
+		goto cleanup;
+
+	if (cg_write(child, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(child, "memory.oom.group", "1"))
+		goto cleanup;
+
+	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+	if (!cg_run(child, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_test_proc_killed(child))
+		goto cleanup;
+
+	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
+		goto cleanup;
+
+	if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the parent and leaf were killed.
+ */
+static int test_memcg_oom_group_parent_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent, *child;
+
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "memory.max", "80M"))
+		goto cleanup;
+
+	if (cg_write(parent, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(parent, "memory.oom.group", "1"))
+		goto cleanup;
+
+	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+
+	if (!cg_run(child, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_test_proc_killed(child))
+		goto cleanup;
+	if (cg_test_proc_killed(parent))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes were killed except those set with OOM_SCORE_ADJ_MIN
+ */
+static int test_memcg_oom_group_score_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	int safe_pid;
+
+	memcg = cg_name(root, "memcg_test_0");
+
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "50M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.oom.group", "1"))
+		goto cleanup;
+
+	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
+		goto cleanup;
+
+	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+		goto cleanup;
+
+	if (kill(safe_pid, SIGKILL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (memcg)
+		cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+
+#define T(x) { x, #x }
+struct memcg_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_memcg_subtree_control),
+	T(test_memcg_current),
+	T(test_memcg_min),
+	T(test_memcg_low),
+	T(test_memcg_high),
+	T(test_memcg_max),
+	T(test_memcg_oom_events),
+	T(test_memcg_swap_max),
+	T(test_memcg_sock),
+	T(test_memcg_oom_group_leaf_events),
+	T(test_memcg_oom_group_parent_events),
+	T(test_memcg_oom_group_score_events),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	/*
+	 * Check that memory controller is available:
+	 * memory is listed in cgroup.controllers
+	 */
+	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+		ksft_exit_skip("memory controller isn't available\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index f3a8933c1275..bab13dd025a6 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
 
 prerequisite()
 {
@@ -9,7 +11,7 @@ prerequisite()
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	taskset -p 01 $$
@@ -18,12 +20,12 @@ prerequisite()
 
 	if [ ! -d "$SYSFS" ]; then
 		echo $msg sysfs is not mounted >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
 		echo $msg cpu hotplug is not supported >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	echo "CPU online/offline summary:"
@@ -32,7 +34,7 @@ prerequisite()
 
 	if [[ "$online_cpus" = "$online_max" ]]; then
 		echo "$msg: since there is only one cpu: $online_cpus"
-		exit 0
+		exit $ksft_skip
 	fi
 
 	echo -e "\t Cpus in online state: $online_cpus"
@@ -237,12 +239,12 @@ prerequisite_extra()
 
 	if [ ! -d "$DEBUGFS" ]; then
 		echo $msg debugfs is not mounted >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
 		echo $msg cpu-notifier-error-inject module is not available >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index d83922de9d89..31f8c9a76c5f 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -13,6 +13,9 @@ SYSFS=
 CPUROOT=
 CPUFREQROOT=
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 helpme()
 {
 	printf "Usage: $0 [-h] [-todg args]
@@ -38,7 +41,7 @@ prerequisite()
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 2
+		exit $ksft_skip
 	fi
 
 	taskset -p 01 $$
diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile
new file mode 100644
index 000000000000..4154c3d7aa58
--- /dev/null
+++ b/tools/testing/selftests/drivers/dma-buf/Makefile
@@ -0,0 +1,5 @@
+CFLAGS += -I../../../../../usr/include/
+
+TEST_GEN_PROGS := udmabuf
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
new file mode 100644
index 000000000000..376b1d6730bd
--- /dev/null
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <linux/memfd.h>
+#include <linux/udmabuf.h>
+
+#define TEST_PREFIX	"drivers/dma-buf/udmabuf"
+#define NUM_PAGES       4
+
+static int memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+
+int main(int argc, char *argv[])
+{
+	struct udmabuf_create create;
+	int devfd, memfd, buf, ret;
+	off_t size;
+	void *mem;
+
+	devfd = open("/dev/udmabuf", O_RDWR);
+	if (devfd < 0) {
+		printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
+		exit(77);
+	}
+
+	memfd = memfd_create("udmabuf-test", MFD_CLOEXEC);
+	if (memfd < 0) {
+		printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
+		exit(77);
+	}
+
+	size = getpagesize() * NUM_PAGES;
+	ret = ftruncate(memfd, size);
+	if (ret == -1) {
+		printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	memset(&create, 0, sizeof(create));
+
+	/* should fail (offset not page aligned) */
+	create.memfd  = memfd;
+	create.offset = getpagesize()/2;
+	create.size   = getpagesize();
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf >= 0) {
+		printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	/* should fail (size not multiple of page) */
+	create.memfd  = memfd;
+	create.offset = 0;
+	create.size   = getpagesize()/2;
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf >= 0) {
+		printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	/* should fail (not memfd) */
+	create.memfd  = 0; /* stdin */
+	create.offset = 0;
+	create.size   = size;
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf >= 0) {
+		printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	/* should work */
+	create.memfd  = memfd;
+	create.offset = 0;
+	create.size   = size;
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf < 0) {
+		printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	fprintf(stderr, "%s: ok\n", TEST_PREFIX);
+	close(buf);
+	close(memfd);
+	close(devfd);
+	return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 000000000000..76f1ab4898d9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test offloading various features of offloading gretap mirrors specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()
+{
+	tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+		      ttl 100 tos inherit allow-localremote \
+		      key 1234
+
+	tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+		      key 1234
+	ip link set h3-gt6-key vrf v$h3
+	matchall_sink_create h3-gt6-key
+
+	ip address add dev $swp3 2001:db8:3::1/64
+	ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+	ip address del dev $h3 2001:db8:3::2/64
+	ip address del dev $swp3 2001:db8:3::1/64
+
+	tunnel_destroy h3-gt6-key
+	tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+	# Set up a topology for testing underlay routes that point at an
+	# unsupported soft device.
+
+	tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+		      ttl 100 tos inherit allow-localremote
+
+	tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+	ip link set h3-gt6-soft vrf v$h3
+	matchall_sink_create h3-gt6-soft
+
+	ip link add name v1 type veth peer name v2
+	ip link set dev v1 up
+	ip address add dev v1 2001:db8:4::1/64
+
+	ip link set dev v2 vrf v$h3
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+	ip link del dev v1
+
+	tunnel_destroy h3-gt6-soft
+	tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	setup_keyful
+	setup_soft
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	cleanup_soft
+	cleanup_keyful
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type ttl inherit
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type ttl 100
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type tos 0x10
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type tos inherit
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()
+{
+	local should_fail=$1; shift
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	if ((should_fail)); then
+	    fail_test_span_gre_dir  $tundev ingress
+	else
+	    quick_test_span_gre_dir $tundev ingress
+	fi
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()
+{
+	local should_fail=$1; shift
+
+	test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+	test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	test_failable 0
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()
+{
+	test_failable 1
+
+	test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+	test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+	test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+	test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+    check_err 1 "Could not test offloaded functionality"
+    log_test "mlxsw-specific tests for mirror to gretap"
+    exit
+fi
+
+tcflags="skip_hw"
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644
index 000000000000..6f3a70df63bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+#   +--------------------------+                   +--------------------------+
+#   | H1                       |                   |                       H2 |
+#   |     + $h1                |                   |                $h2 +     |
+#   |     | 2001:db8:1:X::1/64 |                   | 2001:db8:1:X::2/64 |     |
+#   +-----|--------------------+                   +--------------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirrors                                                  |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                          + gt6-<X> (ip6gretap)              |
+#   |     | 2001:db8:2:X::1/64             : loc=2001:db8:2:X::1              |
+#   |     |                                : rem=2001:db8:2:X::2              |
+#   |     |                                : ttl=100                          |
+#   |     |                                : tos=inherit                      |
+#   |     |                                :                                  |
+#   +-----|--------------------------------:----------------------------------+
+#         |                                :
+#   +-----|--------------------------------:----------------------------------+
+#   | H3  + $h3                            + h3-gt6-<X> (ip6gretap)           |
+#   |       2001:db8:2:X::2/64               loc=2001:db8:2:X::2              |
+#   |                                        rem=2001:db8:2:X::1              |
+#   |                                        ttl=100                          |
+#   |                                        tos=inherit                      |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
+mirror_gre_ipv6_addr()
+{
+	local net=$1; shift
+	local num=$1; shift
+
+	printf "2001:db8:%x:%x" $net $num
+}
+
+mirror_gre_tunnels_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	MIRROR_GRE_BATCH_FILE="$(mktemp)"
+	for ((i=0; i < count; ++i)); do
+		local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		((mirror_gre_tunnels++))
+
+		ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+		ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+
+		ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+		ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+		tunnel_create $tun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::1 \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      ttl 100 tos inherit allow-localremote
+
+		tunnel_create $htun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      $(mirror_gre_ipv6_addr 2 $i)::1
+		ip link set $htun vrf v$h3
+		matchall_sink_create $htun
+
+		cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+			filter add dev $swp1 ingress pref 1000 \
+				protocol ipv6 \
+				flower $tcflags dst_ip $match_dip \
+				action mirred egress mirror dev $tun
+		EOF
+	done
+
+	tc -b $MIRROR_GRE_BATCH_FILE
+	check_err_fail $should_fail $? "Mirror rule insertion"
+}
+
+mirror_gre_tunnels_destroy()
+{
+	local count=$1; shift
+
+	for ((i=0; i < count; ++i)); do
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+		ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+
+		ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+		ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+		tunnel_destroy $htun
+		tunnel_destroy $tun
+	done
+}
+
+__mirror_gre_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	mirror_gre_tunnels_create $count $should_fail
+	if ((should_fail)); then
+	    return
+	fi
+
+	sleep 5
+
+	for ((i = 0; i < count; ++i)); do
+		local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local message
+
+		icmp6_capture_install $htun
+		mirror_test v$h1 "" $dip $htun 100 10
+		icmp6_capture_uninstall $htun
+	done
+}
+
+mirror_gre_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	tcflags="skip_sw"
+	__mirror_gre_test $count $should_fail
+}
+
+mirror_gre_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	mirror_gre_tunnels=0
+
+	vrf_prepare
+
+	simple_if_init $h1
+	simple_if_init $h2
+	simple_if_init $h3
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	ip link set dev $swp3 up
+}
+
+mirror_gre_cleanup()
+{
+	mirror_gre_tunnels_destroy $mirror_gre_tunnels
+
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2 down
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+
+	ip link del dev br1
+
+	simple_if_fini $h3
+	simple_if_fini $h2
+	simple_if_fini $h1
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755
index 000000000000..1ca631d5aaba
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1--it should match the original tag.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |   APP=0,5,10 .. 7,5,17                      APP=0,5,20 .. 7,5,27   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	local dscp;
+
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 10
+}
+
+h1_destroy()
+{
+	dscp_capture_uninstall $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 20
+}
+
+h2_destroy()
+{
+	dscp_capture_uninstall $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+dscp_map()
+{
+	local base=$1; shift
+
+	for prio in {0..7}; do
+		echo app=$prio,5,$((base + prio))
+	done
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
+	lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
+	lldpad_app_wait_set $swp1
+	lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+	lldpad_app_wait_del
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local dev_10=$1; shift
+	local dev_20=$1; shift
+
+	local dscp_10=$(((prio + 10) << 2))
+	local dscp_20=$(((prio + 20) << 2))
+
+	RET=0
+
+	local -A t0s
+	eval "t0s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.1 -w 2 &> /dev/null
+
+	local -A t1s
+	eval "t1s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	for key in ${!t0s[@]}; do
+		local expect
+		if ((key == prio+10 || key == prio+20)); then
+			expect=10
+		else
+			expect=0
+		fi
+
+		local delta=$((t1s[$key] - t0s[$key]))
+		((expect == delta))
+		check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()
+{
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755
index 000000000000..281d90766e12
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (which in this test uses only values 0..7) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, router reprioritizes the packets
+# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority
+# 4, which on egress maps back to DSCP 4. The response packet then gets
+# reprioritized to 6, getting DSCP 6 on egress.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |    + $swp1                                                    $swp2 +     |
+# |      192.0.2.2/28                                     192.0.2.17/28       |
+# |      APP=0,5,0 .. 7,5,7                          APP=0,5,0 .. 7,5,7       |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_update
+	test_no_update
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()
+{
+	local in=$1; shift
+
+	# This is based on rt_tos2priority in include/net/route.h. Assuming 1:1
+	# mapping between priorities and TOS, it yields a new priority for a
+	# packet with ingress priority of $in.
+	local -a reprio=(0 0 2 2 6 6 4 4)
+
+	echo ${reprio[$in]}
+}
+
+h1_create()
+{
+	local dscp;
+
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 0
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	dscp_capture_uninstall $h1 0
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 0
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	dscp_capture_uninstall $h2 0
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+dscp_map()
+{
+	local base=$1; shift
+
+	for prio in {0..7}; do
+		echo app=$prio,5,$((base + prio))
+	done
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/28
+	__simple_if_init $swp2 v$swp1 192.0.2.17/28
+
+	lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
+	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+	lldpad_app_wait_set $swp1
+	lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
+	lldpad_app_wait_del
+
+	__simple_if_fini $swp2 192.0.2.17/28
+	simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	sysctl_set net.ipv4.ip_forward_update_priority 1
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	sysctl_restore net.ipv4.ip_forward_update_priority
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local reprio=$1; shift
+	local dev1=$1; shift
+	local dev2=$1; shift
+
+	local prio2=$($reprio $prio)   # ICMP Request egress prio
+	local prio3=$($reprio $prio2)  # ICMP Response egress prio
+
+	local dscp=$((prio << 2))     # ICMP Request ingress DSCP
+	local dscp2=$((prio2 << 2))   # ICMP Request egress DSCP
+	local dscp3=$((prio3 << 2))   # ICMP Response egress DSCP
+
+	RET=0
+
+	eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.1 -w 2 &> /dev/null
+
+	eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+	for i in {0..7}; do
+		local dscpi=$((i << 2))
+		local expect2=0
+		local expect3=0
+
+		if ((i == prio2)); then
+			expect2=10
+		fi
+		if ((i == prio3)); then
+			expect3=10
+		fi
+
+		local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+		((expect2 == delta))
+		check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+		delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+		((expect3 == delta))
+		check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()
+{
+	local update=$1; shift
+	local reprio=$1; shift
+
+	sysctl_restore net.ipv4.ip_forward_update_priority
+	sysctl_set net.ipv4.ip_forward_update_priority $update
+
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+	done
+}
+
+test_update()
+{
+	__test_update 1 reprioritize
+}
+
+test_no_update()
+{
+	__test_update 0 echo
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100755
index 000000000000..117f6f35d72f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,394 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under heavy
+# MC load. This issue can be overcome by putting the switch to MC-aware mode.
+# This test verifies that UC performance stays intact even as the switch is
+# under MC flood, and therefore that the MC-aware mode is enabled and correctly
+# configured.
+#
+# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
+# at full speed. That makes it impossible to use iperf3 to simply measure the
+# throughput, because many packets (that reach $h3) don't get to the kernel at
+# all even in UDP mode (the situation is even worse in TCP mode, where one can't
+# hope to see more than a couple Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
+# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
+# each gets a different priority and we can use per-prio ethtool counters to
+# measure the throughput. In order to avoid prioritizing unicast traffic, prio
+# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
+# thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large frames.
+# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
+# multicast traffic uses 8K frames.
+#
+# +---------------------------+            +----------------------------------+
+# | H1                        |            |                               H2 |
+# |                           |            |  unicast --> + $h2.111           |
+# |                 multicast |            |  traffic     | 192.0.2.129/28    |
+# |                 traffic   |            |              | e-qos-map 0:1     |
+# |           $h1 + <-----    |            |              |                   |
+# | 192.0.2.65/28 |           |            |              + $h2               |
+# +---------------|-----------+            +--------------|-------------------+
+#                 |                                       |
+# +---------------|---------------------------------------|-------------------+
+# |         $swp1 +                                       + $swp2             |
+# |        >1Gbps |                                       | >1Gbps            |
+# | +-------------|------+                     +----------|----------------+  |
+# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
+# | |                BR1 |             SW      | BR111                     |  |
+# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
+# | +-------------|------+                     +----------|----------------+  |
+# |               \_______________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | prio qdisc: {0..7} -> 7              |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                                   +--|-----------------+
+#                                   |  + $h3          H3 |
+#                                   |  | 192.0.2.66/28   |
+#                                   |  |                 |
+#                                   |  + $h3.111         |
+#                                   |    192.0.2.130/28  |
+#                                   +--------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_mc_aware
+	test_uc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.65/28
+	mtu_set $h1 10000
+}
+
+h1_destroy()
+{
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.65/28
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 111 v$h2 192.0.2.129/28
+	ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.66/28
+	mtu_set $h3 10000
+
+	vlan_create $h3 111 v$h3 192.0.2.130/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3 192.0.2.66/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+
+	vlan_create $swp2 111
+	vlan_create $swp3 111
+
+	ethtool -s $swp3 speed 1000 autoneg off
+	tc qdisc replace dev $swp3 root handle 3: \
+	   prio bands 8 priomap 7 7 7 7 7 7 7 7
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp3 master br1
+
+	ip link add name br111 type bridge vlan_filtering 0
+	ip link set dev br111 up
+	ip link set dev $swp2.111 master br111
+	ip link set dev $swp3.111 master br111
+}
+
+switch_destroy()
+{
+	ip link del dev br111
+	ip link del dev br1
+
+	tc qdisc del dev $swp3 root handle 3:
+	ethtool -s $swp3 autoneg on
+
+	vlan_destroy $swp3 111
+	vlan_destroy $swp2 111
+
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h2 192.0.2.130
+}
+
+humanize()
+{
+	local speed=$1; shift
+
+	for unit in bps Kbps Mbps Gbps; do
+		if (($(echo "$speed < 1024" | bc))); then
+			break
+		fi
+
+		speed=$(echo "scale=1; $speed / 1024" | bc)
+	done
+
+	echo "$speed${unit}"
+}
+
+rate()
+{
+	local t0=$1; shift
+	local t1=$1; shift
+	local interval=$1; shift
+
+	echo $((8 * (t1 - t0) / interval))
+}
+
+check_rate()
+{
+	local rate=$1; shift
+	local min=$1; shift
+	local what=$1; shift
+
+	if ((rate > min)); then
+		return 0
+	fi
+
+	echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr
+	return 1
+}
+
+measure_uc_rate()
+{
+	local what=$1; shift
+
+	local interval=10
+	local i
+	local ret=0
+
+	# Dips in performance might cause momentary ingress rate to drop below
+	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
+	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+	# average ingress rate to somewhat mitigate this.
+	local min_ingress=2147483648
+
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+	sleep 1
+
+	for i in {5..0}; do
+		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+		sleep $interval
+		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+		local ir=$(rate $u0 $u1 $interval)
+		local er=$(rate $t0 $t1 $interval)
+
+		if check_rate $ir $min_ingress "$what ingress rate"; then
+			break
+		fi
+
+		# Fail the test if we can't get the throughput.
+		if ((i == 0)); then
+			ret=1
+		fi
+	done
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	echo $ir $er
+	exit $ret
+}
+
+test_mc_aware()
+{
+	RET=0
+
+	local -a uc_rate
+	uc_rate=($(measure_uc_rate "UC-only"))
+	check_err $? "Could not get high enough UC-only ingress rate"
+	local ucth1=${uc_rate[1]}
+
+	$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local -a uc_rate_2
+	uc_rate_2=($(measure_uc_rate "UC+MC"))
+	check_err $? "Could not get high enough UC+MC ingress rate"
+	local ucth2=${uc_rate_2[1]}
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local deg=$(bc <<< "
+			scale=2
+			ret = 100 * ($ucth1 - $ucth2) / $ucth1
+			if (ret > 0) { ret } else { 0 }
+		    ")
+	check_err $(bc <<< "$deg > 25")
+
+	local interval=$((d1 - d0))
+	local mc_ir=$(rate $u0 $u1 $interval)
+	local mc_er=$(rate $t0 $t1 $interval)
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "UC performace under MC overload"
+
+	echo "UC-only throughput  $(humanize $ucth1)"
+	echo "UC+MC throughput    $(humanize $ucth2)"
+	echo "Degradation         $deg %"
+	echo
+	echo "Full report:"
+	echo "  UC only:"
+	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
+	echo "  UC+MC:"
+	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
+	echo "    ingress MC throughput $(humanize $mc_ir)"
+	echo "    egress MC throughput  $(humanize $mc_er)"
+	echo
+}
+
+test_uc_aware()
+{
+	RET=0
+
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+	sleep 1
+
+	local attempts=50
+	local passes=0
+	local i
+
+	for ((i = 0; i < attempts; ++i)); do
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+			((passes++))
+		fi
+
+		sleep 0.1
+	done
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+	local interval=$((d1 - d0))
+	local uc_ir=$(rate $u0 $u1 $interval)
+	local uc_er=$(rate $t0 $t1 $interval)
+
+	((attempts == passes))
+	check_err $?
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "MC performace under UC overload"
+	echo "    ingress UC throughput $(humanize ${uc_ir})"
+	echo "    egress UC throughput  $(humanize ${uc_er})"
+	echo "    sent $attempts BC ARPs, got $passes responses"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644
index 000000000000..d231649b4f01
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4
+
+router_h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+	ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
+}
+
+router_h1_destroy()
+{
+	ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+	tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy()
+{
+	tc qdisc del dev $h2 handle ffff: ingress
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	ip address add 192.0.1.2/24 dev $rp1
+	ip address add 192.0.2.2/24 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 192.0.2.2/24 dev $rp2
+	ip address del 192.0.1.2/24 dev $rp1
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+router_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	router_h1_create
+	router_h2_create
+
+	router_create
+}
+
+router_offload_validate()
+{
+	local route_count=$1
+	local offloaded_count
+
+	offloaded_count=$(ip route | grep -o 'offload' | wc -l)
+	[[ $offloaded_count -ge $route_count ]]
+}
+
+router_routes_create()
+{
+	local route_count=$1
+	local count=0
+
+	ROUTE_FILE="$(mktemp)"
+
+	for i in {0..255}
+	do
+		for j in {0..255}
+		do
+			for k in {0..255}
+			do
+				if [[ $count -eq $route_count ]]; then
+					break 3
+				fi
+
+				echo route add 193.${i}.${j}.${k}/32 via \
+				       192.0.2.1 dev $rp2  >> $ROUTE_FILE
+				((count++))
+			done
+		done
+	done
+
+	ip -b $ROUTE_FILE &> /dev/null
+}
+
+router_routes_destroy()
+{
+	if [[ -v ROUTE_FILE ]]; then
+		rm -f $ROUTE_FILE
+	fi
+}
+
+router_test()
+{
+	local route_count=$1
+	local should_fail=$2
+	local count=0
+
+	RET=0
+
+	router_routes_create $route_count
+
+	router_offload_validate $route_count
+	check_err_fail $should_fail $? "Offload of $route_count routes"
+	if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+		return
+	fi
+
+	tc filter add dev $h2 ingress protocol ip pref 1 flower \
+		skip_sw dst_ip 193.0.0.0/8 action drop
+
+	for i in {0..255}
+	do
+		for j in {0..255}
+		do
+			for k in {0..255}
+			do
+				if [[ $count -eq $route_count ]]; then
+					break 3
+				fi
+
+				$MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
+					-A 192.0.1.1 -B 193.${i}.${j}.${k} \
+					-t ip -q
+				((count++))
+			done
+		done
+	done
+
+	tc_check_packets "dev $h2 ingress" 1 $route_count
+	check_err $? "Offload mismatch"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 flower \
+		skip_sw dst_ip 193.0.0.0/8 action drop
+
+	router_routes_destroy
+}
+
+router_cleanup()
+{
+	pre_cleanup
+
+	router_routes_destroy
+	router_destroy
+
+	router_h2_destroy
+	router_h1_destroy
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
new file mode 100755
index 000000000000..3b75180f455d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2.
+# It tries to exercise as many code paths in the eRP state machine as
+# possible.
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
+	multiple_masks_test ctcam_edge_cases_test"
+NUM_NETIFS=2
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+single_mask_test()
+{
+	# When only a single mask is required, the device uses the master
+	# mask and not the eRP table. Verify that under this mode the right
+	# filter is matched
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 2
+	check_err $? "Two filters - did not match highest priority"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match lowest priority"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Single filter - did not match after delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "single mask test ($tcflags)"
+}
+
+identical_filters_test()
+{
+	# When two filters that only differ in their priority are used,
+	# one needs to be inserted into the C-TCAM. This test verifies
+	# that filters are correctly spilled to C-TCAM and that the right
+	# filter is matched
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match C-TCAM filter after A-TCAM delete"
+
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match C-TCAM filter after A-TCAM add"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter after C-TCAM delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "identical filters test ($tcflags)"
+}
+
+two_masks_test()
+{
+	# When more than one mask is required, the eRP table is used. This
+	# test verifies that the eRP table is correctly allocated and used
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/16 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Two filters - did not match highest priority"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match highest priority after add"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "two masks test ($tcflags)"
+}
+
+multiple_masks_test()
+{
+	# The number of masks in a region is limited. Once the maximum
+	# number of masks has been reached filters that require new
+	# masks are spilled to the C-TCAM. This test verifies that
+	# spillage is performed correctly and that the right filter is
+	# matched
+
+	local index
+
+	RET=0
+
+	NUM_MASKS=32
+	BASE_INDEX=100
+
+	for i in $(eval echo {1..$NUM_MASKS}); do
+		index=$((BASE_INDEX - i))
+
+		tc filter add dev $h2 ingress protocol ip pref $index \
+			handle $index \
+			flower $tcflags dst_ip 192.0.2.2/${i} src_ip 192.0.2.1 \
+			action drop
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 1
+		check_err $? "$i filters - did not match highest priority (add)"
+	done
+
+	for i in $(eval echo {$NUM_MASKS..1}); do
+		index=$((BASE_INDEX - i))
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 2
+		check_err $? "$i filters - did not match highest priority (del)"
+
+		tc filter del dev $h2 ingress protocol ip pref $index \
+			handle $index flower
+	done
+
+	log_test "multiple masks test ($tcflags)"
+}
+
+ctcam_two_atcam_masks_test()
+{
+	RET=0
+
+	# First case: C-TCAM is disabled when there are two A-TCAM masks.
+	# We push a filter into the C-TCAM by using two identical filters
+	# as in identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	# Delete both A-TCAM and C-TCAM filters and make sure the remaining
+	# A-TCAM filter still works
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "ctcam with two atcam masks test ($tcflags)"
+}
+
+ctcam_one_atcam_mask_test()
+{
+	RET=0
+
+	# Second case: C-TCAM is disabled when there is one A-TCAM mask.
+	# The test is similar to identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match C-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with one atcam mask test ($tcflags)"
+}
+
+ctcam_no_atcam_masks_test()
+{
+	RET=0
+
+	# Third case: C-TCAM is disabled when there are no A-TCAM masks
+	# This test exercises the code path that transitions the eRP table
+	# to its initial state after deleting the last C-TCAM mask
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with no atcam masks test ($tcflags)"
+}
+
+ctcam_edge_cases_test()
+{
+	# When the C-TCAM is disabled after deleting the last C-TCAM
+	# mask, we want to make sure the eRP state machine is put in
+	# the correct state
+
+	ctcam_two_atcam_masks_test
+	ctcam_one_atcam_mask_test
+	ctcam_no_atcam_masks_test
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+if ! tc_offload_check; then
+	check_err 1 "Could not test offloaded functionality"
+	log_test "mlxsw-specific tests for tc flower"
+	exit
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644
index 000000000000..73035e25085d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+
+if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
+	echo "SKIP: test is tailored for Mellanox Spectrum"
+	exit 1
+fi
+
+# Needed for returning to default
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+devlink_sp_resource_minimize()
+{
+	local size
+	local i
+
+	for i in $KVD_CHILDREN; do
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+	done
+
+	for i in $KVDL_CHILDREN; do
+		size=$(devlink_resource_get kvd linear "$i" | \
+		       jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd linear "$i"
+	done
+}
+
+devlink_sp_size_kvd_to_default()
+{
+	local need_reload=0
+	local i
+
+	for i in $KVD_CHILDREN; do
+		local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]')
+		current_size=$(devlink_resource_size_get kvd "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd "$i"
+			need_reload=1
+		fi
+	done
+
+	for i in $KVDL_CHILDREN; do
+		local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \
+			     jq '.["size"]')
+		current_size=$(devlink_resource_size_get kvd linear "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd linear "$i"
+			need_reload=1
+		fi
+	done
+
+	if [ "$need_reload" -ne "0" ]; then
+		devlink_reload
+	fi
+}
+
+devlink_sp_read_kvd_defaults()
+{
+	local key
+	local i
+
+	KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+	for i in $KVD_CHILDREN; do
+		key=kvd_$i
+		KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i")
+	done
+
+	for i in $KVDL_CHILDREN; do
+		key=kvd_linear_$i
+		KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i")
+	done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+	local profile=$1
+
+	case "$profile" in
+	scale)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 128000 kvd hash_single
+		devlink_resource_size_set 48000 kvd hash_double
+		devlink_reload
+		;;
+	ipv4_max)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 144000 kvd hash_single
+		devlink_resource_size_set 32768 kvd hash_double
+		devlink_reload
+		;;
+	default)
+		devlink_resource_size_set 98304 kvd linear
+		devlink_resource_size_set 16384 kvd linear singles
+		devlink_resource_size_set 49152 kvd linear chunks
+		devlink_resource_size_set 32768 kvd linear large_chunks
+		devlink_resource_size_set 87040 kvd hash_single
+		devlink_resource_size_set 60416 kvd hash_double
+		devlink_reload
+		;;
+	*)
+		check_err 1 "Unknown profile $profile"
+	esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755
index 000000000000..b1fe960e398a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=1
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+	devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+	pre_cleanup
+	devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+	local i
+
+	log_info "Running profile tests"
+
+	for i in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $i
+		log_test "'$i' profile"
+	done
+
+	# Default is explicitly tested at end to ensure it's actually applied
+	RET=0
+	devlink_sp_resource_kvd_profile_set "default"
+	log_test "'default' profile"
+}
+
+resources_min_test()
+{
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-minimum tests"
+
+	for i in $KVD_CHILDREN; do
+		RET=0
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+
+		# In case of linear, need to minimize sub-resources as well
+		if [[ "$i" == "linear" ]]; then
+			for j in $KVDL_CHILDREN; do
+				devlink_resource_size_set 0 kvd linear "$j"
+			done
+		fi
+
+		devlink_reload
+		devlink_sp_size_kvd_to_default
+		log_test "'$i' minimize [$size]"
+	done
+}
+
+resources_max_test()
+{
+	local min_size
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-maximum tests"
+	for i in $KVD_CHILDREN; do
+		RET=0
+		devlink_sp_resource_minimize
+
+		# Calculate the maximum possible size for the given partition
+		size=$(devlink_resource_size_get kvd)
+		for j in $KVD_CHILDREN; do
+			if [ "$i" != "$j" ]; then
+				min_size=$(devlink_resource_get kvd "$j" | \
+					   jq '.["size_min"]')
+				size=$((size - min_size))
+			fi
+		done
+
+		# Test almost maximum size
+		devlink_resource_size_set "$((size - 128))" kvd "$i"
+		devlink_reload
+		log_test "'$i' almost maximize [$((size - 128))]"
+
+		# Test above maximum size
+		devlink resource set "$DEVLINK_DEV" \
+			path "kvd/$i" size $((size + 128)) &> /dev/null
+		check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+		log_test "'$i' Overflow rejection [$((size + 128))]"
+
+		# Test maximum size
+		if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+			echo "SKIP: Observed problem with exact max $i"
+			continue
+		fi
+
+		devlink_resource_size_set "$size" kvd "$i"
+		devlink_reload
+		log_test "'$i' maximize [$size]"
+
+		devlink_sp_size_kvd_to_default
+	done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644
index 000000000000..8d2186c7c62b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+	local should_fail=$1; shift
+
+	if ((! should_fail)); then
+		echo 3
+	else
+		echo 4
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755
index 000000000000..a0a80e1a69e8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=6
+source ../../../../net/forwarding/lib.sh
+source ../../../../net/forwarding/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+	pre_cleanup
+	if [ ! -z $current_test ]; then
+		${current_test}_cleanup
+	fi
+	devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+	source ${current_test}_scale.sh
+
+	num_netifs_var=${current_test^^}_NUM_NETIFS
+	num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+	for profile in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $profile
+		if [[ $RET -gt 0 ]]; then
+			log_test "'$current_test' [$profile] setting"
+			continue
+		fi
+
+		for should_fail in 0 1; do
+			RET=0
+			target=$(${current_test}_get_target "$should_fail")
+			${current_test}_setup_prepare
+			setup_wait $num_netifs
+			${current_test}_test "$target" "$should_fail"
+			${current_test}_cleanup
+			if [[ "$should_fail" -eq 0 ]]; then
+				log_test "'$current_test' [$profile] $target"
+			else
+				log_test "'$current_test' [$profile] overflow $target"
+			fi
+		done
+	done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644
index 000000000000..21c4697d5bab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get kvd hash_single)
+
+	if [[ $should_fail -eq 0 ]]; then
+		target=$((target * 85 / 100))
+	else
+		target=$((target + 1))
+	fi
+
+	echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644
index 000000000000..f9bfd8937765
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+	local should_fail=$1; shift
+
+	# 6144 (6x1024) is the theoretical maximum.
+	# One bank of 512 rules is taken by the 18-byte MC router rule.
+	# One rule is the ACL catch-all.
+	# 6144 - 512 - 1 = 5631
+	local target=5631
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644
index 000000000000..a6d733d2a4b4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for resource limit of offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then generates traffic,
+# and ensures each was hit exactly once. This file contains functions to set up
+# a testing topology and run the test, and is meant to be sourced from a test
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+}
+
+tc_flower_h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+tc_flower_h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+}
+
+tc_flower_h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+tc_flower_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	tc_flower_h1_create
+	tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+	pre_cleanup
+
+	tc_flower_h2_destroy
+	tc_flower_h1_destroy
+
+	vrf_cleanup
+
+	if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+		rm -f $TC_FLOWER_BATCH_FILE
+	fi
+}
+
+tc_flower_addr()
+{
+	local num=$1; shift
+
+	printf "2001:db8:1::%x" $num
+}
+
+tc_flower_rules_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+	for ((i = 0; i < count; ++i)); do
+		cat >> $TC_FLOWER_BATCH_FILE <<-EOF
+			filter add dev $h2 ingress \
+				prot ipv6 \
+				pref 1000 \
+				flower $tcflags dst_ip $(tc_flower_addr $i) \
+				action drop
+		EOF
+	done
+
+	tc -b $TC_FLOWER_BATCH_FILE
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local last=$((count - 1))
+
+	tc_flower_rules_create $count $should_fail
+
+	for ((i = 0; i < count; ++i)); do
+		$MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
+			-A 2001:db8:2::1 \
+			-B $(tc_flower_addr $i)
+	done
+
+	MISMATCHES=$(
+		tc -j -s filter show dev $h2 ingress |
+		jq -r '[ .[] | select(.kind == "flower") | .options |
+		         values as $rule | .actions[].stats.packets |
+		         select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
+		       join(", ")'
+	)
+
+	test -z "$MISMATCHES"
+	check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+}
+
+tc_flower_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	# We use lower 16 bits of IPv6 address for match. Also there are only 16
+	# bits of rule priority space.
+	if ((count > 65536)); then
+		check_err 1 "Invalid count of $count. At most 65536 rules supported"
+		return
+	fi
+
+	if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	tcflags="skip_sw"
+	__tc_flower_test $count $should_fail
+}
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 000000000000..128f0ab24307
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }
+
+while getopts "h:b:p:" arg; do
+    case "${arg}" in
+	h)
+	    usage
+	    ;;
+	b)
+	    busid=${OPTARG}
+	    ;;
+	p)
+	    tools_path=${OPTARG}
+	    ;;
+	*)
+	    usage
+	    ;;
+    esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${busid}" ]; then
+	usage
+fi
+
+echo "Running USB over IP Testing on $busid";
+
+test_end_msg="End of USB over IP Testing on $busid"
+
+if [ $UID != 0 ]; then
+	echo "Please run usbip_test as root [SKIP]"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+echo "Load usbip_host module"
+if ! /sbin/modprobe -q -n usbip_host; then
+	echo "usbip_test: module usbip_host is not found [SKIP]"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q usbip_host; then
+	echo "usbip_test: module usbip_host is loaded [OK]"
+else
+	echo "usbip_test: module usbip_host failed to load [FAIL]"
+	echo $test_end_msg
+	exit 1
+fi
+
+echo "Load vhci_hcd module"
+if /sbin/modprobe -q vhci_hcd; then
+	echo "usbip_test: module vhci_hcd is loaded [OK]"
+else
+	echo "usbip_test: module vhci_hcd failed to load [FAIL]"
+	echo $test_end_msg
+	exit 1
+fi
+echo "=============================================================="
+
+cd $tools_path;
+
+if [ ! -f src/usbip ]; then
+	echo "Please build usbip tools"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Run lsusb to see all usb devices"
+lsusb -t;
+echo "=============================================================="
+
+src/usbipd -D;
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be under usbip_host control"
+lsusb -t;
+echo "=============================================================="
+
+echo "bind devices - expect already bound messages"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "unbind devices";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "unbind devices - expect no devices bound message";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should fail with no devices"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "List imported devices - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should work"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+# Wait for sysfs file to be updated. Without this sleep, usbip port
+# shows no imported devices.
+sleep 3;
+
+echo "List imported devices - expect to see imported devices";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - expect already imported messages"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "Un-import devices";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Un-import devices - expect no devices to detach messages";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "Detach invalid port tests - expect invalid port error message";
+src/usbip detach -p 100;
+echo "=============================================================="
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Remove usbip_host module";
+rmmod usbip_host;
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "Run bind without usbip_host - expect fail"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - devices that failed to bind aren't bound to any driver"
+lsusb -t;
+echo "=============================================================="
+
+echo "modprobe usbip_host - does it work?"
+/sbin/modprobe usbip_host
+echo "Should see -busid- is not in match_busid table... skip! dmesg"
+echo "=============================================================="
+dmesg | grep "is not in match_busid table"
+echo "=============================================================="
+
+echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config
new file mode 100644
index 000000000000..4e151f1005b2
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/config
@@ -0,0 +1 @@
+CONFIG_EFIVAR_FS=y
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index c6d5790575ae..a47029a799d2 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -4,18 +4,21 @@
 efivarfs_mount=/sys/firmware/efi/efivars
 test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 check_prereqs()
 {
 	local msg="skip all tests:"
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
 		echo $msg efivarfs is not mounted on $efivarfs_mount >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67cd4597db2b..47cbf54d0801 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -20,6 +20,8 @@
 #include <string.h>
 #include <unistd.h>
 
+#include "../kselftest.h"
+
 static char longpath[2 * PATH_MAX] = "";
 static char *envp[] = { "IN_TEST=yes", NULL, NULL };
 static char *argv[] = { "execveat", "99", NULL };
@@ -249,8 +251,8 @@ static int run_tests(void)
 	errno = 0;
 	execveat_(-1, NULL, NULL, NULL, 0);
 	if (errno == ENOSYS) {
-		printf("[FAIL] ENOSYS calling execveat - no kernel support?\n");
-		return 1;
+		ksft_exit_skip(
+			"ENOSYS calling execveat - no kernel support?\n");
 	}
 
 	/* Change file position to confirm it doesn't affect anything */
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 5c7d7001ad37..129880fb42d3 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
+CFLAGS += -I../../../../usr/include/
 TEST_GEN_PROGS := devpts_pts
 TEST_GEN_PROGS_EXTENDED := dnotify_test
 
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b9055e974289..b1fc9b916ace 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -8,9 +8,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <sys/ioctl.h>
+#include <asm/ioctls.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include "../kselftest.h"
 
 static bool terminal_dup2(int duplicate, int original)
 {
@@ -125,10 +126,12 @@ static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents)
 		if (errno == EINVAL) {
 			fprintf(stderr, "TIOCGPTPEER is not supported. "
 					"Skipping test.\n");
-			fret = EXIT_SUCCESS;
+			fret = KSFT_SKIP;
+		} else {
+			fprintf(stderr,
+				"Failed to perform TIOCGPTPEER ioctl\n");
+			fret = EXIT_FAILURE;
 		}
-
-		fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n");
 		goto do_cleanup;
 	}
 
@@ -279,9 +282,9 @@ int main(int argc, char *argv[])
 	int ret;
 
 	if (!isatty(STDIN_FILENO)) {
-		fprintf(stderr, "Standard input file desciptor is not attached "
+		fprintf(stderr, "Standard input file descriptor is not attached "
 				"to a terminal. Skipping test\n");
-		exit(EXIT_FAILURE);
+		exit(KSFT_SKIP);
 	}
 
 	ret = unshare(CLONE_NEWNS);
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 8e2e34a2ca69..70d18be46af5 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -74,7 +74,7 @@ load_fw_custom()
 {
 	if [ ! -e "$DIR"/trigger_custom_fallback ]; then
 		echo "$0: custom fallback trigger not present, ignoring test" >&2
-		return 1
+		exit $ksft_skip
 	fi
 
 	local name="$1"
@@ -107,7 +107,7 @@ load_fw_custom_cancel()
 {
 	if [ ! -e "$DIR"/trigger_custom_fallback ]; then
 		echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
-		return 1
+		exit $ksft_skip
 	fi
 
 	local name="$1"
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 6452d2129cd9..a4320c4b44dc 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -30,6 +30,7 @@ fi
 
 if [ ! -e "$DIR"/trigger_async_request ]; then
 	echo "$0: empty filename: async trigger not present, ignoring test" >&2
+	exit $ksft_skip
 else
 	if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
 		echo "$0: empty filename should not succeed (async)" >&2
@@ -69,6 +70,7 @@ fi
 # Try the asynchronous version too
 if [ ! -e "$DIR"/trigger_async_request ]; then
 	echo "$0: firmware loading: async trigger not present, ignoring test" >&2
+	exit $ksft_skip
 else
 	if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
 		echo "$0: could not trigger async request" >&2
@@ -89,7 +91,7 @@ test_config_present()
 {
 	if [ ! -f $DIR/reset ]; then
 		echo "Configuration triggers not present, ignoring test"
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 962d7f4ac627..6c5f1b2ffb74 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -9,11 +9,14 @@ DIR=/sys/devices/virtual/misc/test_firmware
 PROC_CONFIG="/proc/config.gz"
 TEST_DIR=$(dirname $0)
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 print_reqs_exit()
 {
 	echo "You must have the following enabled in your kernel:" >&2
 	cat $TEST_DIR/config >&2
-	exit 1
+	exit $ksft_skip
 }
 
 test_modprobe()
@@ -88,7 +91,7 @@ verify_reqs()
 	if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
 		if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
 			echo "usermode helper disabled so ignoring test"
-			exit 0
+			exit $ksft_skip
 		fi
 	fi
 }
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index b01924c71c09..c2c8de4fafff 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -4,3 +4,12 @@ CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACER_SNAPSHOT=y
 CONFIG_STACK_TRACER=y
 CONFIG_HIST_TRIGGERS=y
+CONFIG_SCHED_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_TRACE_PRINTK=m
+CONFIG_KALLSYMS_ALL=y
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index f9a9d424c980..75244db70331 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -60,15 +60,29 @@ parse_opts() { # opts
       shift 1
     ;;
     --verbose|-v|-vv|-vvv)
+      if [ $VERBOSE -eq -1 ]; then
+	usage "--console can not use with --verbose"
+      fi
       VERBOSE=$((VERBOSE + 1))
       [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
       [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
       shift 1
     ;;
+    --console)
+      if [ $VERBOSE -ne 0 ]; then
+	usage "--console can not use with --verbose"
+      fi
+      VERBOSE=-1
+      shift 1
+    ;;
     --debug|-d)
       DEBUG=1
       shift 1
     ;;
+    --stop-fail)
+      STOP_FAILURE=1
+      shift 1
+    ;;
     --fail-unsupported)
       UNSUPPORTED_RESULT=1
       shift 1
@@ -117,6 +131,7 @@ KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
 UNSUPPORTED_RESULT=0
+STOP_FAILURE=0
 # Parse command-line options
 parse_opts $*
 
@@ -137,11 +152,33 @@ else
   date > $LOG_FILE
 fi
 
+# Define text colors
+# Check available colors on the terminal, if any
+ncolors=`tput colors 2>/dev/null`
+color_reset=
+color_red=
+color_green=
+color_blue=
+# If stdout exists and number of colors is eight or more, use them
+if [ -t 1 -a "$ncolors" -a "$ncolors" -ge 8 ]; then
+  color_reset="\e[0m"
+  color_red="\e[31m"
+  color_green="\e[32m"
+  color_blue="\e[34m"
+fi
+
+strip_esc() {
+  # busybox sed implementation doesn't accept "\x1B", so use [:cntrl:] instead.
+  sed -E "s/[[:cntrl:]]\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g"
+}
+
 prlog() { # messages
-  [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
+  echo -e "$@"
+  [ "$LOG_FILE" ] && echo -e "$@" | strip_esc >> $LOG_FILE
 }
 catlog() { #file
-  [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
+  cat $1
+  [ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE
 }
 prlog "=== Ftrace unit tests ==="
 
@@ -180,37 +217,37 @@ test_on_instance() { # testfile
 eval_result() { # sigval
   case $1 in
     $PASS)
-      prlog "	[PASS]"
+      prlog "	[${color_green}PASS${color_reset}]"
       PASSED_CASES="$PASSED_CASES $CASENO"
       return 0
     ;;
     $FAIL)
-      prlog "	[FAIL]"
+      prlog "	[${color_red}FAIL${color_reset}]"
       FAILED_CASES="$FAILED_CASES $CASENO"
       return 1 # this is a bug.
     ;;
     $UNRESOLVED)
-      prlog "	[UNRESOLVED]"
+      prlog "	[${color_blue}UNRESOLVED${color_reset}]"
       UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
       return 1 # this is a kind of bug.. something happened.
     ;;
     $UNTESTED)
-      prlog "	[UNTESTED]"
+      prlog "	[${color_blue}UNTESTED${color_reset}]"
       UNTESTED_CASES="$UNTESTED_CASES $CASENO"
       return 0
     ;;
     $UNSUPPORTED)
-      prlog "	[UNSUPPORTED]"
+      prlog "	[${color_blue}UNSUPPORTED${color_reset}]"
       UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
       return $UNSUPPORTED_RESULT # depends on use case
     ;;
     $XFAIL)
-      prlog "	[XFAIL]"
+      prlog "	[${color_red}XFAIL${color_reset}]"
       XFAILED_CASES="$XFAILED_CASES $CASENO"
       return 0
     ;;
     *)
-      prlog "	[UNDEFINED]"
+      prlog "	[${color_blue}UNDEFINED${color_reset}]"
       UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
       return 1 # this must be a test bug
     ;;
@@ -269,16 +306,18 @@ __run_test() { # testfile
 # Run one test case
 run_test() { # testfile
   local testname=`basename $1`
+  testcase $1
   if [ ! -z "$LOG_FILE" ] ; then
-    local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+    local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX`
   else
     local testlog=/proc/self/fd/1
   fi
   export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
-  testcase $1
   echo "execute$INSTANCE: "$1 > $testlog
   SIG_RESULT=0
-  if [ -z "$LOG_FILE" ]; then
+  if [ $VERBOSE -eq -1 ]; then
+    __run_test $1
+  elif [ -z "$LOG_FILE" ]; then
     __run_test $1 2>&1
   elif [ $VERBOSE -ge 3 ]; then
     __run_test $1 | tee -a $testlog 2>&1
@@ -304,6 +343,10 @@ run_test() { # testfile
 # Main loop
 for t in $TEST_CASES; do
   run_test $t
+  if [ $STOP_FAILURE -ne 0 -a $TOTAL_RESULT -ne 0 ]; then
+    echo "A failure detected. Stop test."
+    exit 1
+  fi
 done
 
 # Test on instance loop
@@ -315,7 +358,12 @@ for t in $TEST_CASES; do
   run_test $t
   rmdir $TRACING_DIR
   TRACING_DIR=$SAVED_TRACING_DIR
+  if [ $STOP_FAILURE -ne 0 -a $TOTAL_RESULT -ne 0 ]; then
+    echo "A failure detected. Stop test."
+    exit 1
+  fi
 done
+(cd $TRACING_DIR; initialize_ftrace) # for cleanup
 
 prlog ""
 prlog "# of passed: " `echo $PASSED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc
new file mode 100644
index 000000000000..ab70f0077c35
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Change the ringbuffer size
+# flags: instance
+
+rb_size_test() {
+ORIG=`cat buffer_size_kb`
+
+expr $ORIG / 2 > buffer_size_kb
+
+expr $ORIG \* 2 > buffer_size_kb
+
+echo $ORIG > buffer_size_kb
+}
+
+rb_size_test
+
+: "If per-cpu buffer is supported, imbalance it"
+if [ -d per_cpu/cpu0 ]; then
+  cd per_cpu/cpu0
+  rb_size_test
+fi
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
new file mode 100644
index 000000000000..3b1f45e13a2e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Snapshot and tracing setting
+# flags: instance
+
+[ ! -f snapshot ] && exit_unsupported
+
+echo "Set tracing off"
+echo 0 > tracing_on
+
+echo "Allocate and take a snapshot"
+echo 1 > snapshot
+
+# Since trace buffer is empty, snapshot is also empty, but allocated
+grep -q "Snapshot is allocated" snapshot
+
+echo "Ensure keep tracing off"
+test `cat tracing_on` -eq 0
+
+echo "Set tracing on"
+echo 1 > tracing_on
+
+echo "Take a snapshot again"
+echo 1 > snapshot
+
+echo "Ensure keep tracing on"
+test `cat tracing_on` -eq 1
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
new file mode 100644
index 000000000000..5058fbcfd90f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_pipe and trace_marker
+# flags: instance
+
+[ ! -f trace_marker ] && exit_unsupported
+
+echo "test input 1" > trace_marker
+
+: "trace interface never consume the ring buffer"
+grep -q "test input 1" trace
+grep -q "test input 1" trace
+
+: "trace interface never consume the ring buffer"
+head -n 1 trace_pipe | grep -q "test input 1"
+! grep -q "test input 1" trace
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
index 9daf034186f5..dfb0d5122f7b 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -9,23 +9,15 @@ do_reset() {
 }
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f set_event -o ! -d events/sched ]; then
     echo "event tracing is not supported"
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo 'sched:sched_switch' > set_event
 
 yield
@@ -57,6 +49,4 @@ if [ $count -ne 0 ]; then
     fail "sched_switch events should not be recorded"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
index 132478b305c2..f9cb214220b1 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
@@ -16,10 +16,6 @@ fail() { #msg
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f set_event -o ! -d events/sched ]; then
     echo "event tracing is not supported"
     exit_unsupported
@@ -30,8 +26,7 @@ if [ ! -f set_event_pid ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
+echo 0 > options/event-fork
 
 echo 1 > events/sched/sched_switch/enable
 
@@ -47,6 +42,7 @@ do_reset
 read mypid rest < /proc/self/stat
 
 echo $mypid > set_event_pid
+grep -q $mypid set_event_pid
 echo 'sched:sched_switch' > set_event
 
 yield
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index 6a37a8642ee6..83a8c571e93a 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -9,23 +9,15 @@ do_reset() {
 }
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f set_event -o ! -d events/sched ]; then
     echo "event tracing is not supported"
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo 'sched:*' > set_event
 
 yield
@@ -57,6 +49,4 @@ if [ $count -ne 0 ]; then
     fail "any of scheduler events should not be recorded"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
index 4e9b6e2c0219..84d7bda08d2a 100644
--- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -8,23 +8,15 @@ do_reset() {
 }
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
     echo "event tracing is not supported"
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo '*:*' > set_event
 
 yield
@@ -60,6 +52,4 @@ if [ $count -ne 0 ]; then
     fail "any of events should not be recorded"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc b/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc
new file mode 100644
index 000000000000..b02550b42be9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test trace_printk from module
+
+rmmod trace-printk ||:
+if ! modprobe trace-printk ; then
+  echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=m"
+  exit_unresolved;
+fi
+
+echo "Waiting for irq work"
+sleep 1
+
+grep -q ": This .* trace_bputs" trace
+grep -q ": This .* trace_puts" trace
+grep -q ": This .* trace_bprintk" trace
+grep -q ": This .* trace_printk" trace
+
+grep -q ": (irq) .* trace_bputs" trace
+grep -q ": (irq) .* trace_puts" trace
+grep -q ": (irq) .* trace_bprintk" trace
+grep -q ": (irq) .* trace_printk" trace
+
+grep -q "This is a %s that will use trace_bprintk" printk_formats
+grep -q "(irq) This is a static string that will use trace_bputs" printk_formats
+
+rmmod trace-printk ||:
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
index 1aec99d108eb..aefab0c66d54 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -16,13 +16,9 @@ if [ ! -f set_ftrace_filter ]; then
 fi
 
 do_reset() {
-    reset_tracer
     if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
 	    echo 0 > /proc/sys/kernel/stack_tracer_enabled
     fi
-    enable_tracing
-    clear_trace
-    echo > set_ftrace_filter
 }
 
 fail() { # msg
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
index 9f8d27ca39cf..c8a5209f2119 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -9,14 +9,7 @@ if ! grep -q function_graph available_tracers; then
     exit_unsupported
 fi
 
-do_reset() {
-    reset_tracer
-    enable_tracing
-    clear_trace
-}
-
 fail() { # msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -48,6 +41,4 @@ if [ $count -eq 0 ]; then
     fail "No schedule traces found?"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 524ce24b3c22..64cfcc75e3c1 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -35,12 +35,6 @@ if [ $do_function_fork -eq 1 ]; then
 fi
 
 do_reset() {
-    reset_tracer
-    clear_trace
-    enable_tracing
-    echo > set_ftrace_filter
-    echo > set_ftrace_pid
-
     if [ $do_function_fork -eq 0 ]; then
 	return
     fi
@@ -54,10 +48,6 @@ fail() { # msg
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 do_test() {
     disable_tracing
 
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
new file mode 100644
index 000000000000..bf72e783d014
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: ftrace - stacktrace filter command
+# flags: instance
+
+echo _do_fork:stacktrace >> set_ftrace_filter
+
+grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+
+(echo "forked"; sleep 1)
+
+grep -q "<stack trace>" trace
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
new file mode 100644
index 000000000000..0e6810743576
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: ftrace - function trace with cpumask
+
+if ! which nproc ; then
+  nproc() {
+    ls -d /sys/devices/system/cpu/cpu[0-9]* | wc -l
+  }
+fi
+
+NP=`nproc`
+
+if [ $NP -eq 1 ] ;then
+  echo "We can not test cpumask on UP environment"
+  exit_unresolved
+fi
+
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+do_reset() {
+  echo $ORIG_CPUMASK > tracing_cpumask
+}
+
+echo 0 > tracing_on
+echo > trace
+: "Bitmask only record on CPU1"
+echo 2 > tracing_cpumask
+MASK=0x`cat tracing_cpumask`
+test `printf "%d" $MASK` -eq 2 || do_reset
+
+echo function > current_tracer
+echo 1 > tracing_on
+(echo "forked")
+echo 0 > tracing_on
+
+: "Check CPU1 events are recorded"
+grep -q -e "\[001\]" trace || do_reset
+
+: "There should be No other cpu events"
+! grep -qv -e "\[001\]" -e "^#" trace || do_reset
+
+do_reset
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 6fed4cf2db81..ca2ffd7957f9 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -25,15 +25,12 @@ do_reset() {
 }
 
 fail() { # mesg
-    do_reset
     echo $1
     exit_fail
 }
 
 SLEEP_TIME=".1"
 
-do_reset
-
 echo "Testing function probes with events:"
 
 EVENT="sched:sched_switch"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
new file mode 100644
index 000000000000..9330c873f9fe
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function trace on module
+
+[ ! -f set_ftrace_filter ] && exit_unsupported
+
+: "mod: allows to filter a non exist function"
+echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter
+grep -q "non_exist_func" set_ftrace_filter
+
+: "mod: on exist module"
+echo '*:mod:trace_printk' > set_ftrace_filter
+if ! modprobe trace-printk ; then
+  echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=
+m"
+  exit_unresolved;
+fi
+
+: "Wildcard should be resolved after loading module"
+grep -q "trace_printk_irq_work" set_ftrace_filter
+
+: "After removing the filter becomes empty"
+rmmod trace_printk
+test `cat set_ftrace_filter | wc -l` -eq 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
new file mode 100644
index 000000000000..0d501058aa75
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function profiling
+
+[ ! -f function_profile_enabled ] && exit_unsupported
+
+: "Enable function profile"
+echo 1 > function_profile_enabled
+
+: "Profile must be updated"
+cp trace_stat/function0 $TMPDIR/
+( echo "forked"; sleep 1 )
+: "diff returns 0 if there is no difference"
+! diff trace_stat/function0 $TMPDIR/function0
+
+echo 0 > function_profile_enabled
+
+: "Profile must NOT be updated"
+cp trace_stat/function0 $TMPDIR/
+( echo "forked"; sleep 1 )
+: "diff returns 0 if there is no difference"
+diff trace_stat/function0 $TMPDIR/function0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
index b2d5a8febfe8..dfbae637c60c 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -29,8 +29,6 @@ if [ ! -f function_profile_enabled ]; then
 fi
 
 fail() { # mesg
-    reset_tracer
-    echo > set_ftrace_filter
     echo $1
     exit_fail
 }
@@ -76,6 +74,4 @@ if ! grep -v -e '^#' -e 'schedule' trace > /dev/null; then
 	fail "no other functions besides schedule was found"
 fi
 
-reset_tracer
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 68e7a48f5828..51f6e6146bd9 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -15,22 +15,11 @@ if [ ! -f set_ftrace_filter ]; then
     exit_unsupported
 fi
 
-do_reset() {
-    reset_tracer
-    reset_ftrace_filter
-    disable_events
-    clear_trace
-    enable_tracing
-}
-
 fail() { # mesg
-    do_reset
     echo $1
     exit_fail
 }
 
-do_reset
-
 FILTER=set_ftrace_filter
 FUNC1="schedule"
 FUNC2="do_softirq"
@@ -165,6 +154,4 @@ test_actual
 rm $TMPDIR/expected
 rm $TMPDIR/actual
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
new file mode 100644
index 000000000000..b414f0e3c646
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - Max stack tracer
+# Test the basic function of max-stack usage tracing
+
+if [ ! -f stack_trace ]; then
+  echo "Max stack tracer is not supported - please make CONFIG_STACK_TRACER=y"
+  exit_unsupported
+fi
+
+echo > stack_trace_filter
+echo 0 > stack_max_size
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+: "Fork and wait for the first entry become !lock"
+timeout=10
+while [ $timeout -ne 0 ]; do
+  ( echo "forked" )
+  FL=`grep " 0)" stack_trace`
+  echo $FL | grep -q "lock" || break;
+  timeout=$((timeout - 1))
+done
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
+
+echo '*lock*' > stack_trace_filter
+test `cat stack_trace_filter | wc -l` -eq `grep lock stack_trace_filter | wc -l`
+
+echo 0 > stack_max_size
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+: "Fork and always the first entry including lock"
+timeout=10
+while [ $timeout -ne 0 ]; do
+  ( echo "forked" )
+  FL=`grep " 0)" stack_trace`
+  echo $FL | grep -q "lock"
+  timeout=$((timeout - 1))
+done
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
index f6d9ac73268a..0c04282d33dd 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -16,24 +16,13 @@ if [ ! -f set_ftrace_filter ]; then
     exit_unsupported
 fi
 
-do_reset() {
-    reset_ftrace_filter
-    reset_tracer
-    disable_events
-    clear_trace
-    enable_tracing
-}
-
 fail() { # mesg
-    do_reset
     echo $1
     exit_fail
 }
 
 SLEEP_TIME=".1"
 
-do_reset
-
 echo "Testing function probes with enabling disabling tracing:"
 
 cnt_trace() {
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 2a4f16fc9819..7b96e80e6b8a 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -15,14 +15,29 @@ reset_tracer() { # reset the current tracer
     echo nop > current_tracer
 }
 
-reset_trigger() { # reset all current setting triggers
-    grep -v ^# events/*/*/trigger |
+reset_trigger_file() {
+    # remove action triggers first
+    grep -H ':on[^:]*(' $@ |
+    while read line; do
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" >> $file
+    done
+    grep -Hv ^# $@ |
     while read line; do
-        cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "`
-	echo "!$cmd" > `echo $line | cut -f1 -d:`
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" > $file
     done
 }
 
+reset_trigger() { # reset all current setting triggers
+    if [ -d events/synthetic ]; then
+        reset_trigger_file events/synthetic/*/trigger
+    fi
+    reset_trigger_file events/*/*/trigger
+}
+
 reset_events_filter() { # reset all current setting filters
     grep -v ^none events/*/*/filter |
     while read line; do
@@ -74,12 +89,23 @@ initialize_ftrace() { # Reset ftrace to initial-state
     reset_tracer
     reset_trigger
     reset_events_filter
+    reset_ftrace_filter
     disable_events
     echo > set_event_pid	# event tracer is always on
+    echo > set_ftrace_pid
     [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
     [ -f set_graph_function ] && echo | tee set_graph_*
     [ -f stack_trace_filter ] && echo > stack_trace_filter
     [ -f kprobe_events ] && echo > kprobe_events
     [ -f uprobe_events ] && echo > uprobe_events
+    [ -f synthetic_events ] && echo > synthetic_events
+    [ -f snapshot ] && echo 0 > snapshot
+    clear_trace
     enable_tracing
 }
+
+LOCALHOST=127.0.0.1
+
+yield() {
+    ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
+}
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index 4604d2103c89..bb1eb5a7c64e 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -4,10 +4,7 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
 echo p:myevent _do_fork > kprobe_events
 grep myevent kprobe_events
 test -d events/kprobes/myevent
 echo > kprobe_events
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index bbc443a9190c..442c1a8c5edf 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -4,12 +4,9 @@
 
 [ -f kprobe_events ] || exit_unsupported
 
-echo 0 > events/enable
-echo > kprobe_events
 echo p:myevent _do_fork > kprobe_events
 test -d events/kprobes/myevent
 echo 1 > events/kprobes/myevent/enable
 echo > kprobe_events && exit_fail # this must fail
 echo 0 > events/kprobes/myevent/enable
 echo > kprobe_events # this must succeed
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 8b43c6804fc3..bcdecf80a8f1 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -4,13 +4,15 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
 echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
-grep testprobe kprobe_events
+grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
 test -d events/kprobes/testprobe
+
 echo 1 > events/kprobes/testprobe/enable
 ( echo "forked")
+grep testprobe trace | grep '_do_fork' | \
+  grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
+
 echo 0 > events/kprobes/testprobe/enable
 echo "-:testprobe" >> kprobe_events
 clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
new file mode 100644
index 000000000000..15c1f70fcaf9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event with comm arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
+
+echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+grep testprobe kprobe_events | grep -q 'comm=$comm'
+test -d events/kprobes/testprobe
+
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+grep testprobe trace | grep -q 'comm=".*"'
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 5ba73035e1d9..46e7744f8358 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -4,25 +4,24 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
-
 case `uname -m` in
 x86_64)
-  ARG2=%si
-  OFFS=8
+  ARG1=%di
 ;;
 i[3456]86)
-  ARG2=%cx
-  OFFS=4
+  ARG1=%ax
 ;;
 aarch64)
-  ARG2=%x1
-  OFFS=8
+  ARG1=%x0
 ;;
 arm*)
-  ARG2=%r1
-  OFFS=4
+  ARG1=%r0
+;;
+ppc64*)
+  ARG1=%r3
+;;
+ppc*)
+  ARG1=%r3
 ;;
 *)
   echo "Please implement other architecture here"
@@ -30,17 +29,15 @@ arm*)
 esac
 
 : "Test get argument (1)"
-echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string" > kprobe_events
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-! echo test >> kprobe_events
-tail -n 1 trace | grep -qe "testprobe.* arg1=\"test\""
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\"" trace
 
 echo 0 > events/kprobes/testprobe/enable
 : "Test get argument (2)"
-echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string arg2=+0(+${OFFS}(${ARG2})):string" > kprobe_events
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-! echo test1 test2 >> kprobe_events
-tail -n 1 trace | grep -qe "testprobe.* arg1=\"test1\" arg2=\"test2\""
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
 
-echo 0 > events/enable
-echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
new file mode 100644
index 000000000000..2b6dd33f9076
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event symbol argument
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+SYMBOL="linux_proc_banner"
+
+if [ ! -f /proc/kallsyms ]; then
+  echo "Can not check the target symbol - please enable CONFIG_KALLSYMS"
+  exit_unresolved
+elif ! grep "$SYMBOL\$" /proc/kallsyms; then
+  echo "Linux banner is not exported - please enable CONFIG_KALLSYMS_ALL"
+  exit_unresolved
+fi
+
+: "Test get basic types symbol argument"
+echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+if grep -q "x8/16/32/64" README; then
+  echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+fi
+echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo 1 > events/kprobes/enable
+(echo "forked")
+echo 0 > events/kprobes/enable
+grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
+grep "testprobe_bf:.* arg1=.*" trace
+
+: "Test get string symbol argument"
+echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo 1 > events/kprobes/enable
+(echo "forked")
+echo 0 > events/kprobes/enable
+RESULT=`grep "testprobe_str" trace | sed -e 's/.* arg1=\(.*\)/\1/'`
+
+RESULT=`echo $RESULT | sed -e 's/.* \((.*)\) \((.*)\) .*/\1 \2/'`
+ORIG=`cat /proc/version | sed -e 's/.* \((.*)\) \((.*)\) .*/\1 \2/'`
+test "$RESULT" = "$ORIG"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 231bcd2c4eb5..6f0f19953193 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -6,9 +6,6 @@
 
 grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
 
-echo 0 > events/enable
-echo > kprobe_events
-
 PROBEFUNC="vfs_read"
 GOODREG=
 BADREG=
@@ -34,6 +31,13 @@ arm*)
   GOODREG=%r0
   BADREG=%ax
 ;;
+ppc*)
+  GOODREG=%r3
+  BADREG=%msr
+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
 esac
 
 test_goodarg() # Good-args
@@ -71,8 +75,11 @@ test_badarg "\$stackp" "\$stack0+10" "\$stack1-10"
 echo "r ${PROBEFUNC} \$retval" > kprobe_events
 ! echo "p ${PROBEFUNC} \$retval" > kprobe_events
 
+# $comm was introduced in 4.8, older kernels reject it.
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
 : "Comm access"
 test_goodarg "\$comm"
+fi
 
 : "Indirect memory access"
 test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \
@@ -93,5 +100,3 @@ test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \
 
 test_goodarg "\$comm:string" "+0(\$stack):string"
 test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string"
-
-echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 2a1755bfc290..1bcb67dcae26 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -6,33 +6,45 @@
 
 grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
 
-echo 0 > events/enable
-echo > kprobe_events
-enable_tracing
-
-echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events
-grep testprobe kprobe_events
-test -d events/kprobes/testprobe
-
-echo 1 > events/kprobes/testprobe/enable
-( echo "forked")
-echo 0 > events/kprobes/testprobe/enable
-ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+gen_event() { # Bitsize
+  echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+}
 
-check_types() {
-  X1=`printf "%x" $1 | tail -c 8`
+check_types() { # s-type u-type x-type bf-type width
+  test $# -eq 5
+  CW=$5
+  CW=$((CW / 4))
+  X1=`printf "%x" $1 | tail -c ${CW}`
   X2=`printf "%x" $2`
   X3=`printf "%x" $3`
   test $X1 = $X2
   test $X2 = $X3
   test 0x$X3 = $3
 
-  B4=`printf "%02x" $4`
-  B3=`echo -n $X3 | tail -c 3 | head -c 2`
+  B4=`printf "%1x" $4`
+  B3=`printf "%03x" 0x$X3 | tail -c 2 | head -c 1`
   test $B3 = $B4
 }
-check_types $ARGS
 
-echo "-:testprobe" >> kprobe_events
-clear_trace
-test -d events/kprobes/testprobe && exit_fail || exit_pass
+for width in 64 32 16 8; do
+  : "Add new event with basic types"
+  gen_event $width > kprobe_events
+  grep testprobe kprobe_events
+  test -d events/kprobes/testprobe
+
+  : "Trace the event"
+  echo 1 > events/kprobes/testprobe/enable
+  ( echo "forked")
+  echo 0 > events/kprobes/testprobe/enable
+
+  : "Confirm the arguments is recorded in given types correctly"
+  ARGS=`grep "testprobe" trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+  check_types $ARGS $width
+
+  : "Clear event for next loop"
+  echo "-:testprobe" >> kprobe_events
+  clear_trace
+
+done
+
+exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 2724a1068cb1..3fb70e01b1fe 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -4,9 +4,6 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-disable_events
-echo > kprobe_events
-
 :;: "Add an event on function without name" ;:
 
 FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
@@ -33,5 +30,3 @@ echo "p $FUNC" > kprobe_events
 EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
 [ "x" != "x$EVENT" ] || exit_failure
 test -d events/$EVENT || exit_failure
-
-echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index cc4cac0e60f2..492426e95e09 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -8,8 +8,6 @@ grep function available_tracers || exit_unsupported # this is configurable
 # prepare
 echo nop > current_tracer
 echo _do_fork > set_ftrace_filter
-echo 0 > events/enable
-echo > kprobe_events
 echo 'p:testprobe _do_fork' > kprobe_events
 
 # kprobe on / ftrace off
@@ -47,10 +45,3 @@ echo > trace
 ( echo "forked")
 grep testprobe trace
 ! grep '_do_fork <-' trace
-
-# cleanup
-echo nop > current_tracer
-echo > set_ftrace_filter
-echo 0 > events/kprobes/testprobe/enable
-echo > kprobe_events
-echo > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
index 1e9f75f7a30f..d861bd776c5e 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -4,14 +4,18 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-disable_events
-echo > kprobe_events
+rmmod trace-printk ||:
+if ! modprobe trace-printk ; then
+  echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=
+m"
+  exit_unresolved;
+fi
+
+MOD=trace_printk
+FUNC=trace_printk_irq_work
 
 :;: "Add an event on a module function without specifying event name" ;:
 
-MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
-FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
-[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
 echo "p $MOD:$FUNC" > kprobe_events
 PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
 test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
@@ -26,4 +30,24 @@ test -d events/kprobes/event1 || exit_failure
 echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
 test -d events/kprobes1/event1 || exit_failure
 
-echo > kprobe_events
+:;: "Remove target module, but event still be there" ;:
+if ! rmmod trace-printk ; then
+  echo "Failed to unload module - please enable CONFIG_MODULE_UNLOAD"
+  exit_unresolved;
+fi
+test -d events/kprobes1/event1
+
+:;: "Check posibility to defining events on unloaded module";:
+echo "p:event2 $MOD:$FUNC" >> kprobe_events
+
+:;: "Target is gone, but we can prepare for next time";:
+echo 1 > events/kprobes1/event1/enable
+
+:;: "Load module again, which means the event1 should be recorded";:
+modprobe trace-printk
+grep "event1:" trace
+
+:;: "Remove the module again and check the event is not locked"
+rmmod trace-printk
+echo 0 > events/kprobes1/event1/enable
+echo "-:kprobes1/event1" >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 321954683aaa..ac9ab4a12e53 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,13 +4,16 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
+# Add new kretprobe event
 echo 'r:testprobe2 _do_fork $retval' > kprobe_events
-grep testprobe2 kprobe_events
+grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
 test -d events/kprobes/testprobe2
+
 echo 1 > events/kprobes/testprobe2/enable
 ( echo "forked")
+
+cat trace | grep testprobe2 | grep -q '<- _do_fork'
+
 echo 0 > events/kprobes/testprobe2/enable
 echo '-:testprobe2' >> kprobe_events
 clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
index 7c0290684c43..8e05b178519a 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
@@ -5,8 +5,6 @@
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version
 
-echo > kprobe_events
-
 # Test if we successfully reject unknown messages
 if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
 
@@ -37,5 +35,3 @@ echo > kprobe_events
 echo 'r10 inet_csk_accept' > kprobe_events
 grep inet_csk_accept kprobe_events
 echo > kprobe_events
-
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index ce361b9d62cf..5862eee91e1d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -12,11 +12,6 @@ case `uname -m` in
   *) OFFS=0;;
 esac
 
-if [ -d events/kprobes ]; then
-  echo 0 > events/kprobes/enable
-  echo > kprobe_events
-fi
-
 N=0
 echo "Setup up kprobes on first available 256 text symbols"
 grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
index 4fda01a08da4..a902aa0aaabc 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -4,7 +4,7 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-TARGET_FUNC=create_trace_kprobe
+TARGET_FUNC=tracefs_create_dir
 
 dec_addr() { # hexaddr
   printf "%d" "0x"`echo $1 | tail -c 8`
@@ -30,8 +30,6 @@ if [ `printf "%x" -1 | wc -c` != 9 ]; then
   UINT_TEST=yes
 fi
 
-echo 0 > events/enable
-echo > kprobe_events
 echo "p:testprobe ${TARGET_FUNC}" > kprobe_events
 echo "p:testprobe ${TARGET}" > kprobe_events
 echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
@@ -39,5 +37,3 @@ echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
 if [ "${UINT_TEST}" = yes ]; then
 ! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events
 fi
-echo > kprobe_events
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
new file mode 100644
index 000000000000..0384b525cdee
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - adding and removing
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+! grep -q 'myevent' kprobe_profile
+echo p:myevent _do_fork > kprobe_events
+grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
+echo 1 > events/kprobes/myevent/enable
+( echo "forked" )
+grep -q 'myevent[[:space:]]*[[:digit:]]*[[:space:]]*0$' kprobe_profile
+echo 0 > events/kprobes/myevent/enable
+echo > kprobe_events
+! grep -q 'myevent' kprobe_profile
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
new file mode 100644
index 000000000000..cbd174334a48
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -0,0 +1,73 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: test for the preemptirqsoff tracer
+
+MOD=preemptirq_delay_test
+
+fail() {
+    reset_tracer
+    rmmod $MOD || true
+    exit_fail
+}
+
+unsup() { #msg
+    reset_tracer
+    rmmod $MOD || true
+    echo $1
+    exit_unsupported
+}
+
+modprobe $MOD || unsup "$MOD module not available"
+rmmod $MOD
+
+grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
+grep -q "irqsoff" available_tracers || unsup "irqsoff tracer not enabled"
+
+reset_tracer
+
+# Simulate preemptoff section for half a second couple of times
+echo preemptoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: preemptoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+
+# Simulate irqsoff section for half a second couple of times
+echo irqsoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: irqsoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template
index 5c39ceb18a0d..799da7e0b3c9 100644
--- a/tools/testing/selftests/ftrace/test.d/template
+++ b/tools/testing/selftests/ftrace/test.d/template
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: GPL2.0
 # description: %HERE DESCRIBE WHAT THIS DOES%
 # you have to add ".tc" extention for your testcase file
 # Note that all tests are run with "errexit" option.
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
new file mode 100644
index 000000000000..e3005fa785f0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: Test wakeup tracer
+
+if ! which chrt ; then
+  echo "chrt is not found. This test requires nice command."
+  exit_unresolved
+fi
+
+if ! grep -wq "wakeup" available_tracers ; then
+  echo "wakeup tracer is not supported"
+  exit_unsupported
+fi
+
+echo wakeup > current_tracer
+echo 1 > tracing_on
+echo 0 > tracing_max_latency
+
+: "Wakeup higher priority task"
+chrt -f 5 sleep 1
+
+echo 0 > tracing_on
+grep '+ \[[[:digit:]]*\]' trace
+grep '==> \[[[:digit:]]*\]' trace
+
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
new file mode 100644
index 000000000000..f99b5178e00a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: Test wakeup RT tracer
+
+if ! which chrt ; then
+  echo "chrt is not found. This test requires chrt command."
+  exit_unresolved
+fi
+
+if ! grep -wq "wakeup_rt" available_tracers ; then
+  echo "wakeup_rt tracer is not supported"
+  exit_unsupported
+fi
+
+echo wakeup_rt > current_tracer
+echo 1 > tracing_on
+echo 0 > tracing_max_latency
+
+: "Wakeup a realtime task"
+chrt -f 5 sleep 1
+
+echo 0 > tracing_on
+grep "+ \[[[:digit:]]*\]" trace
+grep "==> \[[[:digit:]]*\]" trace
+
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
index 2aabab363cfb..401104344593 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -2,14 +2,7 @@
 # description: event trigger - test extended error support
 
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,9 +17,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test extended error support"
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
 ! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
@@ -34,6 +24,4 @@ if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
     fail "Failed to generate extended error in histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
index 7fd5b4a8f060..f59b2a9a1f22 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test field variable support
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test field variable support"
 
 echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
@@ -34,7 +23,7 @@ echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/
 echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
 echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
 
-ping localhost -c 3
+ping $LOCALHOST -c 3
 if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
     fail "Failed to create inter-event histogram"
 fi
@@ -49,6 +38,4 @@ if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
     fail "Failed to remove histogram with field variable"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index c93dbe38b5df..524d9ce361e2 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event combined histogram trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-clear_synthetic_events
-
 echo "Test create synthetic event"
 
 echo 'waking_latency  u64 lat pid_t pid' > synthetic_events
@@ -48,11 +37,9 @@ echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
 echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
 echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
 
-ping localhost -c 3
+ping $LOCALHOST -c 3
 if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
     fail "Failed to create combined histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
index c193dce611a2..4ddc546771b5 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -1,15 +1,7 @@
 #!/bin/sh
 # description: event trigger - test multiple actions on hist trigger
 
-
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test multiple actions on hist trigger"
 echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
 TRIGGER1=events/sched/sched_wakeup/trigger
@@ -39,6 +27,4 @@ echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_
 echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
 echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
index e84e7d048566..39fb65b0cd9f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event histogram trigger onmatch action
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -40,11 +29,10 @@ echo "Test histogram variables,simple expression support and onmatch action"
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
 echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
-ping localhost -c 5
+
+ping $LOCALHOST -c 5
 if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
     fail "Failed to create onmatch action inter-event histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
index 7907d8aacde3..81ab3939c96a 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event histogram trigger onmatch-onmax action
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -40,11 +29,10 @@ echo "Test histogram variables,simple expression support and onmatch-onmax actio
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
 echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
-ping localhost -c 5
+
+ping $LOCALHOST -c 5
 if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
     fail "Failed to create onmatch-onmax action inter-event histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
index 38b7ed6242b2..1180ab5f0845 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event histogram trigger onmax action
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -38,11 +27,10 @@ echo "Test onmax action"
 
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
-ping localhost -c 3
+
+ping $LOCALHOST -c 3
 if ! grep -q "max:" events/sched/sched_switch/hist; then
     fail "Failed to create onmax action inter-event histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
index cef11377dcbd..41128219231a 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -1,13 +1,7 @@
 #!/bin/sh
 # description: event trigger - test synthetic event create remove
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -22,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -35,20 +25,18 @@ fi
 
 reset_trigger
 
-echo "Test create synthetic event with an error"
-echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
+echo "Test remove synthetic event"
+echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' >> synthetic_events
 if [ -d events/synthetic/wakeup_latency ]; then
-    fail "Created wakeup_latency synthetic event with an invalid format"
+    fail "Failed to delete wakeup_latency synthetic event"
 fi
 
 reset_trigger
 
-echo "Test remove synthetic event"
-echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
 if [ -d events/synthetic/wakeup_latency ]; then
-    fail "Failed to delete wakeup_latency synthetic event"
+    fail "Created wakeup_latency synthetic event with an invalid format"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
new file mode 100644
index 000000000000..88e6c3f43006
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test synthetic_events syntax parser"
+
+echo > synthetic_events
+
+# synthetic event must have a field
+! echo "myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
+# synthetic event must be found in synthetic_events
+grep "myevent[[:space:]]u64 var1" synthetic_events
+
+# it is not possible to add same name event
+! echo "myevent u64 var2" >> synthetic_events
+
+# Non-append open will cleanup all events and add new one
+echo "myevent u64 var2" > synthetic_events
+
+# multiple fields with different spaces
+echo "myevent u64 var1; u64 var2;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ;u64 var2" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+
+# test field types
+echo "myevent u32 var" > synthetic_events
+echo "myevent u16 var" > synthetic_events
+echo "myevent u8 var" > synthetic_events
+echo "myevent s64 var" > synthetic_events
+echo "myevent s32 var" > synthetic_events
+echo "myevent s16 var" > synthetic_events
+echo "myevent s8 var" > synthetic_events
+
+echo "myevent char var" > synthetic_events
+echo "myevent int var" > synthetic_events
+echo "myevent long var" > synthetic_events
+echo "myevent pid_t var" > synthetic_events
+
+echo "myevent unsigned char var" > synthetic_events
+echo "myevent unsigned int var" > synthetic_events
+echo "myevent unsigned long var" > synthetic_events
+grep "myevent[[:space:]]unsigned long var" synthetic_events
+
+# test string type
+echo "myevent char var[10]" > synthetic_events
+grep "myevent[[:space:]]char\[10\] var" synthetic_events
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
index 28cc355a3a7b..eddb51e1fbf7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test event enable/disable trigger
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -25,9 +18,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 FEATURE=`grep enable_event events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "event enable/disable trigger is not supported"
@@ -61,6 +51,4 @@ echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
 ! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
 ! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
index a48e23eb8a8b..2dcc2296ebdd 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test trigger filter
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -25,9 +18,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test trigger filter"
 echo 1 > tracing_on
 echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger
@@ -54,8 +44,4 @@ echo '!traceoff' > events/sched/sched_process_fork/trigger
 echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger
 echo '!traceoff' > events/sched/sched_process_fork/trigger
 
-
-
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index 8da80efc44d8..fab4431639d3 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test histogram modifiers
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -30,9 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test histogram with execname modifier"
 
 echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
@@ -71,6 +61,4 @@ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
 grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \
     fail "log2 modifier on kmem/kmalloc did not work"
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index 449fe9ff91a2..177e8d4c4744 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test histogram trigger
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -30,9 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test histogram basic tigger"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
@@ -79,6 +69,4 @@ check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \
     events/sched/sched_process_fork/hist | cut -d: -f2 ` ||
     fail "sort param on sched_process_fork did not work"
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index c5ef8b9d02b3..18fdaab9f570 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test multiple histogram triggers
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -30,11 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-reset_trigger
-
 echo "Test histogram multiple tiggers"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
@@ -67,8 +55,4 @@ grep test_hist events/sched/sched_process_exit/hist > /dev/null || \
 diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l`
 test $diffs -eq 0 || fail "Same name histograms are not same"
 
-reset_trigger
-
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
index ed38f0050d77..7717c0a09686 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -2,14 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # description: event trigger - test snapshot-trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -29,9 +22,6 @@ if [ ! -f snapshot ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 FEATURE=`grep snapshot events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "snapshot trigger is not supported"
@@ -57,6 +47,4 @@ echo "Test snapshot semantic errors"
 echo "snapshot" > events/sched/sched_process_fork/trigger
 ! echo "snapshot" > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
index 3121d795a868..398c05c4d2a7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
@@ -2,14 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # description: event trigger - test stacktrace-trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,9 +17,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "stacktrace trigger is not supported"
@@ -49,6 +39,4 @@ echo "Test stacktrace semantic errors"
 echo "stacktrace" > events/sched/sched_process_fork/trigger
 ! echo "stacktrace" > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
new file mode 100644
index 000000000000..ab6bedb25736
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram trigger
+# flags: instance
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+echo "Test histogram trace_marker tigger"
+
+echo 'hist:keys=common_pid' > events/ftrace/print/trigger
+for i in `seq 1 10` ; do echo "hello" > trace_marker; done
+grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
+    fail "hist trigger did not trigger correct times on trace_marker"
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
new file mode 100644
index 000000000000..df246e505af7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test snapshot trigger
+# flags: instance
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+    echo "snapshot is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+test_trace() {
+    file=$1
+    x=$2
+
+    cat $file | while read line; do
+	comment=`echo $line | sed -e 's/^#//'`
+	if [ "$line" != "$comment" ]; then
+	    continue
+	fi
+	echo "testing $line for >$x<"
+	match=`echo $line | sed -e "s/>$x<//"`
+	if [ "$line" = "$match" ]; then
+	    fail "$line does not have >$x< in it"
+	fi
+	x=$((x+2))
+    done
+}
+
+echo "Test snapshot trace_marker tigger"
+
+echo 'snapshot' > events/ftrace/print/trigger
+
+# make sure the snapshot is allocated
+
+grep -q 'Snapshot is allocated' snapshot
+
+for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
+
+test_trace trace 1
+test_trace snapshot 2
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
new file mode 100644
index 000000000000..18b4d1c2807e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event against kernel event
+# flags:
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic events not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/sched/sched_waking ]; then
+    echo "event sched_waking is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+echo "Test histogram kernel event to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+sleep 1
+echo "hello" > trace_marker
+
+grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
+    fail "hist trigger did not trigger correct times on trace_marker"
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+    fail "hist trigger did not trigger "
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
new file mode 100644
index 000000000000..dd262d6d0db6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event
+# flags:
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic events not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+echo "Test histogram trace_marker to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+echo -n "start" > trace_marker
+echo -n "end" > trace_marker
+
+cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l`
+
+if [ $cnt -ne 2 ]; then
+    fail "hist trace_marker trigger did not trigger correctly"
+fi
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+    fail "hist trigger did not trigger "
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
index c59d9eb546da..d5d2dcbc9cab 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
@@ -2,14 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # description: event trigger - test traceon/off trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,9 +17,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test traceoff trigger"
 echo 1 > tracing_on
 echo 'traceoff' > events/sched/sched_process_fork/trigger
@@ -54,6 +44,4 @@ echo 'traceon' > events/sched/sched_process_fork/trigger
 ! echo 'traceon' > events/sched/sched_process_fork/trigger
 ! echo 'traceoff' > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 8497a376ef9d..12631f0076a1 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -17,14 +17,6 @@ all:
 		fi \
 	done
 
-override define RUN_TESTS
-	@export KSFT_TAP_LEVEL=`echo 1`;
-	@echo "TAP version 13";
-	@echo "selftests: futex";
-	@echo "========================================";
-	@cd $(OUTPUT); ./run.sh
-endef
-
 override define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -36,10 +28,6 @@ override define INSTALL_RULE
 	done;
 endef
 
-override define EMIT_TESTS
-	echo "./run.sh"
-endef
-
 override define CLEAN
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index ff8feca49746..ad1eeb14fda7 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -18,6 +18,7 @@ TEST_GEN_FILES := \
 
 TEST_PROGS := run.sh
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 1bbb47565c55..46648427d537 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,31 +1,26 @@
 # SPDX-License-Identifier: GPL-2.0
 
+CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
+LDLIBS += -lmount -I/usr/include/libmount
+
 TEST_PROGS := gpio-mockup.sh
-TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES)
-BINARIES := gpio-mockup-chardev
-EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio
-EXTRA_DIRS := ../gpioinclude/
-EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o
-EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o
-EXTRA_OBJS += ../gpiolsgpio.o
+TEST_FILES := gpio-mockup-sysfs.sh
+TEST_PROGS_EXTENDED := gpio-mockup-chardev
+
+GPIODIR := $(realpath ../../../gpio)
+GPIOOBJ := gpio-utils.o
 
 include ../lib.mk
 
-all: $(BINARIES)
+all: $(TEST_PROGS_EXTENDED)
 
 override define CLEAN
-	$(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS)
-	$(RM) -r $(EXTRA_DIRS)
+	$(RM) $(TEST_PROGS_EXTENDED)
+	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
 endef
 
-CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
-LDLIBS += -lmount -I/usr/include/libmount
-
-$(BINARIES): ../../../gpio/gpio-utils.o ../../../../usr/include/linux/gpio.h
-
-../../../gpio/gpio-utils.o:
-	make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
-
-../../../../usr/include/linux/gpio.h:
-	make -C ../../../.. headers_install INSTALL_HDR_PATH=$(shell pwd)/../../../../usr/
+$(TEST_PROGS_EXTENDED):| khdr
+$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
 
+$(GPIODIR)/$(GPIOOBJ):
+	$(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
index 667e916fa7cc..f8d468f54e98 100644
--- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c
+++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
@@ -225,10 +225,10 @@ int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
 		if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
 			debugfs_value = !debugfs_value;
 
-		if (!(debugfs_dir == OUT && value == debugfs_value))
+		if (!(debugfs_dir == OUT && value == debugfs_value)) {
 			errno = -EINVAL;
-		ret = -errno;
-
+			ret = -errno;
+		}
 	}
 	gpiotools_release_linehandle(fd);
 
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 183fb932edbd..7f35b9880485 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: GPL-2.0
 
 #exit status
-#1: run as non-root user
+#1: Internal error
 #2: sysfs/debugfs not mount
 #3: insert module fail when gpio-mockup is a module.
-#4: other reason.
+#4: Skip test including run as non-root user.
+#5: other reason.
 
 SYSFS=
 GPIO_SYSFS=
@@ -15,6 +16,9 @@ GPIO_DEBUGFS=
 dev_type=
 module=
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 usage()
 {
 	echo "Usage:"
@@ -34,7 +38,7 @@ prerequisite()
 	msg="skip all tests:"
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 1
+		exit $ksft_skip
 	fi
 	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
 	if [ ! -d "$SYSFS" ]; then
@@ -73,7 +77,7 @@ remove_module()
 die()
 {
 	remove_module
-	exit 4
+	exit 5
 }
 
 test_chips()
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index d21edea9c560..f6cd03a87493 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -9,6 +9,8 @@
 #include <sys/timeb.h>
 #include <sched.h>
 #include <errno.h>
+#include <string.h>
+#include "../kselftest.h"
 
 void usage(char *name) {
 	printf ("Usage: %s cpunum\n", name);
@@ -41,8 +43,8 @@ int main(int argc, char **argv) {
 	fd = open(msr_file_name, O_RDONLY);
 
 	if (fd == -1) {
-		perror("Failed to open");
-		return 1;
+		printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
+		return KSFT_SKIP;
 	}
 
 	CPU_ZERO(&cpuset);
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index c670359becc6..e7008f614ad7 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -30,9 +30,18 @@
 
 EVALUATE_ONLY=0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
 	echo "$0 # Skipped: Test can only run on x86 architectures."
-	exit 0
+	exit $ksft_skip
+fi
+
+msg="skip all tests:"
+if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
+    echo $msg please run this as root >&2
+    exit $ksft_skip
 fi
 
 max_cpus=$(($(nproc)-1))
@@ -48,11 +57,12 @@ function run_test () {
 
 	echo "sleeping for 5 seconds"
 	sleep 5
-	num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l)
-	if [ $num_freqs -le 2 ]; then
-		cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1
+	grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+	num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+	if [ $num_freqs -ge 2 ]; then
+		tail -n 1 /tmp/result.freqs > /tmp/result.$1
 	else
-		cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1
+		cp /tmp/result.freqs /tmp/result.$1
 	fi
 	./msr 0 >> /tmp/result.$1
 
@@ -82,32 +92,37 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
 max_freq=$(($_max_freq / 1000))
 
 
-for freq in `seq $max_freq -100 $min_freq`
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
 do
 	echo "Setting maximum frequency to $freq"
 	cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
-	[ $EVALUATE_ONLY -eq 0 ] && run_test $freq
+	run_test $freq
 done
 
-echo "=============================================================================="
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
 
+echo "========================================================================"
 echo "The marketing frequency of the cpu is $mkt_freq MHz"
 echo "The maximum frequency of the cpu is $max_freq MHz"
 echo "The minimum frequency of the cpu is $min_freq MHz"
 
-cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
-
 # make a pretty table
-echo "Target      Actual      Difference     MSR(0x199)     max_perf_pct"
+echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
 for freq in `seq $max_freq -100 $min_freq`
 do
 	result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
 	msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
 	max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
-	if [ $result_freq -eq $freq ]; then
-		echo " $freq        $result_freq             0          $msr         $(($max_perf_pct*3300))"
-	else
-		echo " $freq        $result_freq          $(($result_freq-$freq))          $msr          $(($max_perf_pct*$max_freq))"
-	fi
+	cat >> /tmp/result.tab << EOF
+$freq
+$result_freq
+$((result_freq - freq))
+$msr
+$((max_perf_pct * max_freq))
+EOF
 done
+
+# print the table
+pr -aTt -5 < /tmp/result.tab
+
 exit 0
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index ee9382bdfadc..dac927e82336 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -196,10 +196,9 @@ int main(int argc, char **argv)
 	int msg, pid, err;
 	struct msgque_data msgque;
 
-	if (getuid() != 0) {
-		printf("Please run the test as root - Exiting.\n");
-		return ksft_exit_fail();
-	}
+	if (getuid() != 0)
+		return ksft_exit_skip(
+				"Please run the test as root - Exiting.\n");
 
 	msgque.key = ftok(argv[0], 822155650);
 	if (msgque.key == -1) {
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 7956ea3be667..0a76314b4414 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -62,13 +62,16 @@ ALL_TESTS="$ALL_TESTS 0007:5:1"
 ALL_TESTS="$ALL_TESTS 0008:150:1"
 ALL_TESTS="$ALL_TESTS 0009:150:1"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 test_modprobe()
 {
        if [ ! -d $DIR ]; then
                echo "$0: $DIR not present" >&2
                echo "You must have the following enabled in your kernel:" >&2
                cat $TEST_DIR/config >&2
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -105,12 +108,12 @@ test_reqs()
 {
 	if ! which modprobe 2> /dev/null > /dev/null; then
 		echo "$0: You need modprobe installed" >&2
-		exit 1
+		exit $ksft_skip
 	fi
 
 	if ! which kmod 2> /dev/null > /dev/null; then
 		echo "$0: You need kmod installed" >&2
-		exit 1
+		exit $ksft_skip
 	fi
 
 	# kmod 19 has a bad bug where it returns 0 when modprobe
@@ -124,13 +127,13 @@ test_reqs()
 		echo "$0: You need at least kmod 20" >&2
 		echo "kmod <= 19 is buggy, for details see:" >&2
 		echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
-		exit 1
+		exit $ksft_skip
 	fi
 
 	uid=$(id -u)
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1b9d8ecdebce..a3edb2c8e43d 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -19,8 +19,7 @@
 #define KSFT_FAIL  1
 #define KSFT_XFAIL 2
 #define KSFT_XPASS 3
-/* Treat skip as pass */
-#define KSFT_SKIP  KSFT_PASS
+#define KSFT_SKIP  4
 
 /* counters */
 struct ksft_count {
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000000..6210ba41c29e
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,8 @@
+/x86_64/cr4_cpuid_sync_test
+/x86_64/evmcs_test
+/x86_64/platform_info_test
+/x86_64/set_sregs_test
+/x86_64/sync_regs_test
+/x86_64/vmx_tsc_adjust_test
+/x86_64/state_test
+/dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d9d00319b07c..01a219229238 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,21 +1,31 @@
 all:
 
-top_srcdir = ../../../../
+top_srcdir = ../../../..
 UNAME_M := $(shell uname -m)
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
-LIBKVM_x86_64 = lib/x86.c lib/vmx.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
+LIBKVM_aarch64 = lib/aarch64/processor.c
 
-TEST_GEN_PROGS_x86_64 = set_sregs_test
-TEST_GEN_PROGS_x86_64 += sync_regs_test
-TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
+
+TEST_GEN_PROGS_aarch64 += dirty_log_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
 
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
+LDFLAGS += -pthread
 
 # After inclusion, $(OUTPUT) is defined and
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
@@ -23,7 +33,7 @@ include ../lib.mk
 
 STATIC_LIBS := $(OUTPUT)/libkvm.a
 LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
 $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
@@ -32,9 +42,15 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
 $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
 	$(AR) crs $@ $^
 
-$(LINUX_HDR_PATH):
-	make -C $(top_srcdir) headers_install
-
-all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
+all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
-$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
+$(STATIC_LIBS):| khdr
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+	$(RM) cscope.*
+	(find $(include_paths) -name '*.h' \
+		-exec realpath --relative-base=$(PWD) {} \;; \
+	find . -name '*.c' \
+		-exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+	cscope -b
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
new file mode 100644
index 000000000000..aeff95a91b15
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define DEBUG printf
+
+#define VCPU_ID				1
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test memory offset, 1G */
+#define DEFAULT_GUEST_TEST_MEM		0x40000000
+
+/* How many pages to dirty for each guest loop */
+#define TEST_PAGES_PER_LOOP		1024
+
+/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
+#define TEST_HOST_LOOP_N		32UL
+
+/* Interval for each host loop (ms) */
+#define TEST_HOST_LOOP_INTERVAL		10UL
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+static uint64_t guest_num_pages;
+static uint64_t random_array[TEST_PAGES_PER_LOOP];
+static uint64_t iteration;
+
+/*
+ * GPA offset of the testing memory slot. Must be bigger than
+ * DEFAULT_GUEST_PHY_PAGES.
+ */
+static uint64_t guest_test_mem = DEFAULT_GUEST_TEST_MEM;
+
+/*
+ * Continuously write to the first 8 bytes of a random pages within
+ * the testing memory region.
+ */
+static void guest_code(void)
+{
+	int i;
+
+	while (true) {
+		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+			uint64_t addr = guest_test_mem;
+			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+				* guest_page_size;
+			addr &= ~(host_page_size - 1);
+			*(uint64_t *)addr = READ_ONCE(iteration);
+		}
+
+		/* Tell the host that we need more random numbers */
+		GUEST_SYNC(1);
+	}
+}
+
+/* Host variables */
+static bool host_quit;
+
+/* Points to the test VM memory region on which we track dirty logs */
+static void *host_test_mem;
+static uint64_t host_num_pages;
+
+/* For statistics only */
+static uint64_t host_dirty_count;
+static uint64_t host_clear_count;
+static uint64_t host_track_next_count;
+
+/*
+ * We use this bitmap to track some pages that should have its dirty
+ * bit set in the _next_ iteration.  For example, if we detected the
+ * page value changed to current iteration but at the same time the
+ * page bit is cleared in the latest bitmap, then the system must
+ * report that write in the next get dirty log call.
+ */
+static unsigned long *host_bmap_track;
+
+static void generate_random_array(uint64_t *guest_array, uint64_t size)
+{
+	uint64_t i;
+
+	for (i = 0; i < size; i++)
+		guest_array[i] = random();
+}
+
+static void *vcpu_worker(void *data)
+{
+	int ret;
+	struct kvm_vm *vm = data;
+	uint64_t *guest_array;
+	uint64_t pages_count = 0;
+	struct kvm_run *run;
+	struct ucall uc;
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
+	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+
+	while (!READ_ONCE(host_quit)) {
+		/* Let the guest dirty the random pages */
+		ret = _vcpu_run(vm, VCPU_ID);
+		if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) {
+			pages_count += TEST_PAGES_PER_LOOP;
+			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+		} else {
+			TEST_ASSERT(false,
+				    "Invalid guest sync status: "
+				    "exit_reason=%s\n",
+				    exit_reason_str(run->exit_reason));
+		}
+	}
+
+	DEBUG("Dirtied %"PRIu64" pages\n", pages_count);
+
+	return NULL;
+}
+
+static void vm_dirty_log_verify(unsigned long *bmap)
+{
+	uint64_t page;
+	uint64_t *value_ptr;
+	uint64_t step = host_page_size >= guest_page_size ? 1 :
+				guest_page_size / host_page_size;
+
+	for (page = 0; page < host_num_pages; page += step) {
+		value_ptr = host_test_mem + page * host_page_size;
+
+		/* If this is a special page that we were tracking... */
+		if (test_and_clear_bit(page, host_bmap_track)) {
+			host_track_next_count++;
+			TEST_ASSERT(test_bit(page, bmap),
+				    "Page %"PRIu64" should have its dirty bit "
+				    "set in this iteration but it is missing",
+				    page);
+		}
+
+		if (test_bit(page, bmap)) {
+			host_dirty_count++;
+			/*
+			 * If the bit is set, the value written onto
+			 * the corresponding page should be either the
+			 * previous iteration number or the current one.
+			 */
+			TEST_ASSERT(*value_ptr == iteration ||
+				    *value_ptr == iteration - 1,
+				    "Set page %"PRIu64" value %"PRIu64
+				    " incorrect (iteration=%"PRIu64")",
+				    page, *value_ptr, iteration);
+		} else {
+			host_clear_count++;
+			/*
+			 * If cleared, the value written can be any
+			 * value smaller or equals to the iteration
+			 * number.  Note that the value can be exactly
+			 * (iteration-1) if that write can happen
+			 * like this:
+			 *
+			 * (1) increase loop count to "iteration-1"
+			 * (2) write to page P happens (with value
+			 *     "iteration-1")
+			 * (3) get dirty log for "iteration-1"; we'll
+			 *     see that page P bit is set (dirtied),
+			 *     and not set the bit in host_bmap_track
+			 * (4) increase loop count to "iteration"
+			 *     (which is current iteration)
+			 * (5) get dirty log for current iteration,
+			 *     we'll see that page P is cleared, with
+			 *     value "iteration-1".
+			 */
+			TEST_ASSERT(*value_ptr <= iteration,
+				    "Clear page %"PRIu64" value %"PRIu64
+				    " incorrect (iteration=%"PRIu64")",
+				    page, *value_ptr, iteration);
+			if (*value_ptr == iteration) {
+				/*
+				 * This page is _just_ modified; it
+				 * should report its dirtyness in the
+				 * next run
+				 */
+				set_bit(page, host_bmap_track);
+			}
+		}
+	}
+}
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+				uint64_t extra_mem_pages, void *guest_code)
+{
+	struct kvm_vm *vm;
+	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+	return vm;
+}
+
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+		     unsigned long interval, bool top_offset)
+{
+	unsigned int guest_pa_bits, guest_page_shift;
+	pthread_t vcpu_thread;
+	struct kvm_vm *vm;
+	uint64_t max_gfn;
+	unsigned long *bmap;
+
+	switch (mode) {
+	case VM_MODE_P52V48_4K:
+		guest_pa_bits = 52;
+		guest_page_shift = 12;
+		break;
+	case VM_MODE_P52V48_64K:
+		guest_pa_bits = 52;
+		guest_page_shift = 16;
+		break;
+	case VM_MODE_P40V48_4K:
+		guest_pa_bits = 40;
+		guest_page_shift = 12;
+		break;
+	case VM_MODE_P40V48_64K:
+		guest_pa_bits = 40;
+		guest_page_shift = 16;
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+	}
+
+	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+	max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
+	guest_page_size = (1ul << guest_page_shift);
+	/* 1G of guest page sized pages */
+	guest_num_pages = (1ul << (30 - guest_page_shift));
+	host_page_size = getpagesize();
+	host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
+			 !!((guest_num_pages * guest_page_size) % host_page_size);
+
+	if (top_offset) {
+		guest_test_mem = (max_gfn - guest_num_pages) * guest_page_size;
+		guest_test_mem &= ~(host_page_size - 1);
+	}
+
+	DEBUG("guest test mem offset: 0x%lx\n", guest_test_mem);
+
+	bmap = bitmap_alloc(host_num_pages);
+	host_bmap_track = bitmap_alloc(host_num_pages);
+
+	vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
+
+	/* Add an extra memory slot for testing dirty logging */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    guest_test_mem,
+				    TEST_MEM_SLOT_INDEX,
+				    guest_num_pages,
+				    KVM_MEM_LOG_DIRTY_PAGES);
+
+	/* Do 1:1 mapping for the dirty track memory slot */
+	virt_map(vm, guest_test_mem, guest_test_mem,
+		 guest_num_pages * guest_page_size, 0);
+
+	/* Cache the HVA pointer of the region */
+	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_mem);
+
+#ifdef __x86_64__
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+#endif
+#ifdef __aarch64__
+	ucall_init(vm, UCALL_MMIO, NULL);
+#endif
+
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, host_page_size);
+	sync_global_to_guest(vm, guest_page_size);
+	sync_global_to_guest(vm, guest_test_mem);
+	sync_global_to_guest(vm, guest_num_pages);
+
+	/* Start the iterations */
+	iteration = 1;
+	sync_global_to_guest(vm, iteration);
+	host_quit = false;
+	host_dirty_count = 0;
+	host_clear_count = 0;
+	host_track_next_count = 0;
+
+	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+	while (iteration < iterations) {
+		/* Give the vcpu thread some time to dirty some pages */
+		usleep(interval * 1000);
+		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+		vm_dirty_log_verify(bmap);
+		iteration++;
+		sync_global_to_guest(vm, iteration);
+	}
+
+	/* Tell the vcpu thread to quit */
+	host_quit = true;
+	pthread_join(vcpu_thread, NULL);
+
+	DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+	      "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+	      host_track_next_count);
+
+	free(bmap);
+	free(host_bmap_track);
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+}
+
+static struct vm_guest_modes {
+	enum vm_guest_mode mode;
+	bool supported;
+	bool enabled;
+} vm_guest_modes[NUM_VM_MODES] = {
+#if defined(__x86_64__)
+	{ VM_MODE_P52V48_4K,	1, 1, },
+	{ VM_MODE_P52V48_64K,	0, 0, },
+	{ VM_MODE_P40V48_4K,	0, 0, },
+	{ VM_MODE_P40V48_64K,	0, 0, },
+#elif defined(__aarch64__)
+	{ VM_MODE_P52V48_4K,	0, 0, },
+	{ VM_MODE_P52V48_64K,	0, 0, },
+	{ VM_MODE_P40V48_4K,	1, 1, },
+	{ VM_MODE_P40V48_64K,	1, 1, },
+#endif
+};
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-i iterations] [-I interval] "
+	       "[-o offset] [-t] [-m mode]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+	       TEST_HOST_LOOP_INTERVAL);
+	printf(" -o: guest test memory offset (default: 0x%lx)\n",
+	       DEFAULT_GUEST_TEST_MEM);
+	printf(" -t: map guest test memory at the top of the allowed "
+	       "physical address range\n");
+	printf(" -m: specify the guest mode ID to test "
+	       "(default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n",
+		       vm_guest_modes[i].mode,
+		       vm_guest_mode_string(vm_guest_modes[i].mode),
+		       vm_guest_modes[i].supported ? " (supported)" : "");
+	}
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long iterations = TEST_HOST_LOOP_N;
+	unsigned long interval = TEST_HOST_LOOP_INTERVAL;
+	bool mode_selected = false;
+	bool top_offset = false;
+	unsigned int mode;
+	int opt, i;
+
+	while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'I':
+			interval = strtol(optarg, NULL, 10);
+			break;
+		case 'o':
+			guest_test_mem = strtoull(optarg, NULL, 0);
+			break;
+		case 't':
+			top_offset = true;
+			break;
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					vm_guest_modes[i].enabled = 0;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			vm_guest_modes[mode].enabled = 1;
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+	TEST_ASSERT(!top_offset || guest_test_mem == DEFAULT_GUEST_TEST_MEM,
+		    "Cannot use both -o [offset] and -t at the same time");
+
+	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+	      iterations, interval);
+
+	srandom(time(0));
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!vm_guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(vm_guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    vm_guest_modes[i].mode,
+			    vm_guest_mode_string(vm_guest_modes[i].mode));
+		run_test(vm_guest_modes[i].mode, iterations, interval, top_offset);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
new file mode 100644
index 000000000000..9ef2ab1a0c08
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+			   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define CPACR_EL1	3, 0,  1, 0, 2
+#define TCR_EL1		3, 0,  2, 0, 2
+#define MAIR_EL1	3, 0, 10, 2, 0
+#define TTBR0_EL1	3, 0,  2, 0, 0
+#define SCTLR_EL1	3, 0,  1, 0, 0
+
+/*
+ * Default MAIR
+ *                  index   attribute
+ * DEVICE_nGnRnE      0     0000:0000
+ * DEVICE_nGnRE       1     0000:0100
+ * DEVICE_GRE         2     0000:1100
+ * NORMAL_NC          3     0100:0100
+ * NORMAL             4     1111:1111
+ * NORMAL_WT          5     1011:1011
+ */
+#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
+			  (0x04ul << (1 * 8)) | \
+			  (0x0cul << (2 * 8)) | \
+			  (0x44ul << (3 * 8)) | \
+			  (0xfful << (4 * 8)) | \
+			  (0xbbul << (5 * 8)))
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
+{
+	struct kvm_one_reg reg;
+	reg.id = id;
+	reg.addr = (uint64_t)addr;
+	vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
+{
+	struct kvm_one_reg reg;
+	reg.id = id;
+	reg.addr = (uint64_t)&val;
+	vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
+}
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
new file mode 100644
index 000000000000..4059014d93ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -0,0 +1,1098 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+extern bool enable_evmcs;
+
+struct hv_vp_assist_page {
+	__u32 apic_assist;
+	__u32 reserved;
+	__u64 vtl_control[2];
+	__u64 nested_enlightenments_control[2];
+	__u32 enlighten_vmentry;
+	__u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+	u32 revision_id;
+	u32 abort;
+
+	u16 host_es_selector;
+	u16 host_cs_selector;
+	u16 host_ss_selector;
+	u16 host_ds_selector;
+	u16 host_fs_selector;
+	u16 host_gs_selector;
+	u16 host_tr_selector;
+
+	u64 host_ia32_pat;
+	u64 host_ia32_efer;
+
+	u64 host_cr0;
+	u64 host_cr3;
+	u64 host_cr4;
+
+	u64 host_ia32_sysenter_esp;
+	u64 host_ia32_sysenter_eip;
+	u64 host_rip;
+	u32 host_ia32_sysenter_cs;
+
+	u32 pin_based_vm_exec_control;
+	u32 vm_exit_controls;
+	u32 secondary_vm_exec_control;
+
+	u64 io_bitmap_a;
+	u64 io_bitmap_b;
+	u64 msr_bitmap;
+
+	u16 guest_es_selector;
+	u16 guest_cs_selector;
+	u16 guest_ss_selector;
+	u16 guest_ds_selector;
+	u16 guest_fs_selector;
+	u16 guest_gs_selector;
+	u16 guest_ldtr_selector;
+	u16 guest_tr_selector;
+
+	u32 guest_es_limit;
+	u32 guest_cs_limit;
+	u32 guest_ss_limit;
+	u32 guest_ds_limit;
+	u32 guest_fs_limit;
+	u32 guest_gs_limit;
+	u32 guest_ldtr_limit;
+	u32 guest_tr_limit;
+	u32 guest_gdtr_limit;
+	u32 guest_idtr_limit;
+
+	u32 guest_es_ar_bytes;
+	u32 guest_cs_ar_bytes;
+	u32 guest_ss_ar_bytes;
+	u32 guest_ds_ar_bytes;
+	u32 guest_fs_ar_bytes;
+	u32 guest_gs_ar_bytes;
+	u32 guest_ldtr_ar_bytes;
+	u32 guest_tr_ar_bytes;
+
+	u64 guest_es_base;
+	u64 guest_cs_base;
+	u64 guest_ss_base;
+	u64 guest_ds_base;
+	u64 guest_fs_base;
+	u64 guest_gs_base;
+	u64 guest_ldtr_base;
+	u64 guest_tr_base;
+	u64 guest_gdtr_base;
+	u64 guest_idtr_base;
+
+	u64 padding64_1[3];
+
+	u64 vm_exit_msr_store_addr;
+	u64 vm_exit_msr_load_addr;
+	u64 vm_entry_msr_load_addr;
+
+	u64 cr3_target_value0;
+	u64 cr3_target_value1;
+	u64 cr3_target_value2;
+	u64 cr3_target_value3;
+
+	u32 page_fault_error_code_mask;
+	u32 page_fault_error_code_match;
+
+	u32 cr3_target_count;
+	u32 vm_exit_msr_store_count;
+	u32 vm_exit_msr_load_count;
+	u32 vm_entry_msr_load_count;
+
+	u64 tsc_offset;
+	u64 virtual_apic_page_addr;
+	u64 vmcs_link_pointer;
+
+	u64 guest_ia32_debugctl;
+	u64 guest_ia32_pat;
+	u64 guest_ia32_efer;
+
+	u64 guest_pdptr0;
+	u64 guest_pdptr1;
+	u64 guest_pdptr2;
+	u64 guest_pdptr3;
+
+	u64 guest_pending_dbg_exceptions;
+	u64 guest_sysenter_esp;
+	u64 guest_sysenter_eip;
+
+	u32 guest_activity_state;
+	u32 guest_sysenter_cs;
+
+	u64 cr0_guest_host_mask;
+	u64 cr4_guest_host_mask;
+	u64 cr0_read_shadow;
+	u64 cr4_read_shadow;
+	u64 guest_cr0;
+	u64 guest_cr3;
+	u64 guest_cr4;
+	u64 guest_dr7;
+
+	u64 host_fs_base;
+	u64 host_gs_base;
+	u64 host_tr_base;
+	u64 host_gdtr_base;
+	u64 host_idtr_base;
+	u64 host_rsp;
+
+	u64 ept_pointer;
+
+	u16 virtual_processor_id;
+	u16 padding16[3];
+
+	u64 padding64_2[5];
+	u64 guest_physical_address;
+
+	u32 vm_instruction_error;
+	u32 vm_exit_reason;
+	u32 vm_exit_intr_info;
+	u32 vm_exit_intr_error_code;
+	u32 idt_vectoring_info_field;
+	u32 idt_vectoring_error_code;
+	u32 vm_exit_instruction_len;
+	u32 vmx_instruction_info;
+
+	u64 exit_qualification;
+	u64 exit_io_instruction_ecx;
+	u64 exit_io_instruction_esi;
+	u64 exit_io_instruction_edi;
+	u64 exit_io_instruction_eip;
+
+	u64 guest_linear_address;
+	u64 guest_rsp;
+	u64 guest_rflags;
+
+	u32 guest_interruptibility_info;
+	u32 cpu_based_vm_exec_control;
+	u32 exception_bitmap;
+	u32 vm_entry_controls;
+	u32 vm_entry_intr_info_field;
+	u32 vm_entry_exception_error_code;
+	u32 vm_entry_instruction_len;
+	u32 tpr_threshold;
+
+	u64 guest_rip;
+
+	u32 hv_clean_fields;
+	u32 hv_padding_32;
+	u32 hv_synthetic_controls;
+	struct {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 reserved:30;
+	} hv_enlightenments_control;
+	u32 hv_vp_id;
+
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 padding64_4[4];
+	u64 guest_bndcfgs;
+	u64 padding64_5[7];
+	u64 xss_exit_bitmap;
+	u64 padding64_6[7];
+};
+
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+	u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+		HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+	current_vp_assist = vp_assist;
+
+	enable_evmcs = true;
+
+	return 0;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+	current_vp_assist->current_nested_vmcs = vmcs_pa;
+	current_vp_assist->enlighten_vmentry = 1;
+
+	current_evmcs = vmcs;
+
+	return 0;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+	*value = current_vp_assist->current_nested_vmcs &
+		~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		*value = current_evmcs->guest_rip;
+		break;
+	case GUEST_RSP:
+		*value = current_evmcs->guest_rsp;
+		break;
+	case GUEST_RFLAGS:
+		*value = current_evmcs->guest_rflags;
+		break;
+	case HOST_IA32_PAT:
+		*value = current_evmcs->host_ia32_pat;
+		break;
+	case HOST_IA32_EFER:
+		*value = current_evmcs->host_ia32_efer;
+		break;
+	case HOST_CR0:
+		*value = current_evmcs->host_cr0;
+		break;
+	case HOST_CR3:
+		*value = current_evmcs->host_cr3;
+		break;
+	case HOST_CR4:
+		*value = current_evmcs->host_cr4;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		*value = current_evmcs->host_ia32_sysenter_esp;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		*value = current_evmcs->host_ia32_sysenter_eip;
+		break;
+	case HOST_RIP:
+		*value = current_evmcs->host_rip;
+		break;
+	case IO_BITMAP_A:
+		*value = current_evmcs->io_bitmap_a;
+		break;
+	case IO_BITMAP_B:
+		*value = current_evmcs->io_bitmap_b;
+		break;
+	case MSR_BITMAP:
+		*value = current_evmcs->msr_bitmap;
+		break;
+	case GUEST_ES_BASE:
+		*value = current_evmcs->guest_es_base;
+		break;
+	case GUEST_CS_BASE:
+		*value = current_evmcs->guest_cs_base;
+		break;
+	case GUEST_SS_BASE:
+		*value = current_evmcs->guest_ss_base;
+		break;
+	case GUEST_DS_BASE:
+		*value = current_evmcs->guest_ds_base;
+		break;
+	case GUEST_FS_BASE:
+		*value = current_evmcs->guest_fs_base;
+		break;
+	case GUEST_GS_BASE:
+		*value = current_evmcs->guest_gs_base;
+		break;
+	case GUEST_LDTR_BASE:
+		*value = current_evmcs->guest_ldtr_base;
+		break;
+	case GUEST_TR_BASE:
+		*value = current_evmcs->guest_tr_base;
+		break;
+	case GUEST_GDTR_BASE:
+		*value = current_evmcs->guest_gdtr_base;
+		break;
+	case GUEST_IDTR_BASE:
+		*value = current_evmcs->guest_idtr_base;
+		break;
+	case TSC_OFFSET:
+		*value = current_evmcs->tsc_offset;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		*value = current_evmcs->virtual_apic_page_addr;
+		break;
+	case VMCS_LINK_POINTER:
+		*value = current_evmcs->vmcs_link_pointer;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		*value = current_evmcs->guest_ia32_debugctl;
+		break;
+	case GUEST_IA32_PAT:
+		*value = current_evmcs->guest_ia32_pat;
+		break;
+	case GUEST_IA32_EFER:
+		*value = current_evmcs->guest_ia32_efer;
+		break;
+	case GUEST_PDPTR0:
+		*value = current_evmcs->guest_pdptr0;
+		break;
+	case GUEST_PDPTR1:
+		*value = current_evmcs->guest_pdptr1;
+		break;
+	case GUEST_PDPTR2:
+		*value = current_evmcs->guest_pdptr2;
+		break;
+	case GUEST_PDPTR3:
+		*value = current_evmcs->guest_pdptr3;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		*value = current_evmcs->guest_pending_dbg_exceptions;
+		break;
+	case GUEST_SYSENTER_ESP:
+		*value = current_evmcs->guest_sysenter_esp;
+		break;
+	case GUEST_SYSENTER_EIP:
+		*value = current_evmcs->guest_sysenter_eip;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		*value = current_evmcs->cr0_guest_host_mask;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		*value = current_evmcs->cr4_guest_host_mask;
+		break;
+	case CR0_READ_SHADOW:
+		*value = current_evmcs->cr0_read_shadow;
+		break;
+	case CR4_READ_SHADOW:
+		*value = current_evmcs->cr4_read_shadow;
+		break;
+	case GUEST_CR0:
+		*value = current_evmcs->guest_cr0;
+		break;
+	case GUEST_CR3:
+		*value = current_evmcs->guest_cr3;
+		break;
+	case GUEST_CR4:
+		*value = current_evmcs->guest_cr4;
+		break;
+	case GUEST_DR7:
+		*value = current_evmcs->guest_dr7;
+		break;
+	case HOST_FS_BASE:
+		*value = current_evmcs->host_fs_base;
+		break;
+	case HOST_GS_BASE:
+		*value = current_evmcs->host_gs_base;
+		break;
+	case HOST_TR_BASE:
+		*value = current_evmcs->host_tr_base;
+		break;
+	case HOST_GDTR_BASE:
+		*value = current_evmcs->host_gdtr_base;
+		break;
+	case HOST_IDTR_BASE:
+		*value = current_evmcs->host_idtr_base;
+		break;
+	case HOST_RSP:
+		*value = current_evmcs->host_rsp;
+		break;
+	case EPT_POINTER:
+		*value = current_evmcs->ept_pointer;
+		break;
+	case GUEST_BNDCFGS:
+		*value = current_evmcs->guest_bndcfgs;
+		break;
+	case XSS_EXIT_BITMAP:
+		*value = current_evmcs->xss_exit_bitmap;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		*value = current_evmcs->guest_physical_address;
+		break;
+	case EXIT_QUALIFICATION:
+		*value = current_evmcs->exit_qualification;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		*value = current_evmcs->guest_linear_address;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		*value = current_evmcs->vm_exit_msr_store_addr;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_exit_msr_load_addr;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_entry_msr_load_addr;
+		break;
+	case CR3_TARGET_VALUE0:
+		*value = current_evmcs->cr3_target_value0;
+		break;
+	case CR3_TARGET_VALUE1:
+		*value = current_evmcs->cr3_target_value1;
+		break;
+	case CR3_TARGET_VALUE2:
+		*value = current_evmcs->cr3_target_value2;
+		break;
+	case CR3_TARGET_VALUE3:
+		*value = current_evmcs->cr3_target_value3;
+		break;
+	case TPR_THRESHOLD:
+		*value = current_evmcs->tpr_threshold;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		*value = current_evmcs->guest_interruptibility_info;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->cpu_based_vm_exec_control;
+		break;
+	case EXCEPTION_BITMAP:
+		*value = current_evmcs->exception_bitmap;
+		break;
+	case VM_ENTRY_CONTROLS:
+		*value = current_evmcs->vm_entry_controls;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		*value = current_evmcs->vm_entry_intr_info_field;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		*value = current_evmcs->vm_entry_exception_error_code;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_entry_instruction_len;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		*value = current_evmcs->host_ia32_sysenter_cs;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->pin_based_vm_exec_control;
+		break;
+	case VM_EXIT_CONTROLS:
+		*value = current_evmcs->vm_exit_controls;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		*value = current_evmcs->secondary_vm_exec_control;
+		break;
+	case GUEST_ES_LIMIT:
+		*value = current_evmcs->guest_es_limit;
+		break;
+	case GUEST_CS_LIMIT:
+		*value = current_evmcs->guest_cs_limit;
+		break;
+	case GUEST_SS_LIMIT:
+		*value = current_evmcs->guest_ss_limit;
+		break;
+	case GUEST_DS_LIMIT:
+		*value = current_evmcs->guest_ds_limit;
+		break;
+	case GUEST_FS_LIMIT:
+		*value = current_evmcs->guest_fs_limit;
+		break;
+	case GUEST_GS_LIMIT:
+		*value = current_evmcs->guest_gs_limit;
+		break;
+	case GUEST_LDTR_LIMIT:
+		*value = current_evmcs->guest_ldtr_limit;
+		break;
+	case GUEST_TR_LIMIT:
+		*value = current_evmcs->guest_tr_limit;
+		break;
+	case GUEST_GDTR_LIMIT:
+		*value = current_evmcs->guest_gdtr_limit;
+		break;
+	case GUEST_IDTR_LIMIT:
+		*value = current_evmcs->guest_idtr_limit;
+		break;
+	case GUEST_ES_AR_BYTES:
+		*value = current_evmcs->guest_es_ar_bytes;
+		break;
+	case GUEST_CS_AR_BYTES:
+		*value = current_evmcs->guest_cs_ar_bytes;
+		break;
+	case GUEST_SS_AR_BYTES:
+		*value = current_evmcs->guest_ss_ar_bytes;
+		break;
+	case GUEST_DS_AR_BYTES:
+		*value = current_evmcs->guest_ds_ar_bytes;
+		break;
+	case GUEST_FS_AR_BYTES:
+		*value = current_evmcs->guest_fs_ar_bytes;
+		break;
+	case GUEST_GS_AR_BYTES:
+		*value = current_evmcs->guest_gs_ar_bytes;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		*value = current_evmcs->guest_ldtr_ar_bytes;
+		break;
+	case GUEST_TR_AR_BYTES:
+		*value = current_evmcs->guest_tr_ar_bytes;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		*value = current_evmcs->guest_activity_state;
+		break;
+	case GUEST_SYSENTER_CS:
+		*value = current_evmcs->guest_sysenter_cs;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		*value = current_evmcs->vm_instruction_error;
+		break;
+	case VM_EXIT_REASON:
+		*value = current_evmcs->vm_exit_reason;
+		break;
+	case VM_EXIT_INTR_INFO:
+		*value = current_evmcs->vm_exit_intr_info;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		*value = current_evmcs->vm_exit_intr_error_code;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		*value = current_evmcs->idt_vectoring_info_field;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		*value = current_evmcs->idt_vectoring_error_code;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_exit_instruction_len;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		*value = current_evmcs->vmx_instruction_info;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		*value = current_evmcs->page_fault_error_code_mask;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		*value = current_evmcs->page_fault_error_code_match;
+		break;
+	case CR3_TARGET_COUNT:
+		*value = current_evmcs->cr3_target_count;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		*value = current_evmcs->vm_exit_msr_store_count;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_exit_msr_load_count;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_entry_msr_load_count;
+		break;
+	case HOST_ES_SELECTOR:
+		*value = current_evmcs->host_es_selector;
+		break;
+	case HOST_CS_SELECTOR:
+		*value = current_evmcs->host_cs_selector;
+		break;
+	case HOST_SS_SELECTOR:
+		*value = current_evmcs->host_ss_selector;
+		break;
+	case HOST_DS_SELECTOR:
+		*value = current_evmcs->host_ds_selector;
+		break;
+	case HOST_FS_SELECTOR:
+		*value = current_evmcs->host_fs_selector;
+		break;
+	case HOST_GS_SELECTOR:
+		*value = current_evmcs->host_gs_selector;
+		break;
+	case HOST_TR_SELECTOR:
+		*value = current_evmcs->host_tr_selector;
+		break;
+	case GUEST_ES_SELECTOR:
+		*value = current_evmcs->guest_es_selector;
+		break;
+	case GUEST_CS_SELECTOR:
+		*value = current_evmcs->guest_cs_selector;
+		break;
+	case GUEST_SS_SELECTOR:
+		*value = current_evmcs->guest_ss_selector;
+		break;
+	case GUEST_DS_SELECTOR:
+		*value = current_evmcs->guest_ds_selector;
+		break;
+	case GUEST_FS_SELECTOR:
+		*value = current_evmcs->guest_fs_selector;
+		break;
+	case GUEST_GS_SELECTOR:
+		*value = current_evmcs->guest_gs_selector;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		*value = current_evmcs->guest_ldtr_selector;
+		break;
+	case GUEST_TR_SELECTOR:
+		*value = current_evmcs->guest_tr_selector;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		*value = current_evmcs->virtual_processor_id;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		current_evmcs->guest_rip = value;
+		break;
+	case GUEST_RSP:
+		current_evmcs->guest_rsp = value;
+		break;
+	case GUEST_RFLAGS:
+		current_evmcs->guest_rflags = value;
+		break;
+	case HOST_IA32_PAT:
+		current_evmcs->host_ia32_pat = value;
+		break;
+	case HOST_IA32_EFER:
+		current_evmcs->host_ia32_efer = value;
+		break;
+	case HOST_CR0:
+		current_evmcs->host_cr0 = value;
+		break;
+	case HOST_CR3:
+		current_evmcs->host_cr3 = value;
+		break;
+	case HOST_CR4:
+		current_evmcs->host_cr4 = value;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		current_evmcs->host_ia32_sysenter_esp = value;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		current_evmcs->host_ia32_sysenter_eip = value;
+		break;
+	case HOST_RIP:
+		current_evmcs->host_rip = value;
+		break;
+	case IO_BITMAP_A:
+		current_evmcs->io_bitmap_a = value;
+		break;
+	case IO_BITMAP_B:
+		current_evmcs->io_bitmap_b = value;
+		break;
+	case MSR_BITMAP:
+		current_evmcs->msr_bitmap = value;
+		break;
+	case GUEST_ES_BASE:
+		current_evmcs->guest_es_base = value;
+		break;
+	case GUEST_CS_BASE:
+		current_evmcs->guest_cs_base = value;
+		break;
+	case GUEST_SS_BASE:
+		current_evmcs->guest_ss_base = value;
+		break;
+	case GUEST_DS_BASE:
+		current_evmcs->guest_ds_base = value;
+		break;
+	case GUEST_FS_BASE:
+		current_evmcs->guest_fs_base = value;
+		break;
+	case GUEST_GS_BASE:
+		current_evmcs->guest_gs_base = value;
+		break;
+	case GUEST_LDTR_BASE:
+		current_evmcs->guest_ldtr_base = value;
+		break;
+	case GUEST_TR_BASE:
+		current_evmcs->guest_tr_base = value;
+		break;
+	case GUEST_GDTR_BASE:
+		current_evmcs->guest_gdtr_base = value;
+		break;
+	case GUEST_IDTR_BASE:
+		current_evmcs->guest_idtr_base = value;
+		break;
+	case TSC_OFFSET:
+		current_evmcs->tsc_offset = value;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		current_evmcs->virtual_apic_page_addr = value;
+		break;
+	case VMCS_LINK_POINTER:
+		current_evmcs->vmcs_link_pointer = value;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		current_evmcs->guest_ia32_debugctl = value;
+		break;
+	case GUEST_IA32_PAT:
+		current_evmcs->guest_ia32_pat = value;
+		break;
+	case GUEST_IA32_EFER:
+		current_evmcs->guest_ia32_efer = value;
+		break;
+	case GUEST_PDPTR0:
+		current_evmcs->guest_pdptr0 = value;
+		break;
+	case GUEST_PDPTR1:
+		current_evmcs->guest_pdptr1 = value;
+		break;
+	case GUEST_PDPTR2:
+		current_evmcs->guest_pdptr2 = value;
+		break;
+	case GUEST_PDPTR3:
+		current_evmcs->guest_pdptr3 = value;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		current_evmcs->guest_pending_dbg_exceptions = value;
+		break;
+	case GUEST_SYSENTER_ESP:
+		current_evmcs->guest_sysenter_esp = value;
+		break;
+	case GUEST_SYSENTER_EIP:
+		current_evmcs->guest_sysenter_eip = value;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		current_evmcs->cr0_guest_host_mask = value;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		current_evmcs->cr4_guest_host_mask = value;
+		break;
+	case CR0_READ_SHADOW:
+		current_evmcs->cr0_read_shadow = value;
+		break;
+	case CR4_READ_SHADOW:
+		current_evmcs->cr4_read_shadow = value;
+		break;
+	case GUEST_CR0:
+		current_evmcs->guest_cr0 = value;
+		break;
+	case GUEST_CR3:
+		current_evmcs->guest_cr3 = value;
+		break;
+	case GUEST_CR4:
+		current_evmcs->guest_cr4 = value;
+		break;
+	case GUEST_DR7:
+		current_evmcs->guest_dr7 = value;
+		break;
+	case HOST_FS_BASE:
+		current_evmcs->host_fs_base = value;
+		break;
+	case HOST_GS_BASE:
+		current_evmcs->host_gs_base = value;
+		break;
+	case HOST_TR_BASE:
+		current_evmcs->host_tr_base = value;
+		break;
+	case HOST_GDTR_BASE:
+		current_evmcs->host_gdtr_base = value;
+		break;
+	case HOST_IDTR_BASE:
+		current_evmcs->host_idtr_base = value;
+		break;
+	case HOST_RSP:
+		current_evmcs->host_rsp = value;
+		break;
+	case EPT_POINTER:
+		current_evmcs->ept_pointer = value;
+		break;
+	case GUEST_BNDCFGS:
+		current_evmcs->guest_bndcfgs = value;
+		break;
+	case XSS_EXIT_BITMAP:
+		current_evmcs->xss_exit_bitmap = value;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		current_evmcs->guest_physical_address = value;
+		break;
+	case EXIT_QUALIFICATION:
+		current_evmcs->exit_qualification = value;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		current_evmcs->guest_linear_address = value;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		current_evmcs->vm_exit_msr_store_addr = value;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		current_evmcs->vm_exit_msr_load_addr = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		current_evmcs->vm_entry_msr_load_addr = value;
+		break;
+	case CR3_TARGET_VALUE0:
+		current_evmcs->cr3_target_value0 = value;
+		break;
+	case CR3_TARGET_VALUE1:
+		current_evmcs->cr3_target_value1 = value;
+		break;
+	case CR3_TARGET_VALUE2:
+		current_evmcs->cr3_target_value2 = value;
+		break;
+	case CR3_TARGET_VALUE3:
+		current_evmcs->cr3_target_value3 = value;
+		break;
+	case TPR_THRESHOLD:
+		current_evmcs->tpr_threshold = value;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		current_evmcs->guest_interruptibility_info = value;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		current_evmcs->cpu_based_vm_exec_control = value;
+		break;
+	case EXCEPTION_BITMAP:
+		current_evmcs->exception_bitmap = value;
+		break;
+	case VM_ENTRY_CONTROLS:
+		current_evmcs->vm_entry_controls = value;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		current_evmcs->vm_entry_intr_info_field = value;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		current_evmcs->vm_entry_exception_error_code = value;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		current_evmcs->vm_entry_instruction_len = value;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		current_evmcs->host_ia32_sysenter_cs = value;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		current_evmcs->pin_based_vm_exec_control = value;
+		break;
+	case VM_EXIT_CONTROLS:
+		current_evmcs->vm_exit_controls = value;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		current_evmcs->secondary_vm_exec_control = value;
+		break;
+	case GUEST_ES_LIMIT:
+		current_evmcs->guest_es_limit = value;
+		break;
+	case GUEST_CS_LIMIT:
+		current_evmcs->guest_cs_limit = value;
+		break;
+	case GUEST_SS_LIMIT:
+		current_evmcs->guest_ss_limit = value;
+		break;
+	case GUEST_DS_LIMIT:
+		current_evmcs->guest_ds_limit = value;
+		break;
+	case GUEST_FS_LIMIT:
+		current_evmcs->guest_fs_limit = value;
+		break;
+	case GUEST_GS_LIMIT:
+		current_evmcs->guest_gs_limit = value;
+		break;
+	case GUEST_LDTR_LIMIT:
+		current_evmcs->guest_ldtr_limit = value;
+		break;
+	case GUEST_TR_LIMIT:
+		current_evmcs->guest_tr_limit = value;
+		break;
+	case GUEST_GDTR_LIMIT:
+		current_evmcs->guest_gdtr_limit = value;
+		break;
+	case GUEST_IDTR_LIMIT:
+		current_evmcs->guest_idtr_limit = value;
+		break;
+	case GUEST_ES_AR_BYTES:
+		current_evmcs->guest_es_ar_bytes = value;
+		break;
+	case GUEST_CS_AR_BYTES:
+		current_evmcs->guest_cs_ar_bytes = value;
+		break;
+	case GUEST_SS_AR_BYTES:
+		current_evmcs->guest_ss_ar_bytes = value;
+		break;
+	case GUEST_DS_AR_BYTES:
+		current_evmcs->guest_ds_ar_bytes = value;
+		break;
+	case GUEST_FS_AR_BYTES:
+		current_evmcs->guest_fs_ar_bytes = value;
+		break;
+	case GUEST_GS_AR_BYTES:
+		current_evmcs->guest_gs_ar_bytes = value;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		current_evmcs->guest_ldtr_ar_bytes = value;
+		break;
+	case GUEST_TR_AR_BYTES:
+		current_evmcs->guest_tr_ar_bytes = value;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		current_evmcs->guest_activity_state = value;
+		break;
+	case GUEST_SYSENTER_CS:
+		current_evmcs->guest_sysenter_cs = value;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		current_evmcs->vm_instruction_error = value;
+		break;
+	case VM_EXIT_REASON:
+		current_evmcs->vm_exit_reason = value;
+		break;
+	case VM_EXIT_INTR_INFO:
+		current_evmcs->vm_exit_intr_info = value;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		current_evmcs->vm_exit_intr_error_code = value;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		current_evmcs->idt_vectoring_info_field = value;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		current_evmcs->idt_vectoring_error_code = value;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		current_evmcs->vm_exit_instruction_len = value;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		current_evmcs->vmx_instruction_info = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		current_evmcs->page_fault_error_code_mask = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		current_evmcs->page_fault_error_code_match = value;
+		break;
+	case CR3_TARGET_COUNT:
+		current_evmcs->cr3_target_count = value;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		current_evmcs->vm_exit_msr_store_count = value;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		current_evmcs->vm_exit_msr_load_count = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		current_evmcs->vm_entry_msr_load_count = value;
+		break;
+	case HOST_ES_SELECTOR:
+		current_evmcs->host_es_selector = value;
+		break;
+	case HOST_CS_SELECTOR:
+		current_evmcs->host_cs_selector = value;
+		break;
+	case HOST_SS_SELECTOR:
+		current_evmcs->host_ss_selector = value;
+		break;
+	case HOST_DS_SELECTOR:
+		current_evmcs->host_ds_selector = value;
+		break;
+	case HOST_FS_SELECTOR:
+		current_evmcs->host_fs_selector = value;
+		break;
+	case HOST_GS_SELECTOR:
+		current_evmcs->host_gs_selector = value;
+		break;
+	case HOST_TR_SELECTOR:
+		current_evmcs->host_tr_selector = value;
+		break;
+	case GUEST_ES_SELECTOR:
+		current_evmcs->guest_es_selector = value;
+		break;
+	case GUEST_CS_SELECTOR:
+		current_evmcs->guest_cs_selector = value;
+		break;
+	case GUEST_SS_SELECTOR:
+		current_evmcs->guest_ss_selector = value;
+		break;
+	case GUEST_DS_SELECTOR:
+		current_evmcs->guest_ds_selector = value;
+		break;
+	case GUEST_FS_SELECTOR:
+		current_evmcs->guest_fs_selector = value;
+		break;
+	case GUEST_GS_SELECTOR:
+		current_evmcs->guest_gs_selector = value;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		current_evmcs->guest_ldtr_selector = value;
+		break;
+	case GUEST_TR_SELECTOR:
+		current_evmcs->guest_tr_selector = value;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		current_evmcs->virtual_processor_id = value;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+	int ret;
+
+	current_evmcs->hv_clean_fields = 0;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "mov %%rsp, (%[host_rsp]);"
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"
+			     "vmlaunch;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+	int ret;
+
+	current_evmcs->hv_clean_fields = 0;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "mov %%rsp, (%[host_rsp]);"
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"
+			     "vmresume;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 637b7017b6ee..a4e59e3b4826 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,7 +7,7 @@
  *
  */
 #ifndef SELFTEST_KVM_UTIL_H
-#define SELFTEST_KVM_UTIL_H 1
+#define SELFTEST_KVM_UTIL_H
 
 #include "test_util.h"
 
@@ -17,12 +17,6 @@
 
 #include "sparsebit.h"
 
-/*
- * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
- * created. Only applies to VMs using EPT.
- */
-#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
-
 
 /* Callers of kvm_util only have an incomplete/opaque description of the
  * structure kvm_util is using to maintain the state of a VM.
@@ -33,16 +27,23 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
 /* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_UTIL_MIN_VADDR		0x2000
 
 #define DEFAULT_GUEST_PHY_PAGES		512
 #define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
-#define DEFAULT_STACK_PGS               5
+#define DEFAULT_STACK_PGS		5
 
 enum vm_guest_mode {
-	VM_MODE_FLAT48PG,
+	VM_MODE_P52V48_4K,
+	VM_MODE_P52V48_64K,
+	VM_MODE_P40V48_4K,
+	VM_MODE_P40V48_64K,
+	NUM_VM_MODES,
 };
 
+#define vm_guest_mode_string(m) vm_guest_mode_string[m]
+extern const char * const vm_guest_mode_string[];
+
 enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS,
 	VM_MEM_SRC_ANONYMOUS_THP,
@@ -50,19 +51,23 @@ enum vm_mem_backing_src_type {
 };
 
 int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 
-int kvm_memcmp_hva_gva(void *hva,
-	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+		       size_t len);
 
 void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
-	uint32_t data_memslot, uint32_t pgd_memslot);
+		     uint32_t data_memslot, uint32_t pgd_memslot);
 
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
-	uint32_t vcpuid, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+	       uint8_t indent);
 
 void vm_create_irqchip(struct kvm_vm *vm);
 
@@ -71,13 +76,16 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
 	uint32_t flags);
 
-void vcpu_ioctl(struct kvm_vm *vm,
-	uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+		void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+		 int gdt_memslot);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-	uint32_t data_memslot, uint32_t pgd_memslot);
+			  uint32_t data_memslot, uint32_t pgd_memslot);
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	      size_t size, uint32_t pgd_memslot);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
@@ -87,52 +95,35 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
-	struct kvm_mp_state *mp_state);
-void vcpu_regs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs);
+		       struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-void vcpu_sregs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events);
+		     struct kvm_vcpu_events *events);
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events);
+		     struct kvm_vcpu_events *events);
 
 const char *exit_reason_str(unsigned int exit_reason);
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	uint32_t pgd_memslot);
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
-	vm_paddr_t paddr_min, uint32_t memslot);
-
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-void vcpu_set_cpuid(
-	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
-
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
-
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
-{
-	return kvm_get_supported_cpuid_index(function, 0);
-}
-
-struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
+		 uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot);
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+				 void *guest_code);
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
-typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
-				 vm_paddr_t vmxon_paddr,
-				 vm_vaddr_t vmcs_vaddr,
-				 vm_paddr_t vmcs_paddr);
-
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 				 uint64_t end);
@@ -142,4 +133,49 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
 
 int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
 
+#define sync_global_to_guest(vm, g) ({				\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	memcpy(_p, &(g), sizeof(g));				\
+})
+
+#define sync_global_from_guest(vm, g) ({			\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	memcpy(&(g), _p, sizeof(g));				\
+})
+
+/* ucall implementation types */
+typedef enum {
+	UCALL_PIO,
+	UCALL_MMIO,
+} ucall_type_t;
+
+/* Common ucalls */
+enum {
+	UCALL_NONE,
+	UCALL_SYNC,
+	UCALL_ABORT,
+	UCALL_DONE,
+};
+
+#define UCALL_MAX_ARGS 6
+
+struct ucall {
+	uint64_t cmd;
+	uint64_t args[UCALL_MAX_ARGS];
+};
+
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE()		ucall(UCALL_DONE, 0)
+#define GUEST_ASSERT(_condition) do {			\
+	if (!(_condition))				\
+		ucall(UCALL_ABORT, 2,			\
+			"Failed guest assert: "		\
+			#_condition, __LINE__);		\
+} while (0)
+
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index 54cfeb6568d3..31e030915c1f 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -15,8 +15,8 @@
  * even in the case where most bits are set.
  */
 
-#ifndef _TEST_SPARSEBIT_H_
-#define _TEST_SPARSEBIT_H_
+#ifndef SELFTEST_KVM_SPARSEBIT_H
+#define SELFTEST_KVM_SPARSEBIT_H
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -72,4 +72,4 @@ void sparsebit_validate_internal(struct sparsebit *sbit);
 }
 #endif
 
-#endif /* _TEST_SPARSEBIT_H_ */
+#endif /* SELFTEST_KVM_SPARSEBIT_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index ac53730b30aa..c7dafe8bd02c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef TEST_UTIL_H
-#define TEST_UTIL_H 1
+#ifndef SELFTEST_KVM_TEST_UTIL_H
+#define SELFTEST_KVM_TEST_UTIL_H
 
 #include <stdlib.h>
 #include <stdarg.h>
@@ -28,8 +28,6 @@ int test_seq_read(const char *path, char **bufp, size_t *sizep);
 void test_assert(bool exp, const char *exp_str,
 		 const char *file, unsigned int line, const char *fmt, ...);
 
-#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
-
 #define TEST_ASSERT(e, fmt, ...) \
 	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
 
@@ -43,4 +41,4 @@ void test_assert(bool exp, const char *exp_str,
 		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
 } while (0)
 
-#endif /* TEST_UTIL_H */
+#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 4a5b2c4c1a0f..e2884c2b81ff 100644
--- a/tools/testing/selftests/kvm/include/x86.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/include/x86.h
+ * tools/testing/selftests/kvm/include/x86_64/processor.h
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef SELFTEST_KVM_X86_H
-#define SELFTEST_KVM_X86_H
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
 
 #include <assert.h>
 #include <stdint.h>
@@ -59,8 +59,8 @@ enum x86_register {
 struct desc64 {
 	uint16_t limit0;
 	uint16_t base0;
-	unsigned base1:8, type:5, dpl:2, p:1;
-	unsigned limit1:4, zero0:3, g:1, base2:8;
+	unsigned base1:8, s:1, type:4, dpl:2, p:1;
+	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
 	uint32_t base3;
 	uint32_t zero1;
 } __attribute__((packed));
@@ -303,6 +303,28 @@ static inline unsigned long get_xmm(int n)
 	return 0;
 }
 
+struct kvm_x86_state;
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_x86_state *state);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+	return kvm_get_supported_cpuid_index(function, 0);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	  	  uint64_t msr_value);
+
 /*
  * Basic CPU control in CR0
  */
@@ -1040,4 +1062,4 @@ static inline unsigned long get_xmm(int n)
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
-#endif /* !SELFTEST_KVM_X86_H */
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 6ed8499807fd..c9bd935b939c 100644
--- a/tools/testing/selftests/kvm/include/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/vmx.h
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -11,7 +11,7 @@
 #define SELFTEST_KVM_VMX_H
 
 #include <stdint.h>
-#include "x86.h"
+#include "processor.h"
 
 #define CPUID_VMX_BIT				5
 
@@ -339,6 +339,8 @@ struct vmx_msr_entry {
 	uint64_t value;
 } __attribute__ ((aligned(16)));
 
+#include "evmcs.h"
+
 static inline int vmxon(uint64_t phys)
 {
 	uint8_t ret;
@@ -372,6 +374,9 @@ static inline int vmptrld(uint64_t vmcs_pa)
 {
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return -1;
+
 	__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
 		: [ret]"=rm"(ret)
 		: [pa]"m"(vmcs_pa)
@@ -380,6 +385,33 @@ static inline int vmptrld(uint64_t vmcs_pa)
 	return ret;
 }
 
+static inline int vmptrst(uint64_t *value)
+{
+	uint64_t tmp;
+	uint8_t ret;
+
+	if (enable_evmcs)
+		return evmcs_vmptrst(value);
+
+	__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+		: [value]"=m"(tmp), [ret]"=rm"(ret)
+		: : "cc", "memory");
+
+	*value = tmp;
+	return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+	uint64_t value = 0;
+	vmptrst(&value);
+	return value;
+}
+
 /*
  * No guest state (e.g. GPRs) is established by this vmlaunch.
  */
@@ -387,6 +419,9 @@ static inline int vmlaunch(void)
 {
 	int ret;
 
+	if (enable_evmcs)
+		return evmcs_vmlaunch();
+
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
 			     "push %%rdx;"
@@ -419,6 +454,9 @@ static inline int vmresume(void)
 {
 	int ret;
 
+	if (enable_evmcs)
+		return evmcs_vmresume();
+
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
 			     "push %%rdx;"
@@ -444,11 +482,23 @@ static inline int vmresume(void)
 	return ret;
 }
 
+static inline void vmcall(void)
+{
+	/* Currently, L1 destroys our GPRs during vmexits.  */
+	__asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
+			     "rax", "rbx", "rcx", "rdx",
+			     "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			     "r13", "r14", "r15");
+}
+
 static inline int vmread(uint64_t encoding, uint64_t *value)
 {
 	uint64_t tmp;
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmread(encoding, value);
+
 	__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
 		: [value]"=rm"(tmp), [ret]"=rm"(ret)
 		: [encoding]"r"(encoding)
@@ -473,6 +523,9 @@ static inline int vmwrite(uint64_t encoding, uint64_t value)
 {
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmwrite(encoding, value);
+
 	__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
 		: [ret]"=rm"(ret)
 		: [value]"rm"(value), [encoding]"r"(encoding)
@@ -486,9 +539,43 @@ static inline uint32_t vmcs_revision(void)
 	return rdmsr(MSR_IA32_VMX_BASIC);
 }
 
-void prepare_for_vmx_operation(void);
-void prepare_vmcs(void *guest_rip, void *guest_rsp);
-struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid,
-				     vmx_guest_code_t guest_code);
+struct vmx_pages {
+	void *vmxon_hva;
+	uint64_t vmxon_gpa;
+	void *vmxon;
+
+	void *vmcs_hva;
+	uint64_t vmcs_gpa;
+	void *vmcs;
+
+	void *msr_hva;
+	uint64_t msr_gpa;
+	void *msr;
+
+	void *shadow_vmcs_hva;
+	uint64_t shadow_vmcs_gpa;
+	void *shadow_vmcs;
+
+	void *vmread_hva;
+	uint64_t vmread_gpa;
+	void *vmread;
+
+	void *vmwrite_hva;
+	uint64_t vmwrite_gpa;
+	void *vmwrite;
+
+	void *vp_assist_hva;
+	uint64_t vp_assist_gpa;
+	void *vp_assist;
+
+	void *enlightened_vmcs_hva;
+	uint64_t enlightened_vmcs_gpa;
+	void *enlightened_vmcs;
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
 
-#endif /* !SELFTEST_KVM_VMX_H */
+#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
new file mode 100644
index 000000000000..b6022e2f116e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+	return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+	TEST_ASSERT(vm->pgtable_levels == 4,
+		"Mode %d does not have 4 page table levels", vm->mode);
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+	TEST_ASSERT(vm->pgtable_levels >= 3,
+		"Mode %d does not have >= 3 page table levels", vm->mode);
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+	return (gva >> vm->page_shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+	uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
+	return entry & mask;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+	return 1 << (vm->page_shift - 3);
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	int rc;
+
+	if (!vm->pgd_created) {
+		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+			page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		vm->pgd = paddr;
+		vm->pgd_created = true;
+	}
+}
+
+void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		  uint32_t pgd_memslot, uint64_t flags)
+{
+	uint8_t attr_idx = flags & 7;
+	uint64_t *ptep;
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx", vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+	if (!*ptep) {
+		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		*ptep |= 3;
+	}
+
+	switch (vm->pgtable_levels) {
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+		break;
+	default:
+		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+	}
+
+	*ptep = paddr | 3;
+	*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		 uint32_t pgd_memslot)
+{
+	uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+
+	_virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t *ptep;
+
+	if (!vm->pgd_created)
+		goto unmapped_gva;
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+	if (!ptep)
+		goto unmapped_gva;
+
+	switch (vm->pgtable_levels) {
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		break;
+	default:
+		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+	}
+
+	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+	TEST_ASSERT(false, "No mapping for vm virtual address, "
+		    "gva: 0x%lx", gva);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG_VM
+	static const char * const type[] = { "", "pud", "pmd", "pte" };
+	uint64_t pte, *ptep;
+
+	if (level == 4)
+		return;
+
+	for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+		ptep = addr_gpa2hva(vm, pte);
+		if (!*ptep)
+			continue;
+		printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+	}
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	int level = 4 - (vm->pgtable_levels - 1);
+	uint64_t pgd, *ptep;
+
+	if (!vm->pgd_created)
+		return;
+
+	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+		ptep = addr_gpa2hva(vm, pgd);
+		if (!*ptep)
+			continue;
+		printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+	}
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	uint64_t ptrs_per_4k_pte = 512;
+	uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
+	struct kvm_vm *vm;
+
+	vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	size_t stack_size = vm->page_size == 4096 ?
+					DEFAULT_STACK_PGS * vm->page_size :
+					vm->page_size;
+	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+
+	vm_vcpu_add(vm, vcpuid, 0, 0);
+
+	set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+	set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+	struct kvm_vcpu_init init;
+	uint64_t sctlr_el1, tcr_el1;
+
+	memset(&init, 0, sizeof(init));
+	init.target = KVM_ARM_TARGET_GENERIC_V8;
+	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init);
+
+	/*
+	 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+	 * registers, which the variable argument list macros do.
+	 */
+	set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+
+	get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
+	get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+
+	switch (vm->mode) {
+	case VM_MODE_P52V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P52V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P40V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	case VM_MODE_P40V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+
+	sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+	/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+	tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+	tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+
+	set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	uint64_t pstate, pc;
+
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+
+        fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n",
+                indent, "", pstate, pc);
+
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index c9f5b7d4ce38..6398efe67885 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -13,6 +13,8 @@
 #include <execinfo.h>
 #include <sys/syscall.h>
 
+#include "kselftest.h"
+
 /* Dumps the current stack trace to stderr. */
 static void __attribute__((noinline)) test_dump_stack(void);
 static void test_dump_stack(void)
@@ -70,8 +72,9 @@ test_assert(bool exp, const char *exp_str,
 
 		fprintf(stderr, "==== Test Assertion Failure ====\n"
 			"  %s:%u: %s\n"
-			"  pid=%d tid=%d\n",
-			file, line, exp_str, getpid(), gettid());
+			"  pid=%d tid=%d - %s\n",
+			file, line, exp_str, getpid(), gettid(),
+			strerror(errno));
 		test_dump_stack();
 		if (fmt) {
 			fputs("  ", stderr);
@@ -80,6 +83,8 @@ test_assert(bool exp, const char *exp_str,
 		}
 		va_end(ap);
 
+		if (errno == EACCES)
+			ksft_exit_skip("Access denied - Exiting.\n");
 		exit(254);
 	}
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 37e2a787d2fc..1b41e71283d5 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -14,11 +14,10 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-
-#define KVM_DEV_PATH "/dev/kvm"
+#include <linux/kernel.h>
 
 #define KVM_UTIL_PGS_PER_HUGEPG 512
-#define KVM_UTIL_MIN_PADDR      0x2000
+#define KVM_UTIL_MIN_PFN	2
 
 /* Aligns x up to the next multiple of size. Size must be a power of 2. */
 static void *align(void *x, size_t size)
@@ -29,7 +28,8 @@ static void *align(void *x, size_t size)
 	return (void *) (((size_t) x + mask) & ~mask);
 }
 
-/* Capability
+/*
+ * Capability
  *
  * Input Args:
  *   cap - Capability
@@ -62,10 +62,52 @@ int kvm_check_cap(long cap)
 	return ret;
 }
 
-/* VM Create
+/* VM Enable Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VM.
+ */
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+
+	return ret;
+}
+
+static void vm_open(struct kvm_vm *vm, int perm)
+{
+	vm->kvm_fd = open(KVM_DEV_PATH, perm);
+	if (vm->kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
+	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+		"rc: %i errno: %i", vm->fd, errno);
+}
+
+const char * const vm_guest_mode_string[] = {
+	"PA-bits:52, VA-bits:48, 4K pages",
+	"PA-bits:52, VA-bits:48, 64K pages",
+	"PA-bits:40, VA-bits:48, 4K pages",
+	"PA-bits:40, VA-bits:48, 64K pages",
+};
+
+/*
+ * VM Create
  *
  * Input Args:
- *   mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
  *   phy_pages - Physical memory pages
  *   perm - permission
  *
@@ -74,7 +116,7 @@ int kvm_check_cap(long cap)
  * Return:
  *   Pointer to opaque structure that describes the created VM.
  *
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
  * When phy_pages is non-zero, a memory region of phy_pages physical pages
  * is created and mapped starting at guest physical address 0.  The file
  * descriptor to control the created VM is created with the permissions
@@ -85,44 +127,56 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	struct kvm_vm *vm;
 	int kvm_fd;
 
-	/* Allocate memory. */
 	vm = calloc(1, sizeof(*vm));
-	TEST_ASSERT(vm != NULL, "Insufficent Memory");
+	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
 	vm->mode = mode;
-	kvm_fd = open(KVM_DEV_PATH, perm);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
-
-	/* Create VM. */
-	vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
-	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
-		"rc: %i errno: %i", vm->fd, errno);
-
-	close(kvm_fd);
+	vm_open(vm, perm);
 
 	/* Setup mode specific traits. */
 	switch (vm->mode) {
-	case VM_MODE_FLAT48PG:
+	case VM_MODE_P52V48_4K:
+		vm->pgtable_levels = 4;
 		vm->page_size = 0x1000;
 		vm->page_shift = 12;
-
-		/* Limit to 48-bit canonical virtual addresses. */
-		vm->vpages_valid = sparsebit_alloc();
-		sparsebit_set_num(vm->vpages_valid,
-			0, (1ULL << (48 - 1)) >> vm->page_shift);
-		sparsebit_set_num(vm->vpages_valid,
-			(~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
-			(1ULL << (48 - 1)) >> vm->page_shift);
-
-		/* Limit physical addresses to 52-bits. */
-		vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+		vm->va_bits = 48;
+		break;
+	case VM_MODE_P52V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 52;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
+		vm->va_bits = 48;
+		break;
+	case VM_MODE_P40V48_4K:
+		vm->pgtable_levels = 4;
+		vm->pa_bits = 40;
+		vm->va_bits = 48;
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+		break;
+	case VM_MODE_P40V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 40;
+		vm->va_bits = 48;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
 		break;
-
 	default:
 		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
 	}
 
+	/* Limit to VA-bit canonical virtual addresses. */
+	vm->vpages_valid = sparsebit_alloc();
+	sparsebit_set_num(vm->vpages_valid,
+		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	sparsebit_set_num(vm->vpages_valid,
+		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+
+	/* Limit physical addresses to PA-bits. */
+	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+
 	/* Allocate and setup memory for guest. */
 	vm->vpages_mapped = sparsebit_alloc();
 	if (phy_pages != 0)
@@ -132,7 +186,53 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
-/* Userspace Memory Region Find
+/*
+ * VM Restart
+ *
+ * Input Args:
+ *   vm - VM that has been released before
+ *   perm - permission
+ *
+ * Output Args: None
+ *
+ * Reopens the file descriptors associated to the VM and reinstates the
+ * global state, such as the irqchip and the memory regions that are mapped
+ * into the guest.
+ */
+void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+{
+	struct userspace_mem_region *region;
+
+	vm_open(vmp, perm);
+	if (vmp->has_irqchip)
+		vm_create_irqchip(vmp);
+
+	for (region = vmp->userspace_mem_region_head; region;
+		region = region->next) {
+		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+			    "  rc: %i errno: %i\n"
+			    "  slot: %u flags: 0x%x\n"
+			    "  guest_phys_addr: 0x%lx size: 0x%lx",
+			    ret, errno, region->region.slot,
+			    region->region.flags,
+			    region->region.guest_phys_addr,
+			    region->region.memory_size);
+	}
+}
+
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
+	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
+		    strerror(-ret));
+}
+
+/*
+ * Userspace Memory Region Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -150,8 +250,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
  * of the regions is returned.  Null is returned only when no overlapping
  * region exists.
  */
-static struct userspace_mem_region *userspace_mem_region_find(
-	struct kvm_vm *vm, uint64_t start, uint64_t end)
+static struct userspace_mem_region *
+userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 {
 	struct userspace_mem_region *region;
 
@@ -167,7 +267,8 @@ static struct userspace_mem_region *userspace_mem_region_find(
 	return NULL;
 }
 
-/* KVM Userspace Memory Region Find
+/*
+ * KVM Userspace Memory Region Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -195,7 +296,8 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 	return &region->region;
 }
 
-/* VCPU Find
+/*
+ * VCPU Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -210,8 +312,7 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
  * for the specified vcpuid.
  */
-struct vcpu *vcpu_find(struct kvm_vm *vm,
-	uint32_t vcpuid)
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct vcpu *vcpup;
 
@@ -223,7 +324,8 @@ struct vcpu *vcpu_find(struct kvm_vm *vm,
 	return NULL;
 }
 
-/* VM VCPU Remove
+/*
+ * VM VCPU Remove
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -238,8 +340,12 @@ struct vcpu *vcpu_find(struct kvm_vm *vm,
 static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
 
-	int ret = close(vcpu->fd);
+	ret = munmap(vcpu->state, sizeof(*vcpu->state));
+	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
+		"errno: %i", ret, errno);
+	close(vcpu->fd);
 	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
 		"errno: %i", ret, errno);
 
@@ -252,8 +358,24 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
 	free(vcpu);
 }
 
+void kvm_vm_release(struct kvm_vm *vmp)
+{
+	int ret;
+
+	while (vmp->vcpu_head)
+		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+	ret = close(vmp->fd);
+	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
 
-/* Destroys and frees the VM pointed to by vmp.
+	close(vmp->kvm_fd);
+	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
+		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+}
+
+/*
+ * Destroys and frees the VM pointed to by vmp.
  */
 void kvm_vm_free(struct kvm_vm *vmp)
 {
@@ -282,24 +404,18 @@ void kvm_vm_free(struct kvm_vm *vmp)
 		free(region);
 	}
 
-	/* Free VCPUs. */
-	while (vmp->vcpu_head)
-		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
-
 	/* Free sparsebit arrays. */
 	sparsebit_free(&vmp->vpages_valid);
 	sparsebit_free(&vmp->vpages_mapped);
 
-	/* Close file descriptor for the VM. */
-	ret = close(vmp->fd);
-	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
-		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+	kvm_vm_release(vmp);
 
 	/* Free the structure describing the VM. */
 	free(vmp);
 }
 
-/* Memory Compare, host virtual to guest virtual
+/*
+ * Memory Compare, host virtual to guest virtual
  *
  * Input Args:
  *   hva - Starting host virtual address
@@ -321,23 +437,25 @@ void kvm_vm_free(struct kvm_vm *vmp)
  * a length of len, to the guest bytes starting at the guest virtual
  * address given by gva.
  */
-int kvm_memcmp_hva_gva(void *hva,
-	struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
 {
 	size_t amt;
 
-	/* Compare a batch of bytes until either a match is found
+	/*
+	 * Compare a batch of bytes until either a match is found
 	 * or all the bytes have been compared.
 	 */
 	for (uintptr_t offset = 0; offset < len; offset += amt) {
 		uintptr_t ptr1 = (uintptr_t)hva + offset;
 
-		/* Determine host address for guest virtual address
+		/*
+		 * Determine host address for guest virtual address
 		 * at offset.
 		 */
 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
 
-		/* Determine amount to compare on this pass.
+		/*
+		 * Determine amount to compare on this pass.
 		 * Don't allow the comparsion to cross a page boundary.
 		 */
 		amt = len - offset;
@@ -349,7 +467,8 @@ int kvm_memcmp_hva_gva(void *hva,
 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
 
-		/* Perform the comparison.  If there is a difference
+		/*
+		 * Perform the comparison.  If there is a difference
 		 * return that result to the caller, otherwise need
 		 * to continue on looking for a mismatch.
 		 */
@@ -358,109 +477,15 @@ int kvm_memcmp_hva_gva(void *hva,
 			return ret;
 	}
 
-	/* No mismatch found.  Let the caller know the two memory
+	/*
+	 * No mismatch found.  Let the caller know the two memory
 	 * areas are equal.
 	 */
 	return 0;
 }
 
-/* Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
-	struct kvm_cpuid2 *cpuid;
-	int nent = 100;
-	size_t size;
-
-	size = sizeof(*cpuid);
-	size += nent * sizeof(struct kvm_cpuid_entry2);
-	cpuid = malloc(size);
-	if (!cpuid) {
-		perror("malloc");
-		abort();
-	}
-
-	cpuid->nent = nent;
-
-	return cpuid;
-}
-
-/* KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
-	static struct kvm_cpuid2 *cpuid;
-	int ret;
-	int kvm_fd;
-
-	if (cpuid)
-		return cpuid;
-
-	cpuid = allocate_kvm_cpuid2();
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
-
-	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
-	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
-		    ret, errno);
-
-	close(kvm_fd);
-	return cpuid;
-}
-
-/* Locate a cpuid entry.
- *
- * Input Args:
- *   cpuid: The cpuid.
- *   function: The function of the cpuid entry to find.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
-{
-	struct kvm_cpuid2 *cpuid;
-	struct kvm_cpuid_entry2 *entry = NULL;
-	int i;
-
-	cpuid = kvm_get_supported_cpuid();
-	for (i = 0; i < cpuid->nent; i++) {
-		if (cpuid->entries[i].function == function &&
-		    cpuid->entries[i].index == index) {
-			entry = &cpuid->entries[i];
-			break;
-		}
-	}
-
-	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
-		    function, index);
-	return entry;
-}
-
-/* VM Userspace Memory Region Add
+/*
+ * VM Userspace Memory Region Add
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -502,7 +527,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		guest_paddr, npages, vm->max_gfn, vm->page_size);
 
-	/* Confirm a mem region with an overlapping address doesn't
+	/*
+	 * Confirm a mem region with an overlapping address doesn't
 	 * already exist.
 	 */
 	region = (struct userspace_mem_region *) userspace_mem_region_find(
@@ -593,7 +619,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	vm->userspace_mem_region_head = region;
 }
 
-/* Memslot to region
+/*
+ * Memslot to region
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -607,8 +634,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
  *   on error (e.g. currently no memory region using memslot as a KVM
  *   memory slot ID).
  */
-static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
-	uint32_t memslot)
+static struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
 
@@ -628,7 +655,8 @@ static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
 	return region;
 }
 
-/* VM Memory Region Flags Set
+/*
+ * VM Memory Region Flags Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -646,7 +674,6 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 	int ret;
 	struct userspace_mem_region *region;
 
-	/* Locate memory region. */
 	region = memslot2region(vm, slot);
 
 	region->region.flags = flags;
@@ -658,7 +685,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 		ret, errno, slot, flags);
 }
 
-/* VCPU mmap Size
+/*
+ * VCPU mmap Size
  *
  * Input Args: None
  *
@@ -688,7 +716,8 @@ static int vcpu_mmap_sz(void)
 	return ret;
 }
 
-/* VM VCPU Add
+/*
+ * VM VCPU Add
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -701,7 +730,8 @@ static int vcpu_mmap_sz(void)
  * Creates and adds to the VM specified by vm and virtual CPU with
  * the ID given by vcpuid.
  */
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+		 int gdt_memslot)
 {
 	struct vcpu *vcpu;
 
@@ -736,10 +766,11 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
 	vcpu->next = vm->vcpu_head;
 	vm->vcpu_head = vcpu;
 
-	vcpu_setup(vm, vcpuid);
+	vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
 }
 
-/* VM Virtual Address Unused Gap
+/*
+ * VM Virtual Address Unused Gap
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -759,14 +790,14 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
  * sz unallocated bytes >= vaddr_min is available.
  */
 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
-	vm_vaddr_t vaddr_min)
+				      vm_vaddr_t vaddr_min)
 {
 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
 
 	/* Determine lowest permitted virtual page index. */
 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
 	if ((pgidx_start * vm->page_size) < vaddr_min)
-			goto no_va_found;
+		goto no_va_found;
 
 	/* Loop over section with enough valid virtual page indexes. */
 	if (!sparsebit_is_set_num(vm->vpages_valid,
@@ -825,7 +856,8 @@ va_found:
 	return pgidx_start * vm->page_size;
 }
 
-/* VM Virtual Address Allocate
+/*
+ * VM Virtual Address Allocate
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -846,13 +878,14 @@ va_found:
  * a page.
  */
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-	uint32_t data_memslot, uint32_t pgd_memslot)
+			  uint32_t data_memslot, uint32_t pgd_memslot)
 {
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
 	virt_pgd_alloc(vm, pgd_memslot);
 
-	/* Find an unused range of virtual page addresses of at least
+	/*
+	 * Find an unused range of virtual page addresses of at least
 	 * pages in length.
 	 */
 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
@@ -862,7 +895,8 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 		pages--, vaddr += vm->page_size) {
 		vm_paddr_t paddr;
 
-		paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+		paddr = vm_phy_page_alloc(vm,
+				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
 
 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
 
@@ -873,7 +907,41 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 	return vaddr_start;
 }
 
-/* Address VM Physical to Host Virtual
+/*
+ * Map a range of VM virtual address to the VM's physical address
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - Virtuall address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the
+ * page range starting at vaddr to the page range starting at paddr.
+ */
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	      size_t size, uint32_t pgd_memslot)
+{
+	size_t page_size = vm->page_size;
+	size_t npages = size / page_size;
+
+	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
+	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+	while (npages--) {
+		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		vaddr += page_size;
+		paddr += page_size;
+	}
+}
+
+/*
+ * Address VM Physical to Host Virtual
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -905,7 +973,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 	return NULL;
 }
 
-/* Address Host Virtual to VM Physical
+/*
+ * Address Host Virtual to VM Physical
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -939,7 +1008,8 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 	return -1;
 }
 
-/* VM Create IRQ Chip
+/*
+ * VM Create IRQ Chip
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -957,9 +1027,12 @@ void vm_create_irqchip(struct kvm_vm *vm)
 	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
 	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
 		"rc: %i errno: %i", ret, errno);
+
+	vm->has_irqchip = true;
 }
 
-/* VM VCPU State
+/*
+ * VM VCPU State
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -981,7 +1054,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
 	return vcpu->state;
 }
 
-/* VM VCPU Run
+/*
+ * VM VCPU Run
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1007,13 +1081,14 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
 	int rc;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-        do {
+	do {
 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
 	} while (rc == -1 && errno == EINTR);
 	return rc;
 }
 
-/* VM VCPU Set MP State
+/*
+ * VM VCPU Set MP State
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1028,7 +1103,7 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
  * by mp_state.
  */
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
-	struct kvm_mp_state *mp_state)
+		       struct kvm_mp_state *mp_state)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1040,7 +1115,8 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
 		"rc: %i errno: %i", ret, errno);
 }
 
-/* VM VCPU Regs Get
+/*
+ * VM VCPU Regs Get
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1054,21 +1130,20 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
  * Obtains the current register state for the VCPU specified by vcpuid
  * and stores it at the location given by regs.
  */
-void vcpu_regs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU Regs Set
+/*
+ * VM VCPU Regs Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1082,99 +1157,46 @@ void vcpu_regs_get(struct kvm_vm *vm,
  * Sets the regs of the VCPU specified by vcpuid to the values
  * given by regs.
  */
-void vcpu_regs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Set the regs. */
 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events)
+		     struct kvm_vcpu_events *events)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
 
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events)
+		     struct kvm_vcpu_events *events)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Set the regs. */
 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU Args Set
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   num - number of arguments
- *   ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first num function input arguments to the values
- * given as variable args.  Each of the variable args is expected to
- * be of type uint64_t.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
-{
-	va_list ap;
-	struct kvm_regs regs;
-
-	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
-		    "  num: %u\n",
-		    num);
-
-	va_start(ap, num);
-	vcpu_regs_get(vm, vcpuid, &regs);
-
-	if (num >= 1)
-		regs.rdi = va_arg(ap, uint64_t);
-
-	if (num >= 2)
-		regs.rsi = va_arg(ap, uint64_t);
-
-	if (num >= 3)
-		regs.rdx = va_arg(ap, uint64_t);
-
-	if (num >= 4)
-		regs.rcx = va_arg(ap, uint64_t);
-
-	if (num >= 5)
-		regs.r8 = va_arg(ap, uint64_t);
-
-	if (num >= 6)
-		regs.r9 = va_arg(ap, uint64_t);
-
-	vcpu_regs_set(vm, vcpuid, &regs);
-	va_end(ap);
-}
-
-/* VM VCPU System Regs Get
+/*
+ * VM VCPU System Regs Get
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1188,22 +1210,20 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
  * Obtains the current system register state for the VCPU specified by
  * vcpuid and stores it at the location given by sregs.
  */
-void vcpu_sregs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU System Regs Set
+/*
+ * VM VCPU System Regs Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1217,27 +1237,25 @@ void vcpu_sregs_get(struct kvm_vm *vm,
  * Sets the system regs of the VCPU specified by vcpuid to the values
  * given by sregs.
  */
-void vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
 	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
 		"rc: %i errno: %i", ret, errno);
 }
 
-int _vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
 }
 
-/* VCPU Ioctl
+/*
+ * VCPU Ioctl
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1249,8 +1267,8 @@ int _vcpu_sregs_set(struct kvm_vm *vm,
  *
  * Issues an arbitrary ioctl on a VCPU fd.
  */
-void vcpu_ioctl(struct kvm_vm *vm,
-	uint32_t vcpuid, unsigned long cmd, void *arg)
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+		unsigned long cmd, void *arg)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1262,7 +1280,8 @@ void vcpu_ioctl(struct kvm_vm *vm,
 		cmd, ret, errno, strerror(errno));
 }
 
-/* VM Ioctl
+/*
+ * VM Ioctl
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1282,7 +1301,8 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 		cmd, ret, errno, strerror(errno));
 }
 
-/* VM Dump
+/*
+ * VM Dump
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1329,38 +1349,6 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
 }
 
-/* VM VCPU Dump
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   indent - Left margin indent amount
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by vcpuid, within the VM
- * given by vm, to the FILE stream given by stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
-	uint32_t vcpuid, uint8_t indent)
-{
-		struct kvm_regs regs;
-		struct kvm_sregs sregs;
-
-		fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
-
-		fprintf(stream, "%*sregs:\n", indent + 2, "");
-		vcpu_regs_get(vm, vcpuid, &regs);
-		regs_dump(stream, &regs, indent + 4);
-
-		fprintf(stream, "%*ssregs:\n", indent + 2, "");
-		vcpu_sregs_get(vm, vcpuid, &sregs);
-		sregs_dump(stream, &sregs, indent + 4);
-}
-
 /* Known KVM exit reasons */
 static struct exit_reason {
 	unsigned int reason;
@@ -1391,7 +1379,8 @@ static struct exit_reason {
 #endif
 };
 
-/* Exit Reason String
+/*
+ * Exit Reason String
  *
  * Input Args:
  *   exit_reason - Exit reason
@@ -1417,10 +1406,12 @@ const char *exit_reason_str(unsigned int exit_reason)
 	return "Unknown";
 }
 
-/* Physical Page Allocate
+/*
+ * Physical Contiguous Page Allocator
  *
  * Input Args:
  *   vm - Virtual Machine
+ *   num - number of pages
  *   paddr_min - Physical address minimum
  *   memslot - Memory region to allocate page from
  *
@@ -1429,47 +1420,59 @@ const char *exit_reason_str(unsigned int exit_reason)
  * Return:
  *   Starting physical address
  *
- * Within the VM specified by vm, locates an available physical page
- * at or above paddr_min.  If found, the page is marked as in use
- * and its address is returned.  A TEST_ASSERT failure occurs if no
- * page is available at or above paddr_min.
+ * Within the VM specified by vm, locates a range of available physical
+ * pages at or above paddr_min. If found, the pages are marked as in use
+ * and thier base address is returned. A TEST_ASSERT failure occurs if
+ * not enough pages are available at or above paddr_min.
  */
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
-	vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
-	sparsebit_idx_t pg;
+	sparsebit_idx_t pg, base;
+
+	TEST_ASSERT(num > 0, "Must allocate at least one page");
 
 	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
 		"not divisible by page size.\n"
 		"  paddr_min: 0x%lx page_size: 0x%x",
 		paddr_min, vm->page_size);
 
-	/* Locate memory region. */
 	region = memslot2region(vm, memslot);
+	base = pg = paddr_min >> vm->page_shift;
 
-	/* Locate next available physical page at or above paddr_min. */
-	pg = paddr_min >> vm->page_shift;
-
-	if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
-		pg = sparsebit_next_set(region->unused_phy_pages, pg);
-		if (pg == 0) {
-			fprintf(stderr, "No guest physical page available, "
-				"paddr_min: 0x%lx page_size: 0x%x memslot: %u",
-				paddr_min, vm->page_size, memslot);
-			fputs("---- vm dump ----\n", stderr);
-			vm_dump(stderr, vm, 2);
-			abort();
+	do {
+		for (; pg < base + num; ++pg) {
+			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
+				break;
+			}
 		}
+	} while (pg && pg != base + num);
+
+	if (pg == 0) {
+		fprintf(stderr, "No guest physical page available, "
+			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
+			paddr_min, vm->page_size, memslot);
+		fputs("---- vm dump ----\n", stderr);
+		vm_dump(stderr, vm, 2);
+		abort();
 	}
 
-	/* Specify page as in use and return its address. */
-	sparsebit_clear(region->unused_phy_pages, pg);
+	for (pg = base; pg < base + num; ++pg)
+		sparsebit_clear(region->unused_phy_pages, pg);
 
-	return pg * vm->page_size;
+	return base * vm->page_size;
 }
 
-/* Address Guest Virtual to Host Virtual
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot)
+{
+	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
+}
+
+/*
+ * Address Guest Virtual to Host Virtual
  *
  * Input Args:
  *   vm - Virtual Machine
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index a0bd1980c81c..52701db0f253 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -1,28 +1,29 @@
 /*
- * tools/testing/selftests/kvm/lib/kvm_util.c
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
  *
  * Copyright (C) 2018, Google LLC.
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  */
 
-#ifndef KVM_UTIL_INTERNAL_H
-#define KVM_UTIL_INTERNAL_H 1
+#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
+#define SELFTEST_KVM_UTIL_INTERNAL_H
 
 #include "sparsebit.h"
 
+#define KVM_DEV_PATH		"/dev/kvm"
+
 #ifndef BITS_PER_BYTE
-#define BITS_PER_BYTE           8
+#define BITS_PER_BYTE		8
 #endif
 
 #ifndef BITS_PER_LONG
-#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_LONG		(BITS_PER_BYTE * sizeof(long))
 #endif
 
 #define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
-#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
 
-/* Concrete definition of struct kvm_vm. */
 struct userspace_mem_region {
 	struct userspace_mem_region *next, *prev;
 	struct kvm_userspace_memory_region region;
@@ -43,25 +44,30 @@ struct vcpu {
 
 struct kvm_vm {
 	int mode;
+	int kvm_fd;
 	int fd;
+	unsigned int pgtable_levels;
 	unsigned int page_size;
 	unsigned int page_shift;
+	unsigned int pa_bits;
+	unsigned int va_bits;
 	uint64_t max_gfn;
 	struct vcpu *vcpu_head;
 	struct userspace_mem_region *userspace_mem_region_head;
 	struct sparsebit *vpages_valid;
 	struct sparsebit *vpages_mapped;
+	bool has_irqchip;
 	bool pgd_created;
 	vm_paddr_t pgd;
+	vm_vaddr_t gdt;
+	vm_vaddr_t tss;
 };
 
-struct vcpu *vcpu_find(struct kvm_vm *vm,
-	uint32_t vcpuid);
-void vcpu_setup(struct kvm_vm *vm, int vcpuid);
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot,
+		int gdt_memslot);
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void regs_dump(FILE *stream, struct kvm_regs *regs,
-	uint8_t indent);
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
-	uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
 
-#endif
+#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c
new file mode 100644
index 000000000000..4777f9bb5194
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+static ucall_type_t ucall_type;
+static vm_vaddr_t *ucall_exit_mmio_addr;
+
+static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
+		return false;
+
+	virt_pg_map(vm, gpa, gpa, 0);
+
+	ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+
+	return true;
+}
+
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg)
+{
+	ucall_type = type;
+	sync_global_to_guest(vm, ucall_type);
+
+	if (type == UCALL_PIO)
+		return;
+
+	if (type == UCALL_MMIO) {
+		vm_paddr_t gpa, start, end, step;
+		bool ret;
+
+		if (arg) {
+			gpa = (vm_paddr_t)arg;
+			ret = ucall_mmio_init(vm, gpa);
+			TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
+			return;
+		}
+
+		/*
+		 * Find an address within the allowed virtual address space,
+		 * that does _not_ have a KVM memory region associated with it.
+		 * Identity mapping an address like this allows the guest to
+		 * access it, but as KVM doesn't know what to do with it, it
+		 * will assume it's something userspace handles and exit with
+		 * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
+		 * Here we start with a guess that the addresses around two
+		 * thirds of the VA space are unmapped and then work both down
+		 * and up from there in 1/6 VA space sized steps.
+		 */
+		start = 1ul << (vm->va_bits * 2 / 3);
+		end = 1ul << vm->va_bits;
+		step = 1ul << (vm->va_bits / 6);
+		for (gpa = start; gpa >= 0; gpa -= step) {
+			if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+				return;
+		}
+		for (gpa = start + step; gpa < end; gpa += step) {
+			if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+				return;
+		}
+		TEST_ASSERT(false, "Can't find a ucall mmio address");
+	}
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+	ucall_type = 0;
+	sync_global_to_guest(vm, ucall_type);
+	ucall_exit_mmio_addr = 0;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+}
+
+static void ucall_pio_exit(struct ucall *uc)
+{
+#ifdef __x86_64__
+	asm volatile("in %[port], %%al"
+		: : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
+#endif
+}
+
+static void ucall_mmio_exit(struct ucall *uc)
+{
+	*ucall_exit_mmio_addr = (vm_vaddr_t)uc;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = {
+		.cmd = cmd,
+	};
+	va_list va;
+	int i;
+
+	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+	va_start(va, nargs);
+	for (i = 0; i < nargs; ++i)
+		uc.args[i] = va_arg(va, uint64_t);
+	va_end(va);
+
+	switch (ucall_type) {
+	case UCALL_PIO:
+		ucall_pio_exit(&uc);
+		break;
+	case UCALL_MMIO:
+		ucall_mmio_exit(&uc);
+		break;
+	};
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+
+	memset(uc, 0, sizeof(*uc));
+
+#ifdef __x86_64__
+	if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
+	    run->io.port == UCALL_PIO_PORT) {
+		struct kvm_regs regs;
+		vcpu_regs_get(vm, vcpu_id, &regs);
+		memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc));
+		return uc->cmd;
+	}
+#endif
+	if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
+	    run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+		vm_vaddr_t gva;
+		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+			    "Unexpected ucall exit mmio address access");
+		gva = *(vm_vaddr_t *)run->mmio.data;
+		memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc));
+	}
+
+	return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 2f17675f4275..f28127f4a3af 100644
--- a/tools/testing/selftests/kvm/lib/x86.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -10,8 +10,8 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "kvm_util_internal.h"
-#include "x86.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
 
 /* Minimum physical address used for virtual translation tables. */
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
@@ -231,7 +231,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 {
 	int rc;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	/* If needed, create page map l4 table. */
@@ -239,25 +239,6 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 		vm_paddr_t paddr = vm_phy_page_alloc(vm,
 			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
 		vm->pgd = paddr;
-
-		/* Set pointer to pgd tables in all the VCPUs that
-		 * have already been created.  Future VCPUs will have
-		 * the value set as each one is created.
-		 */
-		for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
-			vcpu = vcpu->next) {
-			struct kvm_sregs sregs;
-
-			/* Obtain the current system register settings */
-			vcpu_sregs_get(vm, vcpu->id, &sregs);
-
-			/* Set and store the pointer to the start of the
-			 * pgd tables.
-			 */
-			sregs.cr3 = vm->pgd;
-			vcpu_sregs_set(vm, vcpu->id, &sregs);
-		}
-
 		vm->pgd_created = true;
 	}
 }
@@ -283,7 +264,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	uint16_t index[4];
 	struct pageMapL4Entry *pml4e;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -460,9 +441,32 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp)
 	segp->unusable = true;
 }
 
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+	void *gdt = addr_gva2hva(vm, vm->gdt);
+	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+	desc->limit0 = segp->limit & 0xFFFF;
+	desc->base0 = segp->base & 0xFFFF;
+	desc->base1 = segp->base >> 16;
+	desc->s = segp->s;
+	desc->type = segp->type;
+	desc->dpl = segp->dpl;
+	desc->p = segp->present;
+	desc->limit1 = segp->limit >> 16;
+	desc->l = segp->l;
+	desc->db = segp->db;
+	desc->g = segp->g;
+	desc->base2 = segp->base >> 24;
+	if (!segp->s)
+		desc->base3 = segp->base >> 32;
+}
+
+
 /* Set Long Mode Flat Kernel Code Segment
  *
  * Input Args:
+ *   vm - VM whose GDT is being filled, or NULL to only write segp
  *   selector - selector value
  *
  * Output Args:
@@ -473,7 +477,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp)
  * Sets up the KVM segment pointed to by segp, to be a code segment
  * with the selector value given by selector.
  */
-static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
+static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
 	struct kvm_segment *segp)
 {
 	memset(segp, 0, sizeof(*segp));
@@ -486,11 +490,14 @@ static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
 	segp->g = true;
 	segp->l = true;
 	segp->present = 1;
+	if (vm)
+		kvm_seg_fill_gdt_64bit(vm, segp);
 }
 
 /* Set Long Mode Flat Kernel Data Segment
  *
  * Input Args:
+ *   vm - VM whose GDT is being filled, or NULL to only write segp
  *   selector - selector value
  *
  * Output Args:
@@ -501,7 +508,7 @@ static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
  * Sets up the KVM segment pointed to by segp, to be a data segment
  * with the selector value given by selector.
  */
-static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
+static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 	struct kvm_segment *segp)
 {
 	memset(segp, 0, sizeof(*segp));
@@ -513,6 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
 					  */
 	segp->g = true;
 	segp->present = true;
+	if (vm)
+		kvm_seg_fill_gdt_64bit(vm, segp);
 }
 
 /* Address Guest Virtual to Guest Physical
@@ -542,7 +551,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	struct pageTableEntry *pte;
 	void *hva;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	index[0] = (gva >> 12) & 0x1ffu;
@@ -575,44 +584,64 @@ unmapped_gva:
 		    "gva: 0x%lx", gva);
 }
 
-void vcpu_setup(struct kvm_vm *vm, int vcpuid)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
+			  int pgd_memslot)
+{
+	if (!vm->gdt)
+		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
+			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+	dt->base = vm->gdt;
+	dt->limit = getpagesize();
+}
+
+static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
+				int selector, int gdt_memslot,
+				int pgd_memslot)
+{
+	if (!vm->tss)
+		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
+			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+	memset(segp, 0, sizeof(*segp));
+	segp->base = vm->tss;
+	segp->limit = 0x67;
+	segp->selector = selector;
+	segp->type = 0xb;
+	segp->present = 1;
+	kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
 {
 	struct kvm_sregs sregs;
 
 	/* Set mode specific system register values. */
 	vcpu_sregs_get(vm, vcpuid, &sregs);
 
+	sregs.idt.limit = 0;
+
+	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+
 	switch (vm->mode) {
-	case VM_MODE_FLAT48PG:
+	case VM_MODE_P52V48_4K:
 		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
-		sregs.cr4 |= X86_CR4_PAE;
+		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
 		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
 
 		kvm_seg_set_unusable(&sregs.ldt);
-		kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
-		kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
-		kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
+		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
+		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
+		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
 		break;
 
 	default:
 		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
 	}
-	vcpu_sregs_set(vm, vcpuid, &sregs);
 
-	/* If virtual translation table have been setup, set system register
-	 * to point to the tables.  It's okay if they haven't been setup yet,
-	 * in that the code that sets up the virtual translation tables, will
-	 * go back through any VCPUs that have already been created and set
-	 * their values.
-	 */
-	if (vm->pgd_created) {
-		struct kvm_sregs sregs;
-
-		vcpu_sregs_get(vm, vcpuid, &sregs);
-
-		sregs.cr3 = vm->pgd;
-		vcpu_sregs_set(vm, vcpuid, &sregs);
-	}
+	sregs.cr3 = vm->pgd;
+	vcpu_sregs_set(vm, vcpuid, &sregs);
 }
 /* Adds a vCPU with reasonable defaults (i.e., a stack)
  *
@@ -629,7 +658,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
 
 	/* Create VCPU */
-	vm_vcpu_add(vm, vcpuid);
+	vm_vcpu_add(vm, vcpuid, 0, 0);
 
 	/* Setup guest general purpose registers */
 	vcpu_regs_get(vm, vcpuid, &regs);
@@ -643,6 +672,102 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
 }
 
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+	struct kvm_cpuid2 *cpuid;
+	int nent = 100;
+	size_t size;
+
+	size = sizeof(*cpuid);
+	size += nent * sizeof(struct kvm_cpuid_entry2);
+	cpuid = malloc(size);
+	if (!cpuid) {
+		perror("malloc");
+		abort();
+	}
+
+	cpuid->nent = nent;
+
+	return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+	static struct kvm_cpuid2 *cpuid;
+	int ret;
+	int kvm_fd;
+
+	if (cpuid)
+		return cpuid;
+
+	cpuid = allocate_kvm_cpuid2();
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+		    ret, errno);
+
+	close(kvm_fd);
+	return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ *   cpuid: The cpuid.
+ *   function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_cpuid_entry2 *entry = NULL;
+	int i;
+
+	cpuid = kvm_get_supported_cpuid();
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == function &&
+		    cpuid->entries[i].index == index) {
+			entry = &cpuid->entries[i];
+			break;
+		}
+	}
+
+	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+		    function, index);
+	return entry;
+}
+
 /* VM VCPU CPUID Set
  *
  * Input Args:
@@ -669,10 +794,14 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 		    rc, errno);
 
 }
+
 /* Create a VM with reasonable defaults
  *
  * Input Args:
  *   vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The size of extra memories to add (this will
+ *                     decide how much extra space we will need to
+ *                     setup the page tables using mem slot 0)
  *   guest_code - The vCPU's entry point
  *
  * Output Args: None
@@ -680,12 +809,23 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
  * Return:
  *   Pointer to opaque structure that describes the created VM.
  */
-struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
 {
 	struct kvm_vm *vm;
+	/*
+	 * For x86 the maximum page table size for a memory region
+	 * will be when only 4K pages are used.  In that case the
+	 * total extra size for page tables (for extra N pages) will
+	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
+	 * than N/512*2.
+	 */
+	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
 	/* Create VM */
-	vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	vm = vm_create(VM_MODE_P52V48_4K,
+		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
+		       O_RDWR);
 
 	/* Setup guest code */
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
@@ -698,3 +838,296 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
 
 	return vm;
 }
+
+/* VCPU Get MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+
+	return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *   msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	uint64_t msr_value)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	memset(&buffer, 0, sizeof(buffer));
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	buffer.entry.data = msr_value;
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args.  Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+
+	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	if (num >= 1)
+		regs.rdi = va_arg(ap, uint64_t);
+
+	if (num >= 2)
+		regs.rsi = va_arg(ap, uint64_t);
+
+	if (num >= 3)
+		regs.rdx = va_arg(ap, uint64_t);
+
+	if (num >= 4)
+		regs.rcx = va_arg(ap, uint64_t);
+
+	if (num >= 5)
+		regs.r8 = va_arg(ap, uint64_t);
+
+	if (num >= 6)
+		regs.r9 = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+
+	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+	fprintf(stream, "%*sregs:\n", indent + 2, "");
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs_dump(stream, &regs, indent + 4);
+
+	fprintf(stream, "%*ssregs:\n", indent + 2, "");
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs_dump(stream, &sregs, indent + 4);
+}
+
+struct kvm_x86_state {
+	struct kvm_vcpu_events events;
+	struct kvm_mp_state mp_state;
+	struct kvm_regs regs;
+	struct kvm_xsave xsave;
+	struct kvm_xcrs xcrs;
+	struct kvm_sregs sregs;
+	struct kvm_debugregs debugregs;
+	union {
+		struct kvm_nested_state nested;
+		char nested_[16384];
+	};
+	struct kvm_msrs msrs;
+};
+
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+	struct kvm_msr_list nmsrs;
+	int r;
+
+	nmsrs.nmsrs = 0;
+	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+		r);
+
+	return nmsrs.nmsrs;
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct kvm_msr_list *list;
+	struct kvm_x86_state *state;
+	int nmsrs, r, i;
+	static int nested_size = -1;
+
+	if (nested_size == -1) {
+		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+		TEST_ASSERT(nested_size <= sizeof(state->nested_),
+			    "Nested state size too big, %i > %zi",
+			    nested_size, sizeof(state->nested_));
+	}
+
+	nmsrs = kvm_get_num_msrs(vm);
+	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+	list->nmsrs = nmsrs;
+	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+                r);
+
+	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
+	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+                r);
+
+	if (nested_size) {
+		state->nested.size = sizeof(state->nested_);
+		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
+			r);
+		TEST_ASSERT(state->nested.size <= nested_size,
+			"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+			state->nested.size, nested_size);
+	} else
+		state->nested.size = 0;
+
+	state->msrs.nmsrs = nmsrs;
+	for (i = 0; i < nmsrs; i++)
+		state->msrs.entries[i].index = list->indices[i];
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
+        TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
+                r, r == nmsrs ? -1 : list->indices[r]);
+
+	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+                r);
+
+	free(list);
+	return state;
+}
+
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int r;
+
+	if (state->nested.size) {
+		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+			r);
+	}
+
+	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+        TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+                r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+
+	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+                r);
+}
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 0231bc0aae7b..771ba6bf751c 100644
--- a/tools/testing/selftests/kvm/lib/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/vmx.c
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -10,50 +10,79 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
-/* Create a default VM for VMX tests.
+bool enable_evmcs;
+
+/* Allocate memory regions for nested VMX tests.
  *
  * Input Args:
- *   vcpuid - The id of the single VCPU to add to the VM.
- *   guest_code - The vCPU's entry point
+ *   vm - The VM to allocate guest-virtual addresses in.
  *
- * Output Args: None
+ * Output Args:
+ *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
  *
  * Return:
- *   Pointer to opaque structure that describes the created VM.
+ *   Pointer to structure with the addresses of the VMX areas.
  */
-struct kvm_vm *
-vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code)
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
 {
-	struct kvm_cpuid2 *cpuid;
-	struct kvm_vm *vm;
-	vm_vaddr_t vmxon_vaddr;
-	vm_paddr_t vmxon_paddr;
-	vm_vaddr_t vmcs_vaddr;
-	vm_paddr_t vmcs_paddr;
-
-	vm = vm_create_default(vcpuid, (void *) guest_code);
-
-	/* Enable nesting in CPUID */
-	vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+	vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
 
 	/* Setup of a region of guest memory for the vmxon region. */
-	vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
-	vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr);
+	vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+	vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
 
 	/* Setup of a region of guest memory for a vmcs. */
-	vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
-	vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr);
+	vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+	vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+	/* Setup of a region of guest memory for the MSR bitmap. */
+	vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+	vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+	memset(vmx->msr_hva, 0, getpagesize());
+
+	/* Setup of a region of guest memory for the shadow VMCS. */
+	vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+	vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
 
-	vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr,
-		      vmcs_paddr);
+	/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+	vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+	vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+	memset(vmx->vmread_hva, 0, getpagesize());
 
-	return vm;
+	vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+	memset(vmx->vmwrite_hva, 0, getpagesize());
+
+	/* Setup of a region of guest memory for the VP Assist page. */
+	vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						0x10000, 0, 0);
+	vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
+	vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
+
+	/* Setup of a region of guest memory for the enlightened VMCS. */
+	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						       0x10000, 0, 0);
+	vmx->enlightened_vmcs_hva =
+		addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
+	vmx->enlightened_vmcs_gpa =
+		addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
+
+	*p_vmx_gva = vmx_gva;
+	return vmx;
 }
 
-void prepare_for_vmx_operation(void)
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
 {
 	uint64_t feature_control;
 	uint64_t required;
@@ -88,18 +117,55 @@ void prepare_for_vmx_operation(void)
 	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
 	if ((feature_control & required) != required)
 		wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+
+	/* Enter VMX root operation. */
+	*(uint32_t *)(vmx->vmxon) = vmcs_revision();
+	if (vmxon(vmx->vmxon_gpa))
+		return false;
+
+	return true;
+}
+
+bool load_vmcs(struct vmx_pages *vmx)
+{
+	if (!enable_evmcs) {
+		/* Load a VMCS. */
+		*(uint32_t *)(vmx->vmcs) = vmcs_revision();
+		if (vmclear(vmx->vmcs_gpa))
+			return false;
+
+		if (vmptrld(vmx->vmcs_gpa))
+			return false;
+
+		/* Setup shadow VMCS, do not load it yet. */
+		*(uint32_t *)(vmx->shadow_vmcs) =
+			vmcs_revision() | 0x80000000ul;
+		if (vmclear(vmx->shadow_vmcs_gpa))
+			return false;
+	} else {
+		if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
+				  vmx->enlightened_vmcs))
+			return false;
+		current_evmcs->revision_id = vmcs_revision();
+	}
+
+	return true;
 }
 
 /*
  * Initialize the control fields to the most basic settings possible.
  */
-static inline void init_vmcs_control_fields(void)
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
 {
 	vmwrite(VIRTUAL_PROCESSOR_ID, 0);
 	vmwrite(POSTED_INTR_NV, 0);
 
-	vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS));
-	vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS));
+	vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+	if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+			rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+	else
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
 	vmwrite(EXCEPTION_BITMAP, 0);
 	vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
 	vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
@@ -113,12 +179,15 @@ static inline void init_vmcs_control_fields(void)
 	vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
 	vmwrite(TPR_THRESHOLD, 0);
-	vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
 
 	vmwrite(CR0_GUEST_HOST_MASK, 0);
 	vmwrite(CR4_GUEST_HOST_MASK, 0);
 	vmwrite(CR0_READ_SHADOW, get_cr0());
 	vmwrite(CR4_READ_SHADOW, get_cr4());
+
+	vmwrite(MSR_BITMAP, vmx->msr_gpa);
+	vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+	vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
 }
 
 /*
@@ -235,9 +304,9 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp)
 	vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
 }
 
-void prepare_vmcs(void *guest_rip, void *guest_rsp)
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
 {
-	init_vmcs_control_fields();
+	init_vmcs_control_fields(vmx);
 	init_vmcs_host_state();
 	init_vmcs_guest_state(guest_rip, guest_rsp);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
new file mode 100644
index 000000000000..d503a51fad30
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define X86_FEATURE_XSAVE	(1<<26)
+#define X86_FEATURE_OSXSAVE	(1<<27)
+#define VCPU_ID			1
+
+static inline bool cr4_cpuid_is_sync(void)
+{
+	int func, subfunc;
+	uint32_t eax, ebx, ecx, edx;
+	uint64_t cr4;
+
+	func = 0x1;
+	subfunc = 0x0;
+	__asm__ __volatile__("cpuid"
+			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+			     : "a"(func), "c"(subfunc));
+
+	cr4 = get_cr4();
+
+	return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+}
+
+static void guest_code(void)
+{
+	uint64_t cr4;
+
+	/* turn on CR4.OSXSAVE */
+	cr4 = get_cr4();
+	cr4 |= X86_CR4_OSXSAVE;
+	set_cr4(cr4);
+
+	/* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+	GUEST_ASSERT(cr4_cpuid_is_sync());
+
+	/* notify hypervisor to change CR4 */
+	GUEST_SYNC(0);
+
+	/* check again */
+	GUEST_ASSERT(cr4_cpuid_is_sync());
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct kvm_sregs sregs;
+	struct kvm_cpuid_entry2 *entry;
+	struct ucall uc;
+	int rc;
+
+	entry = kvm_get_supported_cpuid_entry(1);
+	if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+		printf("XSAVE feature not supported, skipping test\n");
+		return 0;
+	}
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (1) {
+		rc = _vcpu_run(vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_IO) {
+			switch (get_ucall(vm, VCPU_ID, &uc)) {
+			case UCALL_SYNC:
+				/* emulate hypervisor clearing CR4.OSXSAVE */
+				vcpu_sregs_get(vm, VCPU_ID, &sregs);
+				sregs.cr4 &= ~X86_CR4_OSXSAVE;
+				vcpu_sregs_set(vm, VCPU_ID, &sregs);
+				break;
+			case UCALL_ABORT:
+				TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+				break;
+			case UCALL_DONE:
+				goto done;
+			default:
+				TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+			}
+		}
+	}
+
+	kvm_vm_free(vm);
+
+done:
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
new file mode 100644
index 000000000000..92c2cfd1b182
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID		5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(6);
+
+	GUEST_SYNC(7);
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	GUEST_SYNC(4);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(5);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_SYNC(8);
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(9);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages = NULL;
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+	uint16_t evmcs_ver;
+	struct kvm_enable_cap enable_evmcs_cap = {
+		.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+		 .args[0] = (unsigned long)&evmcs_ver
+	};
+
+	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+		printf("capabilities not available, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+				    __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID, 0, 0);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
new file mode 100644
index 000000000000..eb3e7a838cb4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+	uint64_t msr_platform_info;
+
+	for (;;) {
+		msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+		GUEST_SYNC(msr_platform_info);
+		asm volatile ("inc %r11");
+	}
+}
+
+static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+{
+	struct kvm_enable_cap cap = {};
+
+	cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
+	cap.flags = 0;
+	cap.args[0] = (int)enable;
+	vm_enable_cap(vm, &cap);
+}
+
+static void test_msr_platform_info_enabled(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	set_msr_platform_info_enabled(vm, true);
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+			run->exit_reason,
+			exit_reason_str(run->exit_reason));
+	get_ucall(vm, VCPU_ID, &uc);
+	TEST_ASSERT(uc.cmd == UCALL_SYNC,
+			"Received ucall other than UCALL_SYNC: %u\n",
+			ucall);
+	TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
+		MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+}
+
+static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	set_msr_platform_info_enabled(vm, false);
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+			"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+			run->exit_reason,
+			exit_reason_str(run->exit_reason));
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *state;
+	int rv;
+	uint64_t msr_platform_info;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
+	if (!rv) {
+		fprintf(stderr,
+			"KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
+	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
+		msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+	test_msr_platform_info_disabled(vm);
+	test_msr_platform_info_enabled(vm);
+	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 090fd3f19352..35640e8e95bc 100644
--- a/tools/testing/selftests/kvm/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,7 +22,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID                  5
 
@@ -36,7 +36,7 @@ int main(int argc, char *argv[])
 	setbuf(stdout, NULL);
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, NULL);
+	vm = vm_create_default(VCPU_ID, 0, NULL);
 
 	vcpu_sregs_get(vm, VCPU_ID, &sregs);
 	sregs.apic_base = 1 << 10;
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
new file mode 100644
index 000000000000..03da41f0f736
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -0,0 +1,199 @@
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID		5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(6);
+
+        /* Exit to L1 */
+	vmcall();
+
+	/* L1 has now set up a shadow VMCS for us.  */
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+	GUEST_SYNC(10);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+	GUEST_SYNC(11);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+	GUEST_SYNC(12);
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	GUEST_SYNC(4);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(5);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	/* Check that the launched state is preserved.  */
+	GUEST_ASSERT(vmlaunch());
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_SYNC(7);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+	GUEST_ASSERT(vmlaunch());
+	GUEST_SYNC(8);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+
+	vmwrite(GUEST_RIP, 0xc0ffee);
+	GUEST_SYNC(9);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+	GUEST_SYNC(13);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages = NULL;
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	} else {
+		printf("will skip nested state checks\n");
+		vcpu_args_set(vm, VCPU_ID, 1, 0);
+	}
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+				    __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID, 0, 0);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index eae1ece3c31b..c8478ce9ea77 100644
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -19,31 +19,14 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID 5
-#define PORT_HOST_SYNC 0x1000
-
-static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
-{
-	        __asm__ __volatile__("in %[port], %%al"
-				     :
-				     : [port]"d"(port), "D"(arg0), "S"(arg1)
-				     : "rax");
-}
-
-#define exit_to_l0(_port, _arg0, _arg1) \
-        __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
-
-#define GUEST_ASSERT(_condition) do { \
-	if (!(_condition)) \
-		exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\
-} while (0)
 
 void guest_code(void)
 {
 	for (;;) {
-		exit_to_l0(PORT_HOST_SYNC, "hello", 0);
+		GUEST_SYNC(0);
 		asm volatile ("inc %r11");
 	}
 }
@@ -111,7 +94,7 @@ int main(int argc, char *argv[])
 	}
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, guest_code);
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
 
 	run = vcpu_state(vm, VCPU_ID);
 
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index aaa633263b2c..18fa64db0d7a 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -1,5 +1,5 @@
 /*
- * gtests/tests/vmx_tsc_adjust_test.c
+ * vmx_tsc_adjust_test
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -22,12 +22,14 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
 #include <string.h>
 #include <sys/ioctl.h>
 
+#include "kselftest.h"
+
 #ifndef MSR_IA32_TSC_ADJUST
 #define MSR_IA32_TSC_ADJUST 0x3b
 #endif
@@ -44,11 +46,6 @@ enum {
 	PORT_DONE,
 };
 
-struct vmx_page {
-	vm_vaddr_t virt;
-	vm_paddr_t phys;
-};
-
 enum {
 	VMXON_PAGE = 0,
 	VMCS_PAGE,
@@ -65,30 +62,12 @@ struct kvm_single_msr {
 /* The virtual machine object. */
 static struct kvm_vm *vm;
 
-/* Array of vmx_page descriptors that is shared with the guest. */
-struct vmx_page *vmx_pages;
-
-#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
-static void do_exit_to_l0(uint16_t port, unsigned long arg)
-{
-	__asm__ __volatile__("in %[port], %%al"
-		:
-		: [port]"d"(port), "D"(arg)
-		: "rax");
-}
-
-
-#define GUEST_ASSERT(_condition) do {					     \
-	if (!(_condition))						     \
-		exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
-} while (0)
-
 static void check_ia32_tsc_adjust(int64_t max)
 {
 	int64_t adjust;
 
 	adjust = rdmsr(MSR_IA32_TSC_ADJUST);
-	exit_to_l0(PORT_REPORT, adjust);
+	GUEST_SYNC(adjust);
 	GUEST_ASSERT(adjust <= max);
 }
 
@@ -103,7 +82,7 @@ static void l2_guest_code(void)
 	__asm__ __volatile__("vmcall");
 }
 
-static void l1_guest_code(struct vmx_page *vmx_pages)
+static void l1_guest_code(struct vmx_pages *vmx_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -114,23 +93,15 @@ static void l1_guest_code(struct vmx_page *vmx_pages)
 	wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
 	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
 
-	prepare_for_vmx_operation();
-
-	/* Enter VMX root operation. */
-	*(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision();
-	GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys));
-
-	/* Load a VMCS. */
-	*(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision();
-	GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys));
-	GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys));
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
 
 	/* Prepare the VMCS for L2 execution. */
-	prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
 	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
 	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-	vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys);
 	vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
 
 	/* Jump into L2.  First, test failure to load guest CR3.  */
@@ -147,34 +118,7 @@ static void l1_guest_code(struct vmx_page *vmx_pages)
 
 	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
 
-	exit_to_l0(PORT_DONE, 0);
-}
-
-static void allocate_vmx_page(struct vmx_page *page)
-{
-	vm_vaddr_t virt;
-
-	virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0);
-	memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE);
-
-	page->virt = virt;
-	page->phys = addr_gva2gpa(vm, virt);
-}
-
-static vm_vaddr_t allocate_vmx_pages(void)
-{
-	vm_vaddr_t vmx_pages_vaddr;
-	int i;
-
-	vmx_pages_vaddr = vm_vaddr_alloc(
-		vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0);
-
-	vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr);
-
-	for (i = 0; i < NUM_VMX_PAGES; i++)
-		allocate_vmx_page(&vmx_pages[i]);
-
-	return vmx_pages_vaddr;
+	GUEST_DONE();
 }
 
 void report(int64_t val)
@@ -185,7 +129,8 @@ void report(int64_t val)
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_vaddr;
+	struct vmx_pages *vmx_pages;
+	vm_vaddr_t vmx_pages_gva;
 	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
 	if (!(entry->ecx & CPUID_VMX)) {
@@ -193,35 +138,34 @@ int main(int argc, char *argv[])
 		exit(KSFT_SKIP);
 	}
 
-	vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	/* Allocate VMX pages and shared descriptors (vmx_pages). */
-	vmx_pages_vaddr = allocate_vmx_pages();
-	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr);
+	vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
 	for (;;) {
 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-		struct kvm_regs regs;
+		struct ucall uc;
 
 		vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		vcpu_regs_get(vm, VCPU_ID, &regs);
-
-		switch (run->io.port) {
-		case PORT_ABORT:
-			TEST_ASSERT(false, "%s", (const char *) regs.rdi);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
 			/* NOT REACHED */
-		case PORT_REPORT:
-			report(regs.rdi);
+		case UCALL_SYNC:
+			report(uc.args[1]);
 			break;
-		case PORT_DONE:
+		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 		}
 	}
 
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index c1b1a4dc6a96..0a8e75886224 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -16,28 +16,58 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
 TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
+top_srcdir ?= ../../../..
+include $(top_srcdir)/scripts/subarch.include
+ARCH		?= $(SUBARCH)
+
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
+.PHONY: khdr
+khdr:
+	make ARCH=$(ARCH) -C $(top_srcdir) headers_install
+
+ifdef KSFT_KHDR_INSTALL
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES):| khdr
+endif
+
 .ONESHELL:
+define RUN_TEST_PRINT_RESULT
+	TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST";	\
+	echo $$TEST_HDR_MSG;					\
+	echo "========================================";	\
+	if [ ! -x $$TEST ]; then	\
+		echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+		echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+	else					\
+		cd `dirname $$TEST` > /dev/null; \
+		if [ "X$(summary)" != "X" ]; then	\
+			(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
+			echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+			(if [ $$? -eq $$skip ]; then	\
+				echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]";				\
+			else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]";					\
+			fi;)			\
+		else				\
+			(./$$BASENAME_TEST &&	\
+			echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") ||						\
+			(if [ $$? -eq $$skip ]; then \
+				echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+			else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]";				\
+			fi;)		\
+		fi;				\
+		cd - > /dev/null;		\
+	fi;
+endef
+
 define RUN_TESTS
 	@export KSFT_TAP_LEVEL=`echo 1`;		\
 	test_num=`echo 0`;				\
+	skip=`echo 4`;					\
 	echo "TAP version 13";				\
 	for TEST in $(1); do				\
 		BASENAME_TEST=`basename $$TEST`;	\
 		test_num=`echo $$test_num+1 | bc`;	\
-		echo "selftests: $$BASENAME_TEST";	\
-		echo "========================================";	\
-		if [ ! -x $$TEST ]; then	\
-			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
-			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
-		else					\
-		if [ "X$(summary)" != "X" ]; then		\
-				cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
-			else				\
-				cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
-			fi;				\
-		fi;					\
+		$(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip))						\
 	done;
 endef
 
@@ -76,9 +106,18 @@ else
 endif
 
 define EMIT_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
+	@test_num=`echo 0`;				\
+	for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+		test_num=`echo $$test_num+1 | bc`;	\
+		TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST";	\
+		echo "echo $$TEST_HDR_MSG";	\
+		if [ ! -x $$TEST ]; then	\
+			echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\"";		\
+			echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
+		else
+			echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
+		fi;		\
 	done;
 endef
 
@@ -106,6 +145,9 @@ COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
 LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
 endif
 
+# Selftest makefiles can override those targets by setting
+# OVERRIDE_TARGETS = 1.
+ifeq ($(OVERRIDE_TARGETS),)
 $(OUTPUT)/%:%.c
 	$(LINK.c) $^ $(LDLIBS) -o $@
 
@@ -114,5 +156,6 @@ $(OUTPUT)/%.o:%.S
 
 $(OUTPUT)/%:%.S
 	$(LINK.S) $^ $(LDLIBS) -o $@
+endif
 
 .PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 08360060ab14..70d5711e3ac8 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := printf.sh bitmap.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
index 4dee4d2a8bbe..5a90006d1aea 100755
--- a/tools/testing/selftests/lib/bitmap.sh
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -1,9 +1,13 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # Runs bitmap infrastructure tests using test_bitmap kernel module
 if ! /sbin/modprobe -q -n test_bitmap; then
-	echo "bitmap: [SKIP]"
-	exit 77
+	echo "bitmap: module test_bitmap is not found [SKIP]"
+	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q test_bitmap; then
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
index b363994e5e11..78e7483c8d60 100755
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -2,9 +2,12 @@
 # SPDX-License-Identifier: GPL-2.0
 # Checks fast/slow prime_number generation for inconsistencies
 
-if ! /sbin/modprobe -q -r prime_numbers; then
-	echo "prime_numbers: [SKIP]"
-	exit 77
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n prime_numbers; then
+	echo "prime_numbers: module prime_numbers is not found [SKIP]"
+	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q prime_numbers selftest=65536; then
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
index 0c37377fd7d4..45a23e2d64ad 100755
--- a/tools/testing/selftests/lib/printf.sh
+++ b/tools/testing/selftests/lib/printf.sh
@@ -1,9 +1,13 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 # Runs printf infrastructure using test_printf kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if ! /sbin/modprobe -q -n test_printf; then
-	echo "printf: [SKIP]"
-	exit 77
+	echo "printf: module test_printf is not found [SKIP]"
+	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q test_printf; then
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 000000000000..6e7761ab3536
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for locking/ww_mutx selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := ww_mutex.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
index 2c3d6b1878c2..91e4ac7566af 100644..100755
--- a/tools/testing/selftests/locking/ww_mutex.sh
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -1,6 +1,14 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+if ! /sbin/modprobe -q -n test-ww_mutex; then
+	echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
+	exit $ksft_skip
+fi
 
 if /sbin/modprobe -q test-ww_mutex; then
        /sbin/modprobe -q -r test-ww_mutex
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index c82cec2497de..60826d7d37d4 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+#
+CFLAGS += -I../ -I../../../../usr/include/
 TEST_GEN_PROGS := media_device_test media_device_open video_device_test
-all: $(TEST_GEN_PROGS)
 
 include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index a5ce5434bafd..93183a37b133 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,6 +34,8 @@
 #include <sys/stat.h>
 #include <linux/media.h>
 
+#include "../kselftest.h"
+
 int main(int argc, char **argv)
 {
 	int opt;
@@ -61,10 +63,8 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (getuid() != 0) {
-		printf("Please run the test as root - Exiting.\n");
-		exit(-1);
-	}
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
 
 	/* Open Media device and keep it open */
 	fd = open(media_device, O_RDWR);
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 421a367e4bb3..4b9953359e40 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,6 +39,8 @@
 #include <time.h>
 #include <linux/media.h>
 
+#include "../kselftest.h"
+
 int main(int argc, char **argv)
 {
 	int opt;
@@ -66,10 +68,8 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (getuid() != 0) {
-		printf("Please run the test as root - Exiting.\n");
-		exit(-1);
-	}
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
 
 	/* Generate random number of interations */
 	srand((unsigned int) time(NULL));
@@ -88,7 +88,7 @@ int main(int argc, char **argv)
 	       "other Oops in the dmesg. Enable KaSan kernel\n"
 	       "config option for use-after-free error detection.\n\n");
 
-	printf("Running test for %d iternations\n", count);
+	printf("Running test for %d iterations\n", count);
 
 	while (count > 0) {
 		ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index 22bffd55a523..6793f8ecc8e7 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -293,10 +293,9 @@ static int test_membarrier_query(void)
 		}
 		ksft_exit_fail_msg("sys_membarrier() failed\n");
 	}
-	if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
-		ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n");
-		ksft_exit_fail_msg("sys_membarrier is not supported.\n");
-	}
+	if (!(ret & MEMBARRIER_CMD_GLOBAL))
+		ksft_exit_skip(
+			"sys_membarrier unsupported: CMD_GLOBAL not found.\n");
 
 	ksft_test_result_pass("sys_membarrier available\n");
 	return 0;
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 0862e6f47a38..53a848109f7b 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -4,9 +4,9 @@ CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := run_tests.sh
-TEST_FILES := run_fuse_test.sh
-TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
+TEST_GEN_PROGS := memfd_test
+TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
+TEST_GEN_FILES := fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
 
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
index c2d41ed81b24..fb633eeb0290 100755
--- a/tools/testing/selftests/memfd/run_tests.sh
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -1,11 +1,8 @@
 #!/bin/bash
 # please run as root
 
-#
-# Normal tests requiring no special resources
-#
-./run_fuse_test.sh
-./memfd_test
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
 
 #
 # To test memfd_create with hugetlbfs, there needs to be hpages_test
@@ -29,12 +26,13 @@ if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
 	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
 	hpages_needed=`expr $hpages_test - $freepgs`
 
+	if [ $UID != 0 ]; then
+		echo "Please run memfd with hugetlbfs test as root"
+		exit $ksft_skip
+	fi
+
 	echo 3 > /proc/sys/vm/drop_caches
 	echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
-	if [ $? -ne 0 ]; then
-		echo "Please run this test as root"
-		exit 1
-	fi
 	while read name size unit; do
 		if [ "$name" = "HugePages_Free:" ]; then
 			freepgs=$size
@@ -53,7 +51,7 @@ if [ $freepgs -lt $hpages_test ]; then
 	fi
 	printf "Not enough huge pages available (%d < %d)\n" \
 		$freepgs $needpgs
-	exit 1
+	exit $ksft_skip
 fi
 
 #
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 686da510f989..e0a625e34f40 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -4,11 +4,8 @@ all:
 include ../lib.mk
 
 TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
-
-override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
 
 run_full_test:
-	@/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
+	@/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
index 2fde30191a47..a7e8cd5bb265 100644
--- a/tools/testing/selftests/memory-hotplug/config
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -2,3 +2,4 @@ CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTPLUG_SPARSE=y
 CONFIG_NOTIFIER_ERROR_INJECTION=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index ae2c790d0880..b37585e6aa38 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -3,30 +3,33 @@
 
 SYSFS=
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 prerequisite()
 {
 	msg="skip all tests:"
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
 
 	if [ ! -d "$SYSFS" ]; then
 		echo $msg sysfs is not mounted >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
 		echo $msg memory hotplug is not supported >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
 		echo $msg no hot-pluggable memory >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
@@ -133,7 +136,8 @@ offline_memory_expect_fail()
 
 error=-12
 priority=0
-ratio=10
+# Run with default of ratio=2 for Kselftest run
+ratio=2
 retval=0
 
 while getopts e:hp:r: opt; do
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index e094f71c6dbc..026890744215 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -3,15 +3,7 @@
 CFLAGS = -Wall \
          -O2
 
-TEST_GEN_PROGS := unprivileged-remount-test
+TEST_PROGS := run_tests.sh
+TEST_GEN_FILES := unprivileged-remount-test
 
 include ../lib.mk
-
-override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
-		      then	\
-				./unprivileged-remount-test ; \
-		      else	\
-				echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
-		      fi
-override EMIT_TESTS := echo "$(RUN_TESTS)"
-
diff --git a/tools/testing/selftests/mount/config b/tools/testing/selftests/mount/config
index b5d881e48548..416bd53ce982 100644
--- a/tools/testing/selftests/mount/config
+++ b/tools/testing/selftests/mount/config
@@ -1,2 +1 @@
 CONFIG_USER_NS=y
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 000000000000..4ab8f507dcba
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Run mount selftests
+if [ -f /proc/self/uid_map ] ; then
+	./unprivileged-remount-test ;
+else
+	echo "WARN: No /proc/self/uid_map exist, test skipped." ;
+	exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 743d3f9e5918..8a58055fc1f5 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -1,17 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -O2
 LDLIBS = -lrt -lpthread -lpopt
+
 TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 
 include ../lib.mk
-
-override define RUN_TESTS
-	@$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
-	@$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
-endef
-
-override define EMIT_TESTS
-	echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
-	echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
-endef
-
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
index e0a74bd207a5..9403ac01ba11 100644
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -33,6 +33,8 @@
 #include <mqueue.h>
 #include <error.h>
 
+#include "../kselftest.h"
+
 static char *usage =
 "Usage:\n"
 "  %s path\n"
@@ -53,6 +55,7 @@ int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
 int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
 FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
 char *queue_path;
+char *default_queue_path = "/test1";
 mqd_t queue = -1;
 
 static inline void __set(FILE *stream, int value, char *err_msg);
@@ -238,35 +241,33 @@ int main(int argc, char *argv[])
 	struct mq_attr attr, result;
 
 	if (argc != 2) {
-		fprintf(stderr, "Must pass a valid queue name\n\n");
-		fprintf(stderr, usage, argv[0]);
-		exit(1);
-	}
+		printf("Using Default queue path - %s\n", default_queue_path);
+		queue_path = default_queue_path;
+	} else {
 
 	/*
 	 * Although we can create a msg queue with a non-absolute path name,
 	 * unlink will fail.  So, if the name doesn't start with a /, add one
 	 * when we save it.
 	 */
-	if (*argv[1] == '/')
-		queue_path = strdup(argv[1]);
-	else {
-		queue_path = malloc(strlen(argv[1]) + 2);
-		if (!queue_path) {
-			perror("malloc()");
-			exit(1);
+		if (*argv[1] == '/')
+			queue_path = strdup(argv[1]);
+		else {
+			queue_path = malloc(strlen(argv[1]) + 2);
+			if (!queue_path) {
+				perror("malloc()");
+				exit(1);
+			}
+			queue_path[0] = '/';
+			queue_path[1] = 0;
+			strcat(queue_path, argv[1]);
 		}
-		queue_path[0] = '/';
-		queue_path[1] = 0;
-		strcat(queue_path, argv[1]);
 	}
 
-	if (getuid() != 0) {
-		fprintf(stderr, "Not running as root, but almost all tests "
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
 			"require root in order to modify\nsystem settings.  "
 			"Exiting.\n");
-		exit(1);
-	}
 
 	/* Find out what files there are for us to make tweaks in */
 	def_msgs = fopen(DEF_MSGS, "r+");
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 8188f72de93c..b019e0b8221c 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -39,6 +39,8 @@
 #include <popt.h>
 #include <error.h>
 
+#include "../kselftest.h"
+
 static char *usage =
 "Usage:\n"
 "  %s [-c #[,#..] -f] path\n"
@@ -626,12 +628,10 @@ int main(int argc, char *argv[])
 		cpus_to_pin[0] = cpus_online - 1;
 	}
 
-	if (getuid() != 0) {
-		fprintf(stderr, "Not running as root, but almost all tests "
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
 			"require root in order to modify\nsystem settings.  "
 			"Exiting.\n");
-		exit(1);
-	}
 
 	max_msgs = fopen(MAX_MSGS, "r+");
 	max_msgsize = fopen(MAX_MSGSIZE, "r+");
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c612d6e38c62..8cf22b3c2563 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,9 +1,17 @@
 msg_zerocopy
 socket
 psock_fanout
+psock_snd
 psock_tpacket
 reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
 reuseaddr_conflict
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
+tcp_inq
+tls
+ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3ff81a478dbe..923570a9708a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,20 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
+TEST_PROGS += test_vxlan_fdb_changelink.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
+KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
 $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 7ba089b33e8b..cd3a2f1545b5 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -12,3 +12,5 @@ CONFIG_NET_IPVTI=y
 CONFIG_INET6_XFRM_MODE_TUNNEL=y
 CONFIG_IPV6_VTI=y
 CONFIG_DUMMY=y
+CONFIG_BRIDGE=y
+CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 3991ad1a368d..864f865eee55 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -167,8 +167,8 @@ setup()
 	# add vrf table
 	ip li add ${VRF} type vrf table ${VRF_TABLE}
 	ip li set ${VRF} up
-	ip ro add table ${VRF_TABLE} unreachable default
-	ip -6 ro add table ${VRF_TABLE} unreachable default
+	ip ro add table ${VRF_TABLE} unreachable default metric 8192
+	ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
 
 	# create test interfaces
 	ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
@@ -185,20 +185,20 @@ setup()
 	for n in 1 3 5 7; do
 		ip li set ${NETIFS[p${n}]} up
 		ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
 	done
 
 	# move peer interfaces to namespace and add addresses
 	for n in 2 4 6 8; do
 		ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
 		ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
 	done
 
-	set +e
+	ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+	ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
 
-	# let DAD complete - assume default of 1 probe
-	sleep 1
+	set +e
 }
 
 cleanup()
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 000000000000..d4cfb6a7a086
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,248 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV=dummy0
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+	else
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "######################################################################"
+	echo "TEST SECTION: $*"
+	echo "######################################################################"
+}
+
+setup()
+{
+	set -e
+	ip netns add testns
+	$IP link set dev lo up
+
+	$IP link add dummy0 type dummy
+	$IP link set dev dummy0 up
+	$IP address add 198.51.100.1/24 dev dummy0
+	$IP -6 address add 2001:db8:1::1/64 dev dummy0
+
+	set +e
+}
+
+cleanup()
+{
+	$IP link del dev dummy0 &> /dev/null
+	ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+	ip rule help 2>&1 | grep -q $1
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 iprule too old, missing $1 match"
+		return 1
+	fi
+
+	ip route get help 2>&1 | grep -q $2
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 get route too old, missing $2 match"
+		return 1
+	fi
+
+	return 0
+}
+
+fib_rule6_del()
+{
+	$IP -6 rule del $1
+	log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+	pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	$IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+
+	$IP -6 rule add $match table $RTABLE
+	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule6 check: $1"
+
+	fib_rule6_del_by_pref "$match"
+	log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+	# setup the fib rule redirect route
+	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+	match="oif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP6 iif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+	match="tos 0x10"
+	fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+	match="fwmark 0x64"
+	getmatch="mark 0x64"
+	fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+	fib_check_iproute_support "uidrange" "uid"
+	if [ $? -eq 0 ]; then
+		match="uidrange 100-100"
+		getmatch="uid 100"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+	fi
+
+	fib_check_iproute_support "sport" "sport"
+	if [ $? -eq 0 ]; then
+		match="sport 666 dport 777"
+		fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto tcp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto icmp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+	fi
+}
+
+fib_rule4_del()
+{
+	$IP rule del $1
+	log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+	pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	$IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+
+	$IP rule add $match table $RTABLE
+	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule4 check: $1"
+
+	fib_rule4_del_by_pref "$match"
+	log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+	# setup the fib rule redirect route
+	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+	match="oif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP iif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+	match="tos 0x10"
+	fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+	match="fwmark 0x64"
+	getmatch="mark 0x64"
+	fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+	fib_check_iproute_support "uidrange" "uid"
+	if [ $? -eq 0 ]; then
+		match="uidrange 100-100"
+		getmatch="uid 100"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+	fi
+
+	fib_check_iproute_support "sport" "sport"
+	if [ $? -eq 0 ]; then
+		match="sport 666 dport 777"
+		fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto tcp"
+		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto icmp"
+		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+	fi
+}
+
+run_fibrule_tests()
+{
+	log_section "IPv4 fib rule"
+	fib_rule4_test
+	log_section "IPv6 fib rule"
+	fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 9164e60d4b66..802b4af18729 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -5,10 +5,15 @@
 # different events.
 
 ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
 
-VERBOSE=${VERBOSE:=0}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-IP="ip -netns testns"
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns ns1"
 
 log_test()
 {
@@ -18,8 +23,10 @@ log_test()
 
 	if [ ${rc} -eq ${expected} ]; then
 		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
 	else
 		ret=1
+		nfail=$((nfail+1))
 		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 		echo
@@ -28,13 +35,22 @@ log_test()
 			[ "$a" = "q" ] && exit 1
 		fi
 	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
 }
 
 setup()
 {
 	set -e
-	ip netns add testns
+	ip netns add ns1
 	$IP link set dev lo up
+	ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
 
 	$IP link add dummy0 type dummy
 	$IP link set dev dummy0 up
@@ -47,7 +63,8 @@ setup()
 cleanup()
 {
 	$IP link del dev dummy0 &> /dev/null
-	ip netns del testns
+	ip netns del ns1
+	ip netns del ns2 &> /dev/null
 }
 
 get_linklocal()
@@ -563,39 +580,944 @@ fib_nexthop_test()
 }
 
 ################################################################################
-#
+# Tests on route add and replace
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP -6 ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP -6 ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP -6 ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+	add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+	local pfx
+	local expected="$1"
+	local out
+	local rc=0
+
+	set -- $expected
+	pfx=$1
+
+	out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+route_cleanup()
+{
+	$IP li del red 2>/dev/null
+	$IP li del dummy1 2>/dev/null
+	$IP li del veth1 2>/dev/null
+	$IP li del veth3 2>/dev/null
+
+	cleanup &> /dev/null
+}
+
+route_setup()
+{
+	route_cleanup
+	setup
+
+	[ "${VERBOSE}" = "1" ] && set -x
+	set -e
+
+	ip netns add ns2
+	ip -netns ns2 link set dev lo up
+	ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+
+	$IP li add veth1 type veth peer name veth2
+	$IP li add veth3 type veth peer name veth4
+
+	$IP li set veth1 up
+	$IP li set veth3 up
+	$IP li set veth2 netns ns2 up
+	$IP li set veth4 netns ns2 up
+	ip -netns ns2 li add dummy1 type dummy
+	ip -netns ns2 li set dummy1 up
+
+	$IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
+	$IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
+	$IP addr add 172.16.101.1/24 dev veth1
+	$IP addr add 172.16.103.1/24 dev veth3
+
+	ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+	ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+	ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+
+	ip -netns ns2 addr add 172.16.101.2/24 dev veth2
+	ip -netns ns2 addr add 172.16.103.2/24 dev veth4
+	ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
+
+	set +ex
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv6 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Append nexthop to existing route - gw"
+
+	# insert mpath directly
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Add multipath route"
+
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route add with different metrics"
+
+	run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		rc=$?
+	fi
+	log_test $rc 0 "Route delete with metric"
+}
+
+ipv6_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route6 "nexthop via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Single path with multipath"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		# previous command is expected to fail so if it returns 0
+		# that means the test failed.
+		log_test 0 1 "Invalid nexthop"
+	else
+		check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		log_test $? 0 "Invalid nexthop"
+	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+	log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+	log_test $? 0 "Multipath with multipath"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+	check_route6  "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with single path"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+	echo
+	echo "IPv6 route replace tests"
+
+	ipv6_rt_replace_single
+	ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+	route_setup
+
+	ipv6_rt_add
+	ipv6_rt_replace
+
+	route_cleanup
+}
+
+ip_addr_metric_check()
+{
+	ip addr help 2>&1 | grep -q metric
+	if [ $? -ne 0 ]; then
+		echo "iproute2 command does not support metric for addresses. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv6_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy1"
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy2"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
+ipv6_route_metrics_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 routes with metrics"
+
+	route_setup
+
+	#
+	# single path with metrics
+	#
+	run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6  "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400"
+		rc=$?
+	fi
+	log_test $rc 0 "Single path route with mtu metric"
+
+
+	#
+	# multipath via separate routes with metrics
+	#
+	run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400"
+	run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first"
+
+	# second route is coalesced to first to make a multipath route.
+	# MTU of the second path is hidden from display!
+	run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd"
+
+	run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2"
+	if [ $? -eq 0 ]; then
+		check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400"
+		log_test $? 0 "    MTU of second leg"
+	fi
+
+	#
+	# multipath with metrics
+	#
+	run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6  "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route with mtu metric"
+
+	$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
+	run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+	log_test $? 0 "Using route with mtu metric"
+
+	run_cmd "$IP -6 ro add 2001:db8:114::/64 via  2001:db8:101::2  congctl lock foo"
+	log_test $? 2 "Invalid metric (fails metric_convert)"
+
+	route_cleanup
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
 
-fib_test()
+# add initial route - used in replace route tests
+add_initial_route()
 {
-	if [ -n "$TEST" ]; then
-		eval $TEST
+	add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+	local pfx
+	local expected="$1"
+	local out
+	local rc=0
+
+	set -- $expected
+	pfx=$1
+	[ "${pfx}" = "unreachable" ] && pfx=$2
+
+	out=$($IP ro ls match ${pfx})
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv4 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# iproute2 prepend only sets NLM_F_CREATE
+	# - adds a new route; does NOT convert existing route to ECMP
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+	log_test $? 0 "Add new nexthop for existing prefix"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append nexthop to existing route - gw"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append nexthop to existing route - dev only"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append unreachable 172.16.104.0/24"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+	log_test $? 0 "Append nexthop to existing route - reject route"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append nexthop to existing reject route - gw"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append nexthop to existing reject route - dev only"
+
+	# insert mpath directly
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "add multipath route"
+
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route add with different metrics"
+
+	run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Single path with multipath"
+
+	# single path with reject
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "Single path with reject route"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		# previous command is expected to fail so if it returns 0
+		# that means the test failed.
+		log_test 0 1 "Invalid nexthop"
 	else
-		fib_unreg_test
-		fib_down_test
-		fib_carrier_test
-		fib_nexthop_test
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+		log_test $? 0 "Invalid nexthop"
 	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+	log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+	log_test $? 0 "Multipath with multipath"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+	check_route  "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "Multipath with single path"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+	check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "Multipath with single path via multipath attribute"
+
+	# multipath with reject
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "Multipath with reject route"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+	echo
+	echo "IPv4 route replace tests"
+
+	ipv4_rt_replace_single
+	ipv4_rt_replace_mpath
 }
 
+ipv4_route_test()
+{
+	route_setup
+
+	ipv4_rt_add
+	ipv4_rt_replace
+
+	route_cleanup
+}
+
+ipv4_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy1"
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy2"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
+ipv4_route_metrics_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 route add / append tests"
+
+	route_setup
+
+	run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400"
+		rc=$?
+	fi
+	log_test $rc 0 "Single path route with mtu metric"
+
+
+	run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route with mtu metric"
+
+	$IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
+	run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+	log_test $? 0 "Using route with mtu metric"
+
+	run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
+	log_test $? 2 "Invalid metric (fails metric_convert)"
+
+	route_cleanup
+}
+
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip;
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 ip route help 2>&1 | grep -q fibmatch
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing fibmatch"
-	exit 0
+	exit $ksft_skip
 fi
 
 # start clean
 cleanup &> /dev/null
 
-fib_test
+for t in $TESTS
+do
+	case $t in
+	fib_unreg_test|unregister)	fib_unreg_test;;
+	fib_down_test|down)		fib_down_test;;
+	fib_carrier_test|carrier)	fib_carrier_test;;
+	fib_nexthop_test|nexthop)	fib_nexthop_test;;
+	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
+	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
+	ipv6_addr_metric)		ipv6_addr_metric_test;;
+	ipv4_addr_metric)		ipv4_addr_metric_test;;
+	ipv6_route_metrics)		ipv6_route_metrics_test;;
+	ipv4_route_metrics)		ipv4_route_metrics_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
 
 exit $ret
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index 4a0964c42860..b8a2af8fcfb7 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -46,6 +46,8 @@ Guidelines for Writing Tests
 
 o Where possible, reuse an existing topology for different tests instead
   of recreating the same topology.
+o Tests that use anything but the most trivial topologies should include
+  an ASCII art showing the topology.
 o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
   RFC 5737, respectively.
 o Where possible, tests shall be written so that they can be reused by
diff --git a/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
new file mode 100755
index 000000000000..a43b4645c4de
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 flooding"
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+h3_destroy()
+{
+	simple_if_fini $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+	ip link set dev $swp3 master br0
+
+	ip link set dev $swp1 type bridge_slave isolated on
+	check_err $? "Can't set isolation on port $swp1"
+	ip link set dev $swp2 type bridge_slave isolated on
+	check_err $? "Can't set isolation on port $swp2"
+	ip link set dev $swp3 type bridge_slave isolated off
+	check_err $? "Can't disable isolation on port $swp3"
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+	ip link set dev $swp3 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp3 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	RET=0
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked when it should not have"
+
+	RET=0
+	ping_do $h3 192.0.2.2
+	check_err $? "Ping didn't work when it should have"
+
+	log_test "Isolated port ping"
+}
+
+ping_ipv6()
+{
+	RET=0
+	ping6_do $h1 2001:db8:1::2
+	check_fail $? "Ping6 worked when it should not have"
+
+	RET=0
+	ping6_do $h3 2001:db8:1::2
+	check_err $? "Ping6 didn't work when it should have"
+
+	log_test "Isolated port ping6"
+}
+
+flooding()
+{
+	local mac=de:ad:be:ef:13:37
+	local ip=192.0.2.100
+
+	RET=0
+	flood_test_do false $mac $ip $h1 $h2
+	check_err $? "Packet was flooded when it should not have been"
+
+	RET=0
+	flood_test_do true $mac $ip $h3 $h2
+	check_err $? "Packet was not flooded when it should have been"
+
+	log_test "Isolated port flooding"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
new file mode 100755
index 000000000000..1f8ef0eff862
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="sticky"
+NUM_NETIFS=4
+TEST_MAC=de:ad:be:ef:13:37
+source lib.sh
+
+switch_create()
+{
+	ip link add dev br0 type bridge
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $h1 up
+	ip link set dev $swp1 up
+	ip link set dev $h2 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $h2 down
+	ip link set dev $swp1 down
+	ip link set dev $h1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+	h2=${NETIFS[p3]}
+	swp2=${NETIFS[p4]}
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	switch_destroy
+}
+
+sticky()
+{
+	bridge fdb add $TEST_MAC dev $swp1 master static sticky
+	check_err $? "Could not add fdb entry"
+	bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky
+	$MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q
+	bridge -j fdb show br br0 brport $swp1\
+		| jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null
+	check_err $? "Did not find FDB record when should"
+
+	log_test "Sticky fdb entry"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 75d922438bc9..d8313d0438b7 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
 NUM_NETIFS=4
 CHECK_TC="yes"
 source lib.sh
@@ -75,14 +76,31 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+	learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+	flood_test $swp2 $h1 $h2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1cddf06f691d..c15c6c85c984 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
 NUM_NETIFS=4
 source lib.sh
 
@@ -73,14 +74,31 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+	learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+	flood_test $swp2 $h1 $h2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
new file mode 100644
index 000000000000..5ab1e5f43022
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Source library
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+	relative_path="."
+fi
+
+source "$relative_path/lib.sh"
+
+##############################################################################
+# Defines
+
+DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \
+	      grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \
+	      rev | cut -d"/" -f2- | rev)
+if [ -z "$DEVLINK_DEV" ]; then
+	echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+	exit 1
+fi
+if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+	echo "SKIP: devlink device's bus is not PCI"
+	exit 1
+fi
+
+DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+		 -n | cut -d" " -f3)
+
+##############################################################################
+# Sanity checks
+
+devlink -j resource show "$DEVLINK_DEV" &> /dev/null
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing devlink resource support"
+	exit 1
+fi
+
+##############################################################################
+# Devlink helpers
+
+devlink_resource_names_to_path()
+{
+	local resource
+	local path=""
+
+	for resource in "${@}"; do
+		if [ "$path" == "" ]; then
+			path="$resource"
+		else
+			path="${path}/$resource"
+		fi
+	done
+
+	echo "$path"
+}
+
+devlink_resource_get()
+{
+	local name=$1
+	local resource_name=.[][\"$DEVLINK_DEV\"]
+
+	resource_name="$resource_name | .[] | select (.name == \"$name\")"
+
+	shift
+	for resource in "${@}"; do
+		resource_name="${resource_name} | .[\"resources\"][] | \
+			       select (.name == \"$resource\")"
+	done
+
+	devlink -j resource show "$DEVLINK_DEV" | jq "$resource_name"
+}
+
+devlink_resource_size_get()
+{
+	local size=$(devlink_resource_get "$@" | jq '.["size_new"]')
+
+	if [ "$size" == "null" ]; then
+		devlink_resource_get "$@" | jq '.["size"]'
+	else
+		echo "$size"
+	fi
+}
+
+devlink_resource_size_set()
+{
+	local new_size=$1
+	local path
+
+	shift
+	path=$(devlink_resource_names_to_path "$@")
+	devlink resource set "$DEVLINK_DEV" path "$path" size "$new_size"
+	check_err $? "Failed setting path $path to size $size"
+}
+
+devlink_reload()
+{
+	local still_pending
+
+	devlink dev reload "$DEVLINK_DEV" &> /dev/null
+	check_err $? "Failed reload"
+
+	still_pending=$(devlink resource show "$DEVLINK_DEV" | \
+			grep -c "size_new")
+	check_err $still_pending "Failed reload - There are still unset sizes"
+}
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
new file mode 100755
index 000000000000..cca2baa03fb8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	multipath_ipv4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip route add vrf v$ol1 192.0.2.16/28 \
+	   nexthop dev g1a \
+	   nexthop dev g1b
+
+	tc qdisc add dev $ul1 clsact
+	tc filter add dev $ul1 egress pref 111 prot ipv4 \
+	   flower dst_ip 192.0.2.66 action pass
+	tc filter add dev $ul1 egress pref 222 prot ipv4 \
+	   flower dst_ip 192.0.2.82 action pass
+}
+
+sw1_destroy()
+{
+	tc qdisc del dev $ul1 clsact
+
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip route add vrf v$ol2 192.0.2.0/28 \
+	   nexthop dev g2a \
+	   nexthop dev g2b
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip route replace vrf v$ol1 192.0.2.16/28 \
+	   nexthop dev g1a weight $weight1 \
+	   nexthop dev g1b weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul1 111 egress)
+	local t0_222=$(tc_rule_stats_get $ul1 222 egress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul1 111 egress)
+	local t1_222=$(tc_rule_stats_get $ul1 222 egress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ol1 192.0.2.16/28 \
+	   nexthop dev g1a \
+	   nexthop dev g1b
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1ac6c62271f3..85d253546684 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -8,14 +8,21 @@
 PING=${PING:=ping}
 PING6=${PING6:=ping6}
 MZ=${MZ:=mausezahn}
+ARPING=${ARPING:=arping}
+TEAMD=${TEAMD:=teamd}
 WAIT_TIME=${WAIT_TIME:=5}
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
 NETIF_TYPE=${NETIF_TYPE:=veth}
 NETIF_CREATE=${NETIF_CREATE:=yes}
 
-if [[ -f forwarding.config ]]; then
-	source forwarding.config
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+	relative_path="."
+fi
+
+if [[ -f $relative_path/forwarding.config ]]; then
+	source "$relative_path/forwarding.config"
 fi
 
 ##############################################################################
@@ -28,7 +35,10 @@ check_tc_version()
 		echo "SKIP: iproute2 too old; tc is missing JSON support"
 		exit 1
 	fi
+}
 
+check_tc_shblock_support()
+{
 	tc filter help 2>&1 | grep block &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing shared block support"
@@ -36,6 +46,15 @@ check_tc_version()
 	fi
 }
 
+check_tc_chain_support()
+{
+	tc help 2>&1|grep chain &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing chain support"
+		exit 1
+	fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
 	exit 0
@@ -45,15 +64,18 @@ if [[ "$CHECK_TC" = "yes" ]]; then
 	check_tc_version
 fi
 
-if [[ ! -x "$(command -v jq)" ]]; then
-	echo "SKIP: jq not installed"
-	exit 1
-fi
+require_command()
+{
+	local cmd=$1; shift
 
-if [[ ! -x "$(command -v $MZ)" ]]; then
-	echo "SKIP: $MZ not installed"
-	exit 1
-fi
+	if [[ ! -x "$(command -v "$cmd")" ]]; then
+		echo "SKIP: $cmd not installed"
+		exit 1
+	fi
+}
+
+require_command jq
+require_command $MZ
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
@@ -151,6 +173,19 @@ check_fail()
 	fi
 }
 
+check_err_fail()
+{
+	local should_fail=$1; shift
+	local err=$1; shift
+	local what=$1; shift
+
+	if ((should_fail)); then
+		check_fail $err "$what succeeded, but should have failed"
+	else
+		check_err $err "$what failed"
+	fi
+}
+
 log_test()
 {
 	local test_name=$1
@@ -185,24 +220,54 @@ log_info()
 	echo "INFO: $msg"
 }
 
+setup_wait_dev()
+{
+	local dev=$1; shift
+
+	while true; do
+		ip link show dev $dev up \
+			| grep 'state UP' &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			sleep 1
+		else
+			break
+		fi
+	done
+}
+
 setup_wait()
 {
-	for i in $(eval echo {1..$NUM_NETIFS}); do
-		while true; do
-			ip link show dev ${NETIFS[p$i]} up \
-				| grep 'state UP' &> /dev/null
-			if [[ $? -ne 0 ]]; then
-				sleep 1
-			else
-				break
-			fi
-		done
+	local num_netifs=${1:-$NUM_NETIFS}
+
+	for ((i = 1; i <= num_netifs; ++i)); do
+		setup_wait_dev ${NETIFS[p$i]}
 	done
 
 	# Make sure links are ready.
 	sleep $WAIT_TIME
 }
 
+lldpad_app_wait_set()
+{
+	local dev=$1; shift
+
+	while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
+		echo "$dev: waiting for lldpad to push pending APP updates"
+		sleep 5
+	done
+}
+
+lldpad_app_wait_del()
+{
+	# Give lldpad a chance to push down the changes. If the device is downed
+	# too soon, the updates will be left pending. However, they will have
+	# been struck off the lldpad's DB already, so we won't be able to tell
+	# they are pending. Then on next test iteration this would cause
+	# weirdness as newly-added APP rules conflict with the old ones,
+	# sometimes getting stuck in an "unknown" state.
+	sleep 5
+}
+
 pre_cleanup()
 {
 	if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
@@ -287,6 +352,29 @@ __addr_add_del()
 	done
 }
 
+__simple_if_init()
+{
+	local if_name=$1; shift
+	local vrf_name=$1; shift
+	local addrs=("${@}")
+
+	ip link set dev $if_name master $vrf_name
+	ip link set dev $if_name up
+
+	__addr_add_del $if_name add "${addrs[@]}"
+}
+
+__simple_if_fini()
+{
+	local if_name=$1; shift
+	local addrs=("${@}")
+
+	__addr_add_del $if_name del "${addrs[@]}"
+
+	ip link set dev $if_name down
+	ip link set dev $if_name nomaster
+}
+
 simple_if_init()
 {
 	local if_name=$1
@@ -298,11 +386,8 @@ simple_if_init()
 	array=("${@}")
 
 	vrf_create $vrf_name
-	ip link set dev $if_name master $vrf_name
 	ip link set dev $vrf_name up
-	ip link set dev $if_name up
-
-	__addr_add_del $if_name add "${array[@]}"
+	__simple_if_init $if_name $vrf_name "${array[@]}"
 }
 
 simple_if_fini()
@@ -315,12 +400,76 @@ simple_if_fini()
 	vrf_name=v$if_name
 	array=("${@}")
 
-	__addr_add_del $if_name del "${array[@]}"
-
-	ip link set dev $if_name down
+	__simple_if_fini $if_name "${array[@]}"
 	vrf_destroy $vrf_name
 }
 
+tunnel_create()
+{
+	local name=$1; shift
+	local type=$1; shift
+	local local=$1; shift
+	local remote=$1; shift
+
+	ip link add name $name type $type \
+	   local $local remote $remote "$@"
+	ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+	local name=$1; shift
+
+	ip link del dev $name
+}
+
+vlan_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf=$1; shift
+	local ips=("${@}")
+	local name=$if_name.$vid
+
+	ip link add name $name link $if_name type vlan id $vid
+	if [ "$vrf" != "" ]; then
+		ip link set dev $name master $vrf
+	fi
+	ip link set dev $name up
+	__addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local name=$if_name.$vid
+
+	ip link del dev $name
+}
+
+team_create()
+{
+	local if_name=$1; shift
+	local mode=$1; shift
+
+	require_command $TEAMD
+	$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
+	for slave in "$@"; do
+		ip link set dev $slave down
+		ip link set dev $slave master $if_name
+		ip link set dev $slave up
+	done
+	ip link set dev $if_name up
+}
+
+team_destroy()
+{
+	local if_name=$1; shift
+
+	$TEAMD -t $if_name -k
+}
+
 master_name_get()
 {
 	local if_name=$1
@@ -335,6 +484,24 @@ link_stats_tx_packets_get()
        ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
 }
 
+tc_rule_stats_get()
+{
+	local dev=$1; shift
+	local pref=$1; shift
+	local dir=$1; shift
+
+	tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
+	    | jq '.[1].options.actions[].stats.packets'
+}
+
+ethtool_stats_get()
+{
+	local dev=$1; shift
+	local stat=$1; shift
+
+	ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
+}
+
 mac_get()
 {
 	local if_name=$1
@@ -353,24 +520,57 @@ bridge_ageing_time_get()
 	echo $((ageing_time / 100))
 }
 
-forwarding_enable()
+declare -A SYSCTL_ORIG
+sysctl_set()
 {
-       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
-       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+	local key=$1; shift
+	local value=$1; shift
 
-       sysctl -q -w net.ipv4.conf.all.forwarding=1
-       sysctl -q -w net.ipv6.conf.all.forwarding=1
+	SYSCTL_ORIG[$key]=$(sysctl -n $key)
+	sysctl -qw $key=$value
+}
+
+sysctl_restore()
+{
+	local key=$1; shift
+
+	sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+	sysctl_set net.ipv4.conf.all.forwarding 1
+	sysctl_set net.ipv6.conf.all.forwarding 1
 }
 
 forwarding_restore()
 {
-       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
-       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+	sysctl_restore net.ipv6.conf.all.forwarding
+	sysctl_restore net.ipv4.conf.all.forwarding
+}
+
+declare -A MTU_ORIG
+mtu_set()
+{
+	local dev=$1; shift
+	local mtu=$1; shift
+
+	MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu')
+	ip link set dev $dev mtu $mtu
+}
+
+mtu_restore()
+{
+	local dev=$1; shift
+
+	ip link set dev $dev mtu ${MTU_ORIG["$dev"]}
 }
 
 tc_offload_check()
 {
-	for i in $(eval echo {1..$NUM_NETIFS}); do
+	local num_netifs=${1:-$NUM_NETIFS}
+
+	for ((i = 1; i <= num_netifs; ++i)); do
 		ethtool -k ${NETIFS[p$i]} \
 			| grep "hw-tc-offload: on" &> /dev/null
 		if [[ $? -ne 0 ]]; then
@@ -381,33 +581,245 @@ tc_offload_check()
 	return 0
 }
 
+trap_install()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	# Some devices may not support or need in-hardware trapping of traffic
+	# (e.g. the veth pairs that this library creates for non-existent
+	# loopbacks). Use continue instead, so that there is a filter in there
+	# (some tests check counters), and so that other filters are still
+	# processed.
+	tc filter add dev $dev $direction pref 1 \
+		flower skip_sw action trap 2>/dev/null \
+	    || tc filter add dev $dev $direction pref 1 \
+		       flower action continue
+}
+
+trap_uninstall()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	tc filter del dev $dev $direction pref 1 flower
+}
+
+slow_path_trap_install()
+{
+	# For slow-path testing, we need to install a trap to get to
+	# slow path the packets that would otherwise be switched in HW.
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		trap_install "$@"
+	fi
+}
+
+slow_path_trap_uninstall()
+{
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		trap_uninstall "$@"
+	fi
+}
+
+__icmp_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local vsuf=$1; shift
+	local tundev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$tundev" ingress \
+	   proto ip$vsuf pref $pref \
+	   flower ip_proto icmp$vsuf $filter \
+	   action pass
+}
+
+icmp_capture_install()
+{
+	__icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+	__icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 v6 "$@"
+}
+
+__vlan_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local dev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$dev" ingress \
+	   proto 802.1q pref $pref \
+	   flower $filter \
+	   action pass
+}
+
+vlan_capture_install()
+{
+	__vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+	__vlan_capture_add_del del 100 "$@"
+}
+
+__dscp_capture_add_del()
+{
+	local add_del=$1; shift
+	local dev=$1; shift
+	local base=$1; shift
+	local dscp;
+
+	for prio in {0..7}; do
+		dscp=$((base + prio))
+		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
+				       "skip_hw ip_tos $((dscp << 2))"
+	done
+}
+
+dscp_capture_install()
+{
+	local dev=$1; shift
+	local base=$1; shift
+
+	__dscp_capture_add_del add $dev $base
+}
+
+dscp_capture_uninstall()
+{
+	local dev=$1; shift
+	local base=$1; shift
+
+	__dscp_capture_add_del del $dev $base
+}
+
+dscp_fetch_stats()
+{
+	local dev=$1; shift
+	local base=$1; shift
+
+	for prio in {0..7}; do
+		local dscp=$((base + prio))
+		local t=$(tc_rule_stats_get $dev $((dscp + 100)))
+		echo "[$dscp]=$t "
+	done
+}
+
+matchall_sink_create()
+{
+	local dev=$1; shift
+
+	tc qdisc add dev $dev clsact
+	tc filter add dev $dev ingress \
+	   pref 10000 \
+	   matchall \
+	   action drop
+}
+
+tests_run()
+{
+	local current_test
+
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+	done
+}
+
+multipath_eval()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local packets_rp12=$4
+	local packets_rp13=$5
+	local weights_ratio packets_ratio diff
+
+	RET=0
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+				| bc -l)
+	else
+		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+				| bc -l)
+	fi
+
+	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+	       check_err 1 "Packet difference is 0"
+	       log_test "Multipath"
+	       log_info "Expected ratio $weights_ratio"
+	       return
+	fi
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+				| bc -l)
+	else
+		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+				| bc -l)
+	fi
+
+	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+	diff=${diff#-}
+
+	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
+	check_err $? "Too large discrepancy between expected and measured ratios"
+	log_test "$desc"
+	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
 ##############################################################################
 # Tests
 
-ping_test()
+ping_do()
 {
 	local if_name=$1
 	local dip=$2
 	local vrf_name
 
-	RET=0
-
 	vrf_name=$(master_name_get $if_name)
 	ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping_test()
+{
+	RET=0
+
+	ping_do $1 $2
 	check_err $?
 	log_test "ping"
 }
 
-ping6_test()
+ping6_do()
 {
 	local if_name=$1
 	local dip=$2
 	local vrf_name
 
-	RET=0
-
 	vrf_name=$(master_name_get $if_name)
 	ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping6_test()
+{
+	RET=0
+
+	ping6_do $1 $2
 	check_err $?
 	log_test "ping6"
 }
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000000..e6fd7a18c655
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_mac
+	test_ip6gretap_mac
+	test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local prot=$1; shift
+	local what=$1; shift
+
+	local swp3mac=$(mac_get $swp3)
+	local h3mac=$(mac_get $h3)
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	tc filter add dev $h3 ingress pref 77 prot $prot \
+		flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
+		action pass
+
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+	tc filter del dev $h3 ingress pref 77
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+	RET=0
+
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+	mirror_install $swp1 egress gt6 "matchall $tcflags"
+	quick_test_span_gre_dir gt4 ingress
+	quick_test_span_gre_dir gt6 egress
+
+	mirror_uninstall $swp1 ingress
+	fail_test_span_gre_dir gt4 ingress
+	quick_test_span_gre_dir gt6 egress
+
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+	mirror_uninstall $swp1 egress
+	quick_test_span_gre_dir gt4 ingress
+	fail_test_span_gre_dir gt6 egress
+
+	mirror_uninstall $swp1 ingress
+	log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+	test_span_gre_mac gt4 ingress ip "mirror to gretap"
+	test_span_gre_mac gt4 egress ip "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+	test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
+	test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000000..360ca133bead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   | +---------------------------------------------------------------------+ |
+#   | | OL                      + gt6 (ip6gretap)      + gt4 (gretap)       | |
+#   | |                         : loc=2001:db8:2::1    : loc=192.0.2.129    | |
+#   | |                         : rem=2001:db8:2::2    : rem=192.0.2.130    | |
+#   | |                         : ttl=100              : ttl=100            | |
+#   | |                         : tos=inherit          : tos=inherit        | |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   |                           :  |                   :  |                   |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   | | UL                      :  |,---------------------'                 | |
+#   | |   + $swp3               :  ||                  :                    | |
+#   | |   | 192.0.2.129/28      :  vv                  :                    | |
+#   | |   | 2001:db8:2::1/64    :  + ul (dummy)        :                    | |
+#   | +---|---------------------:----------------------:--------------------+ |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
+# the encapsulated packed should be routed.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+	# Bridge between H1 and H2.
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+
+	# Underlay.
+
+	simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+	ip link add name ul type dummy
+	ip link set dev ul master v$swp3
+	ip link set dev ul up
+
+	# Overlay.
+
+	vrf_create vrf-ol
+	ip link set dev vrf-ol up
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit dev ul
+	ip link set dev gt4 master vrf-ol
+	ip link set dev gt4 up
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit dev ul allow-localremote
+	ip link set dev gt6 master vrf-ol
+	ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+	vrf_destroy vrf-ol
+
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+	ip link del dev ul
+
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+	full_test_span_gre_dir gt4 egress  0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+	full_test_span_gre_dir gt6 egress  0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+	RET=0
+
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
new file mode 100755
index 000000000000..c5095da7f6bf
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d).
+#
+# This test uses standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+#  +---------------------+                             +---------------------+
+#  | H1                  |                             |                  H2 |
+#  |     + $h1           |                             |           $h2 +     |
+#  |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#  +-----|---------------+                             +---------------|-----+
+#        |                                                             |
+#  +-----|-------------------------------------------------------------|-----+
+#  | SW  o---> mirror                                                  |     |
+#  | +---|-------------------------------------------------------------|---+ |
+#  | |   + $swp1            + br1 (802.1q bridge)                $swp2 +   | |
+#  | +---------------------------------------------------------------------+ |
+#  |                                                                         |
+#  | +---------------------------------------------------------------------+ |
+#  | |                      + br2 (802.1d bridge)                          | |
+#  | |                        192.0.2.129/28                               | |
+#  | |   + $swp3              2001:db8:2::1/64                             | |
+#  | +---|-----------------------------------------------------------------+ |
+#  |     |                                          ^                    ^   |
+#  |     |                     + gt6 (ip6gretap)    | + gt4 (gretap)     |   |
+#  |     |                     : loc=2001:db8:2::1  | : loc=192.0.2.129  |   |
+#  |     |                     : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+   |
+#  |     |                     : ttl=100              : ttl=100              |
+#  |     |                     : tos=inherit          : tos=inherit          |
+#  +-----|---------------------:----------------------:----------------------+
+#        |                     :                      :
+#  +-----|---------------------:----------------------:----------------------+
+#  | H3  + $h3                 + h3-gt6(ip6gretap)    + h3-gt4 (gretap)      |
+#  |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#  |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#  |                             ttl=100                ttl=100              |
+#  |                             tos=inherit            tos=inherit          |
+#  +-------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	ip link set dev $swp3 master br2
+	ip route add 192.0.2.130/32 dev br2
+	ip -6 route add 2001:db8:2::2/128 dev br2
+
+	ip address add dev br2 192.0.2.129/28
+	ip address add dev br2 2001:db8:2::1/64
+
+	ip address add dev $h3 192.0.2.130/28
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $h3 192.0.2.130/28
+	ip link del dev br2
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000000..197e769c2ed1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_stp
+	test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	vlan_create $swp3 555
+
+	ip link set dev $swp3.555 master br2
+	ip route add 192.0.2.130/32 dev br2
+	ip -6 route add 2001:db8:2::2/128 dev br2
+
+	ip address add dev br2 192.0.2.129/32
+	ip address add dev br2 2001:db8:2::1/128
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h3 555
+	ip link del dev br2
+	vlan_destroy $swp3 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+	full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+	full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
new file mode 100755
index 000000000000..a3402cd8d5b6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q).
+#
+# This test uses standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+#  +---------------------+                               +---------------------+
+#  | H1                  |                               |                  H2 |
+#  |     + $h1           |                               |           $h2 +     |
+#  |     | 192.0.2.1/28  |                               |  192.0.2.2/28 |     |
+#  +-----|---------------+                               +---------------|-----+
+#        |                                                               |
+#  +-----|---------------------------------------------------------------|-----+
+#  | SW  o---> mirror                                                    |     |
+#  | +---|---------------------------------------------------------------|---+ |
+#  | |   + $swp1                  + br1 (802.1q bridge)            $swp2 +   | |
+#  | |                              192.0.2.129/28                           | |
+#  | |   + $swp3                    2001:db8:2::1/64                         | |
+#  | |   | vid555                   vid555[pvid,untagged]                    | |
+#  | +---|-------------------------------------------------------------------+ |
+#  |     |                                          ^                      ^   |
+#  |     |                     + gt6 (ip6gretap)    |   + gt4 (gretap)     |   |
+#  |     |                     : loc=2001:db8:2::1  |   : loc=192.0.2.129  |   |
+#  |     |                     : rem=2001:db8:2::2 -+   : rem=192.0.2.130 -+   |
+#  |     |                     : ttl=100                : ttl=100              |
+#  |     |                     : tos=inherit            : tos=inherit          |
+#  +-----|---------------------:------------------------:----------------------+
+#        |                     :                        :
+#  +-----|---------------------:------------------------:----------------------+
+#  | H3  + $h3                 + h3-gt6(ip6gretap)      + h3-gt4 (gretap)      |
+#  |     |                       loc=2001:db8:2::2        loc=192.0.2.130      |
+#  |     + $h3.555               rem=2001:db8:2::1        rem=192.0.2.129      |
+#  |       192.0.2.130/28        ttl=100                  ttl=100              |
+#  |       2001:db8:2::2/64      tos=inherit              tos=inherit          |
+#  +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link set dev $swp3 master br1
+	bridge vlan add dev br1 vid 555 pvid untagged self
+	ip address add dev br1 192.0.2.129/28
+	ip address add dev br1 2001:db8:2::1/64
+
+	ip -4 route add 192.0.2.130/32 dev br1
+	ip -6 route add 2001:db8:2::2/128 dev br1
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+	bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp3 nomaster
+	vlan_destroy $h3 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+tests()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+tests
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
new file mode 100755
index 000000000000..61844caf671e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q), and the egress device is a team
+# device.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |     + $h1.333        |                             |        $h1.555 +     |
+# |     | 192.0.2.1/28   |                             |  192.0.2.18/28 |     |
+# +-----|----------------+                             +----------------|-----+
+#       |                               $h1                             |
+#       +--------------------------------+------------------------------+
+#                                        |
+# +--------------------------------------|------------------------------------+
+# | SW                                   o---> mirror                         |
+# |                                      |                                    |
+# |     +--------------------------------+------------------------------+     |
+# |     |                              $swp1                            |     |
+# |     + $swp1.333                                           $swp1.555 +     |
+# |       192.0.2.2/28                                    192.0.2.17/28       |
+# |                                                                           |
+# | +-----------------------------------------------------------------------+ |
+# | |                        BR1 (802.1q)                                   | |
+# | |     + lag (team)       192.0.2.129/28                                 | |
+# | |    / \                 2001:db8:2::1/64                               | |
+# | +---/---\---------------------------------------------------------------+ |
+# |    /     \                                                            ^   |
+# |   |       \                                        + gt4 (gretap)     |   |
+# |   |        \                                         loc=192.0.2.129  |   |
+# |   |         \                                        rem=192.0.2.130 -+   |
+# |   |          \                                       ttl=100              |
+# |   |           \                                      tos=inherit          |
+# |   |            \                                                          |
+# |   |             \_________________________________                        |
+# |   |                                               \                       |
+# |   + $swp3                                          + $swp4                |
+# +---|------------------------------------------------|----------------------+
+#     |                                                |
+# +---|----------------------+                     +---|----------------------+
+# |   + $h3               H3 |                     |   + $h4               H4 |
+# |     192.0.2.130/28       |                     |     192.0.2.130/28       |
+# |     2001:db8:2::2/64     |                     |     2001:db8:2::2/64     |
+# +--------------------------+                     +--------------------------+
+
+ALL_TESTS="
+	test_mirror_gretap_first
+	test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+	local ips=("${@}")
+
+	vrf_create $vrf_name
+	ip link set dev $vrf_name up
+	vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+
+	vlan_destroy $if_name $vid
+	ip link set dev $vrf_name down
+	vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+	vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+	ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del 192.0.2.16/28 vrf vrf-h1
+	vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+	vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+	ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip -4 route del 192.0.2.0/28 vrf vrf-h2
+	vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.130/28
+	tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+	tc qdisc del dev $h3 clsact
+	simple_if_fini $h3 192.0.2.130/28
+}
+
+h4_create()
+{
+	simple_if_init $h4 192.0.2.130/28
+	tc qdisc add dev $h4 clsact
+}
+
+h4_destroy()
+{
+	tc qdisc del dev $h4 clsact
+	simple_if_fini $h4 192.0.2.130/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+	vlan_create $swp1 333 "" 192.0.2.2/28
+	vlan_create $swp1 555 "" 192.0.2.17/28
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	ip link set dev $swp3 up
+	ip link set dev $swp4 up
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+	__addr_add_del br1 add 192.0.2.129/32
+	ip -4 route add 192.0.2.130/32 dev br1
+
+	team_create lag loadbalance $swp3 $swp4
+	ip link set dev lag master br1
+}
+
+switch_destroy()
+{
+	ip link set dev lag nomaster
+	team_destroy lag
+
+	ip -4 route del 192.0.2.130/32 dev br1
+	__addr_add_del br1 del 192.0.2.129/32
+	ip link set dev br1 down
+	ip link del dev br1
+
+	ip link set dev $swp4 down
+	ip link set dev $swp3 down
+
+	tunnel_destroy gt4
+
+	vlan_destroy $swp1 555
+	vlan_destroy $swp1 333
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp3=${NETIFS[p3]}
+	h3=${NETIFS[p4]}
+
+	swp4=${NETIFS[p5]}
+	h4=${NETIFS[p6]}
+
+	vrf_prepare
+
+	ip link set dev $h1 up
+	h1_create
+	h2_create
+	h3_create
+	h4_create
+	switch_create
+
+	trap_install $h3 ingress
+	trap_install $h4 ingress
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	trap_uninstall $h4 ingress
+	trap_uninstall $h3 ingress
+
+	switch_destroy
+	h4_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+	ip link set dev $h1 down
+
+	vrf_cleanup
+}
+
+test_lag_slave()
+{
+	local host_dev=$1; shift
+	local up_dev=$1; shift
+	local down_dev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress gt4 \
+		       "proto 802.1q flower vlan_id 333 $tcflags"
+
+	# Test connectivity through $up_dev when $down_dev is set down.
+	ip link set dev $down_dev down
+	setup_wait_dev $up_dev
+	setup_wait_dev $host_dev
+	$ARPING -I br1 192.0.2.130 -qfc 1
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+
+	# Test lack of connectivity when both slaves are down.
+	ip link set dev $up_dev down
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+
+	ip link set dev $up_dev up
+	ip link set dev $down_dev up
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+	test_lag_slave $h3 $swp3 $swp4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+	test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000000..135902aa8b11
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+	test_ttl
+	test_tun_up
+	test_egress_up
+	test_remote_ip
+	test_tun_del
+	test_route_del
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	# This test downs $swp3, which deletes the configured IPv6 address
+	# unless this sysctl is set.
+	sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local prot=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	tc filter add dev $h3 ingress pref 77 prot $prot \
+		flower ip_ttl 50 action pass
+
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+	ip link set dev $tundev type $type ttl 50
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+	ip link set dev $tundev type $type ttl 100
+	tc filter del dev $h3 ingress pref 77
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev down
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev up
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+	local tundev=$1; shift
+	local remote_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $swp3 down
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	# After setting the device up, wait for neighbor to get resolved so that
+	# we can expect mirroring to work.
+	ip link set dev $swp3 up
+	setup_wait_dev $swp3
+	ping -c 1 -I $swp3 $remote_ip &>/dev/null
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local correct_ip=$1; shift
+	local wrong_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type remote $wrong_ip
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type remote $correct_ip
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: remote address change ($tcflags)"
+}
+
+test_span_gre_tun_del()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local flags=$1; shift
+	local local_ip=$1; shift
+	local remote_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+	ip link del dev $tundev
+	fail_test_span_gre_dir $tundev ingress
+
+	tunnel_create $tundev $type $local_ip $remote_ip \
+		      ttl 100 tos inherit $flags
+
+	# Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+	# and verify it works for the follow-up tests.
+	mirror_uninstall $swp1 ingress
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: tunnel deleted ($tcflags)"
+}
+
+test_span_gre_route_del()
+{
+	local tundev=$1; shift
+	local edev=$1; shift
+	local route=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	ip route del $route dev $edev
+	fail_test_span_gre_dir $tundev ingress
+
+	ip route add $route dev $edev
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: underlay route removal ($tcflags)"
+}
+
+test_ttl()
+{
+	test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+	test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+	test_span_gre_tun_up gt4 "mirror to gretap"
+	test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+	test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+	test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+	test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+	test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_tun_del()
+{
+	test_span_gre_tun_del gt4 gretap "" \
+			      192.0.2.129 192.0.2.130 "mirror to gretap"
+	test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+			      2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+	test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+	test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000000..12914f40612d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. Flower match on one of the
+# addresses is configured with mirror action. It is expected that when pinging
+# this address, mirroring takes place, whereas when pinging the other one,
+# there's no mirroring.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $h1 192.0.2.3/28
+	ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h2 192.0.2.4/28
+	ip address del dev $h1 192.0.2.3/28
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+	test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+fail_test_span_gre_dir_acl()
+{
+	fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local match_dip=$1; shift
+	local what=$1; shift
+
+	mirror_install $swp1 $direction $tundev \
+		       "protocol ip flower $tcflags dst_ip $match_dip"
+	fail_test_span_gre_dir $tundev $direction
+	test_span_gre_dir_acl "$tundev" "$direction" \
+			  "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	# Test lack of mirroring after ACL mirror is uninstalled.
+	fail_test_span_gre_dir_acl "$tundev" "$direction"
+
+	log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+	full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+	full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+	RET=0
+
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
new file mode 100755
index 000000000000..9edf4cb104a8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -0,0 +1,285 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# team device.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1.333         |                             |        $h1.555 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                $h1                             |
+#      +---------------------------------+------------------------------+
+#                                        |
+# +--------------------------------------|------------------------------------+
+# | SW                                   o---> mirror                         |
+# |                                      |                                    |
+# |   +----------------------------------+------------------------------+     |
+# |   |                                $swp1                            |     |
+# |   + $swp1.333                                             $swp1.555 +     |
+# |     192.0.2.2/28                                      192.0.2.17/28       |
+# |                                                                           |
+# |                                                                           |
+# |   + gt4 (gretap)      ,-> + lag1 (team)                                   |
+# |     loc=192.0.2.129   |   | 192.0.2.129/28                                |
+# |     rem=192.0.2.130 --'   |                                               |
+# |     ttl=100               |                                               |
+# |     tos=inherit           |                                               |
+# |      _____________________|______________________                         |
+# |     /                                            \                        |
+# |    /                                              \                       |
+# |   + $swp3                                          + $swp4                |
+# +---|------------------------------------------------|----------------------+
+#     |                                                |
+# +---|------------------------------------------------|----------------------+
+# |   + $h3                                            + $h4               H3 |
+# |    \                                              /                       |
+# |     \____________________________________________/                        |
+# |                           |                                               |
+# |                           + lag2 (team)                                   |
+# |                             192.0.2.130/28                                |
+# |                                                                           |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_mirror_gretap_first
+	test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+	local ips=("${@}")
+
+	vrf_create $vrf_name
+	ip link set dev $vrf_name up
+	vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+
+	vlan_destroy $if_name $vid
+	ip link set dev $vrf_name down
+	vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+	vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+	ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del 192.0.2.16/28 vrf vrf-h1
+	vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+	vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+	ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip -4 route del 192.0.2.0/28 vrf vrf-h2
+	vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create_team()
+{
+	team_create lag2 lacp $h3 $h4
+	__simple_if_init lag2 vrf-h3 192.0.2.130/32
+	ip -4 route add vrf vrf-h3 192.0.2.129/32 dev lag2
+}
+
+h3_destroy_team()
+{
+	ip -4 route del vrf vrf-h3 192.0.2.129/32 dev lag2
+	__simple_if_fini lag2 192.0.2.130/32
+	team_destroy lag2
+
+	ip link set dev $h3 down
+	ip link set dev $h4 down
+}
+
+h3_create()
+{
+	vrf_create vrf-h3
+	ip link set dev vrf-h3 up
+	tc qdisc add dev $h3 clsact
+	tc qdisc add dev $h4 clsact
+	h3_create_team
+}
+
+h3_destroy()
+{
+	h3_destroy_team
+	tc qdisc del dev $h4 clsact
+	tc qdisc del dev $h3 clsact
+	ip link set dev vrf-h3 down
+	vrf_destroy vrf-h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+	vlan_create $swp1 333 "" 192.0.2.2/28
+	vlan_create $swp1 555 "" 192.0.2.17/28
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	ip link set dev $swp3 up
+	ip link set dev $swp4 up
+	team_create lag1 lacp $swp3 $swp4
+	__addr_add_del lag1 add 192.0.2.129/32
+	ip -4 route add 192.0.2.130/32 dev lag1
+}
+
+switch_destroy()
+{
+	ip -4 route del 192.0.2.130/32 dev lag1
+	__addr_add_del lag1 del 192.0.2.129/32
+	team_destroy lag1
+
+	ip link set dev $swp4 down
+	ip link set dev $swp3 down
+
+	tunnel_destroy gt4
+
+	vlan_destroy $swp1 555
+	vlan_destroy $swp1 333
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp3=${NETIFS[p3]}
+	h3=${NETIFS[p4]}
+
+	swp4=${NETIFS[p5]}
+	h4=${NETIFS[p6]}
+
+	vrf_prepare
+
+	ip link set dev $h1 up
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+
+	trap_install $h3 ingress
+	trap_install $h4 ingress
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	trap_uninstall $h4 ingress
+	trap_uninstall $h3 ingress
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+	ip link set dev $h1 down
+
+	vrf_cleanup
+}
+
+test_lag_slave()
+{
+	local up_dev=$1; shift
+	local down_dev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress gt4 \
+		       "proto 802.1q flower vlan_id 333 $tcflags"
+
+	# Move $down_dev away from the team. That will prompt change in
+	# txability of the connected device, without changing its upness. The
+	# driver should notice the txability change and move the traffic to the
+	# other slave.
+	ip link set dev $down_dev nomaster
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+
+	# Test lack of connectivity when neither slave is txable.
+	ip link set dev $up_dev nomaster
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+	mirror_uninstall $swp1 ingress
+
+	# Recreate H3's team device, because mlxsw, which this test is
+	# predominantly mean to test, requires a bottom-up construction and
+	# doesn't allow enslavement to a device that already has an upper.
+	h3_destroy_team
+	h3_create_team
+	# Wait for ${h,swp}{3,4}.
+	setup_wait
+
+	log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+	test_lag_slave $h3 $h4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+	test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000000..fac486178ef7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+
+source "$relative_path/mirror_lib.sh"
+
+quick_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 10 h3-$tundev "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 0 h3-$tundev "$@"
+}
+
+test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	test_span_dir_ips h3-$tundev "$@"
+}
+
+full_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
+full_test_span_gre_dir_vlan_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local vlan_match=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+
+	tc filter add dev $h3 ingress pref 77 prot 802.1q \
+		flower $vlan_match \
+		action pass
+	mirror_test v$h1 $ip1 $ip2 $h3 77 10
+	tc filter del dev $h3 ingress pref 77
+
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+	quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+	fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+	test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+	full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir_vlan()
+{
+	full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_stp_ips()
+{
+	local tundev=$1; shift
+	local nbpdev=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+	local h3mac=$(mac_get $h3)
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	bridge link set dev $nbpdev state disabled
+	sleep 1
+	fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	bridge link set dev $nbpdev state forwarding
+	sleep 1
+	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: STP state ($tcflags)"
+}
+
+full_test_span_gre_stp()
+{
+	full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000000..fc0508e40fca
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address has invalid address at the time that the mirroring
+# is set up. Later on, the neighbor is deleted and it is expected to be
+# reinitialized using the usual ARP process, and the mirroring offload updated.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+	local addr=$1; shift
+	local tundev=$1; shift
+	local direction=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+	ip neigh del dev $swp3 $addr
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()
+{
+	test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+	test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+	test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000000..6f9ef1820e93
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to directly-attached route).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+	sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	sysctl_set net.ipv4.conf.v$h3.rp_filter 0
+
+	ip address add dev $swp3 192.0.2.161/28
+	ip address add dev $h3 192.0.2.162/28
+	ip address add dev gt4 192.0.2.129/32
+	ip address add dev h3-gt4 192.0.2.130/32
+
+	# IPv6 route can't be added after address. Such routes are rejected due
+	# to the gateway address having been configured on the local system. It
+	# works the other way around though.
+	ip address add dev $swp3 2001:db8:4::1/64
+	ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+	ip address add dev $h3 2001:db8:4::2/64
+	ip address add dev gt6 2001:db8:2::1
+	ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+	ip address del dev $h3 2001:db8:4::2/64
+	ip address del dev $swp3 2001:db8:4::1/64
+
+	ip address del dev $h3 192.0.2.162/28
+	ip address del dev $swp3 192.0.2.161/28
+
+	sysctl_restore net.ipv4.conf.v$h3.rp_filter 0
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+
+	sysctl_restore net.ipv4.conf.$h3.rp_filter
+	sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+	RET=0
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+	# For IPv4, test that there's no mirroring without the route directing
+	# the traffic to tunnel remote address. Then add it and test that
+	# mirroring starts. For IPv6 we can't test this due to the limitation
+	# that routes for locally-specified IPv6 addresses can't be added.
+	fail_test_span_gre_dir gt4 ingress
+
+	ip route add 192.0.2.130/32 via 192.0.2.162
+	quick_test_span_gre_dir gt4 ingress
+	ip route del 192.0.2.130/32 via 192.0.2.162
+
+	mirror_uninstall $swp1 ingress
+	log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+	RET=0
+
+	mirror_install $swp1 ingress gt6 "matchall $tcflags"
+	quick_test_span_gre_dir gt6 ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000000..39c03e2867f4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3               + gt6 (ip6gretap)      + gt4 (gretap)         |
+#   |     |                     : loc=2001:db8:2::1    : loc=192.0.2.129      |
+#   |     |                     : rem=2001:db8:2::2    : rem=192.0.2.130      |
+#   |     |                     : ttl=100              : ttl=100              |
+#   |     |                     : tos=inherit          : tos=inherit          |
+#   |     |                     :                      :                      |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |                             loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |                             rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source "$relative_path/mirror_topo_lib.sh"
+
+mirror_gre_topo_h3_create()
+{
+	mirror_topo_h3_create
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	mirror_topo_h3_destroy
+}
+
+mirror_gre_topo_switch_create()
+{
+	mirror_topo_switch_create
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit allow-localremote
+}
+
+mirror_gre_topo_switch_destroy()
+{
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	mirror_topo_switch_destroy
+}
+
+mirror_gre_topo_create()
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_gre_topo_h3_create
+
+	mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+	mirror_gre_topo_switch_destroy
+
+	mirror_gre_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000000..88cecdb9a861
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+	test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name $swp3.555 link $swp3 type vlan id 555
+	ip address add dev $swp3.555 192.0.2.129/32
+	ip address add dev $swp3.555 2001:db8:2::1/128
+	ip link set dev $swp3.555 up
+
+	ip route add 192.0.2.130/32 dev $swp3.555
+	ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+	ip link add name $h3.555 link $h3 type vlan id 555
+	ip link set dev $h3.555 master v$h3
+	ip address add dev $h3.555 192.0.2.130/28
+	ip address add dev $h3.555 2001:db8:2::2/64
+	ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link del dev $h3.555
+	ip link del dev $swp3.555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000000..204b25f13934
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_forbidden_cpu
+	test_ip6gretap_forbidden_cpu
+	test_gretap_forbidden_egress
+	test_ip6gretap_forbidden_egress
+	test_gretap_untagged_egress
+	test_ip6gretap_untagged_egress
+	test_gretap_fdb_roaming
+	test_ip6gretap_fdb_roaming
+	test_gretap_stp
+	test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+require_command $ARPING
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	# gt4's remote address is at $h3.555, not $h3. Thus the packets arriving
+	# directly to $h3 for test_gretap_untagged_egress() are rejected by
+	# rp_filter and the test spuriously fails.
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+	sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+	bridge vlan add dev br1 vid 555 self
+	ip route rep 192.0.2.130/32 dev br1.555
+	ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+	ip link set dev $swp3 master br1
+	bridge vlan add dev $swp3 vid 555
+	bridge vlan add dev $swp2 vid 555
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp3 nomaster
+	vlan_destroy $h3 555
+	vlan_destroy br1 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+
+	sysctl_restore net.ipv4.conf.$h3.rp_filter
+	sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_cpu()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	# Run the pass-test first, to prime neighbor table.
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	# Now forbid the VLAN at the bridge and see it fail.
+	bridge vlan del dev br1 vid 555 self
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge vlan add dev br1 vid 555 self
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden_cpu()
+{
+	test_span_gre_forbidden_cpu gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_cpu()
+{
+	test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_egress()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	bridge vlan del dev $swp3 vid 555
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge vlan add dev $swp3 vid 555
+	# Re-prime FDB
+	$ARPING -I br1.555 192.0.2.130 -fqc 1
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+}
+
+test_gretap_forbidden_egress()
+{
+	test_span_gre_forbidden_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_egress()
+{
+	test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_untagged_egress()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+
+	quick_test_span_gre_dir $tundev ingress
+	quick_test_span_vlan_dir $h3 555 ingress
+
+	bridge vlan add dev $swp3 vid 555 pvid untagged
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+	fail_test_span_vlan_dir $h3 555 ingress
+
+	bridge vlan add dev $swp3 vid 555
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+	quick_test_span_vlan_dir $h3 555 ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+}
+
+test_gretap_untagged_egress()
+{
+	test_span_gre_untagged_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_untagged_egress()
+{
+	test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_fdb_roaming()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+	local h3mac=$(mac_get $h3)
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	bridge fdb del dev $swp3 $h3mac vlan 555 master
+	bridge fdb add dev $swp2 $h3mac vlan 555 master
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge fdb del dev $swp2 $h3mac vlan 555 master
+	# Re-prime FDB
+	$ARPING -I br1.555 192.0.2.130 -fqc 1
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: MAC roaming ($tcflags)"
+}
+
+test_gretap_fdb_roaming()
+{
+	test_span_gre_fdb_roaming gt4 "mirror to gretap"
+}
+
+test_ip6gretap_fdb_roaming()
+{
+	test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+	full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+	full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000000..07991e1025c7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+	local from_dev=$1; shift
+	local direction=$1; shift
+	local to_dev=$1; shift
+	local filter=$1; shift
+
+	tc filter add dev $from_dev $direction \
+	   pref 1000 $filter \
+	   action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+	local from_dev=$1; shift
+	local direction=$1; shift
+
+	tc filter del dev $swp1 $direction pref 1000
+}
+
+mirror_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local dev=$1; shift
+	local pref=$1; shift
+	local expect=$1; shift
+
+	local t0=$(tc_rule_stats_get $dev $pref)
+	ip vrf exec $vrf_name \
+	   ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	local t1=$(tc_rule_stats_get $dev $pref)
+	local delta=$((t1 - t0))
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))
+	check_err $? "Expected to capture $expect packets, got $delta."
+}
+
+do_test_span_dir_ips()
+{
+	local expect=$1; shift
+	local dev=$1; shift
+	local direction=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	icmp_capture_install $dev
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+	icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+	do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+	do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+	icmp_capture_install $dev "type $forward_type"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 10
+	icmp_capture_uninstall $dev
+
+	icmp_capture_install $dev "type $backward_type"
+	mirror_test v$h2 $ip2 $ip1 $dev 100 10
+	icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+	fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+	test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+do_test_span_vlan_dir_ips()
+{
+	local expect=$1; shift
+	local dev=$1; shift
+	local vid=$1; shift
+	local direction=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	# Install the capture as skip_hw to avoid double-counting of packets.
+	# The traffic is meant for local box anyway, so will be trapped to
+	# kernel.
+	vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+	vlan_capture_uninstall $dev
+}
+
+quick_test_span_vlan_dir_ips()
+{
+	do_test_span_vlan_dir_ips 10 "$@"
+}
+
+fail_test_span_vlan_dir_ips()
+{
+	do_test_span_vlan_dir_ips 0 "$@"
+}
+
+quick_test_span_vlan_dir()
+{
+	quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_vlan_dir()
+{
+	fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000000..04979e5962e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically add more devices to the topology.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                                                             |
+#   +-----|-------------------------------------------------------------------+
+#         |
+#   +-----|-------------------------------------------------------------------+
+#   | H3  + $h3                                                               |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+	simple_if_init $h3
+	tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()
+{
+	tc qdisc del dev $h3 clsact
+	simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+	ip link set dev $swp3 up
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+
+	ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_topo_h3_create
+
+	mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+	mirror_topo_switch_destroy
+
+	mirror_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000000..9ab2ce77b332
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+	test_vlan
+	test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_topo_create
+
+	vlan_create $swp3 555
+
+	vlan_create $h3 555 v$h3
+	matchall_sink_create $h3.555
+
+	vlan_create $h1 111 v$h1 192.0.2.17/28
+	bridge vlan add dev $swp1 vid 111
+
+	vlan_create $h2 111 v$h2 192.0.2.18/28
+	bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h2 111
+	vlan_destroy $h1 111
+	vlan_destroy $h3 555
+	vlan_destroy $swp3 555
+
+	mirror_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_dir()
+{
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+	test_vlan_dir ingress 8 0
+	test_vlan_dir egress 0 8
+}
+
+test_tagged_vlan_dir()
+{
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+				  192.0.2.17 192.0.2.18
+	do_test_span_vlan_dir_ips  0 "$h3.555" 555 "$direction" \
+				  192.0.2.17 192.0.2.18
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror tagged to vlan ($tcflags)"
+}
+
+test_tagged_vlan()
+{
+	test_tagged_vlan_dir ingress 8 0
+	test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+	trap_install $h3 ingress
+
+	tests_run
+
+	trap_uninstall $h3 ingress
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index cc6a14abfa87..a75cb51cc5bd 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6"
 NUM_NETIFS=4
 source lib.sh
 
@@ -114,12 +115,21 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
new file mode 100755
index 000000000000..ebc596a272f7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp2 down
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
new file mode 100755
index 000000000000..fef88eb4b873
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	vlan
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	vlan_destroy $h1 555
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28
+}
+
+router_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	bridge vlan add dev br1 vid 555 self pvid untagged
+	bridge vlan add dev $swp1 vid 555
+
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp2 down
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+vlan()
+{
+	RET=0
+
+	bridge vlan add dev br1 vid 333 self
+	check_err $? "Can't add a non-PVID VLAN"
+	bridge vlan del dev br1 vid 333 self
+	check_err $? "Can't remove a non-PVID VLAN"
+
+	log_test "vlan"
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh
new file mode 100755
index 000000000000..7bd2ebb6e9de
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_broadcast.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 198.51.200.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+}
+
+h1_destroy()
+{
+	ip route del 198.51.200.0/24 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 198.51.200.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+}
+
+h2_destroy()
+{
+	ip route del 198.51.200.0/24 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+h3_create()
+{
+	vrf_create "vrf-h3"
+	ip link set dev $h3 master vrf-h3
+
+	ip link set dev vrf-h3 up
+	ip link set dev $h3 up
+
+	ip address add 198.51.200.2/24 dev $h3
+
+	ip route add 192.0.2.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+	ip route add 198.51.100.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+}
+
+h3_destroy()
+{
+	ip route del 198.51.100.0/24 vrf vrf-h3
+	ip route del 192.0.2.0/24 vrf vrf-h3
+
+	ip address del 198.51.200.2/24 dev $h3
+
+	ip link set dev $h3 down
+	vrf_destroy "vrf-h3"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	ip link set dev $rp3 up
+
+	ip address add 192.0.2.1/24 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 198.51.200.1/24 dev $rp3
+}
+
+router_destroy()
+{
+	ip address del 198.51.200.1/24 dev $rp3
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 192.0.2.1/24 dev $rp1
+
+	ip link set dev $rp3 down
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+bc_forwarding_disable()
+{
+	sysctl_set net.ipv4.conf.all.bc_forwarding 0
+	sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0
+}
+
+bc_forwarding_enable()
+{
+	sysctl_set net.ipv4.conf.all.bc_forwarding 1
+	sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1
+}
+
+bc_forwarding_restore()
+{
+	sysctl_restore net.ipv4.conf.$rp1.bc_forwarding
+	sysctl_restore net.ipv4.conf.all.bc_forwarding
+}
+
+ping_test_from()
+{
+	local oif=$1
+	local dip=$2
+	local from=$3
+	local fail=${4:-0}
+
+	RET=0
+
+	log_info "ping $dip, expected reply from $from"
+	ip vrf exec $(master_name_get $oif) \
+	$PING -I $oif $dip -c 10 -i 0.1 -w 2 -b 2>&1 | grep $from &> /dev/null
+	check_err_fail $fail $?
+}
+
+ping_ipv4()
+{
+	sysctl_set net.ipv4.icmp_echo_ignore_broadcasts 0
+
+	bc_forwarding_disable
+	log_info "bc_forwarding disabled on r1 =>"
+	ping_test_from $h1 198.51.100.255 192.0.2.1
+	log_test "h1 -> net2: reply from r1 (not forwarding)"
+	ping_test_from $h1 198.51.200.255 192.0.2.1
+	log_test "h1 -> net3: reply from r1 (not forwarding)"
+	ping_test_from $h1 192.0.2.255 192.0.2.1
+	log_test "h1 -> net1: reply from r1 (not dropping)"
+	ping_test_from $h1 255.255.255.255 192.0.2.1
+	log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+	ping_test_from $h2 192.0.2.255 198.51.100.1
+	log_test "h2 -> net1: reply from r1 (not forwarding)"
+	ping_test_from $h2 198.51.200.255 198.51.100.1
+	log_test "h2 -> net3: reply from r1 (not forwarding)"
+	ping_test_from $h2 198.51.100.255 198.51.100.1
+	log_test "h2 -> net2: reply from r1 (not dropping)"
+	ping_test_from $h2 255.255.255.255 198.51.100.1
+	log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+	bc_forwarding_restore
+
+	bc_forwarding_enable
+	log_info "bc_forwarding enabled on r1 =>"
+	ping_test_from $h1 198.51.100.255 198.51.100.2
+	log_test "h1 -> net2: reply from h2 (forwarding)"
+	ping_test_from $h1 198.51.200.255 198.51.200.2
+	log_test "h1 -> net3: reply from h3 (forwarding)"
+	ping_test_from $h1 192.0.2.255 192.0.2.1 1
+	log_test "h1 -> net1: no reply (dropping)"
+	ping_test_from $h1 255.255.255.255 192.0.2.1
+	log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+	ping_test_from $h2 192.0.2.255 192.0.2.2
+	log_test "h2 -> net1: reply from h1 (forwarding)"
+	ping_test_from $h2 198.51.200.255 198.51.200.2
+	log_test "h2 -> net3: reply from h3 (forwarding)"
+	ping_test_from $h2 198.51.100.255 198.51.100.1 1
+	log_test "h2 -> net2: no reply (dropping)"
+	ping_test_from $h2 255.255.255.255 198.51.100.1
+	log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+	bc_forwarding_restore
+
+	sysctl_restore net.ipv4.icmp_echo_ignore_broadcasts
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 3bc351008db6..79a209927962 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
 NUM_NETIFS=8
 source lib.sh
 
@@ -158,45 +159,6 @@ router2_destroy()
 	vrf_destroy "vrf-r2"
 }
 
-multipath_eval()
-{
-       local desc="$1"
-       local weight_rp12=$2
-       local weight_rp13=$3
-       local packets_rp12=$4
-       local packets_rp13=$5
-       local weights_ratio packets_ratio diff
-
-       RET=0
-
-       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
-              check_err 1 "Packet difference is 0"
-              log_test "Multipath"
-              log_info "Expected ratio $weights_ratio"
-              return
-       fi
-
-       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
-               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
-		       | bc -l)
-               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
-		       | bc -l)
-       else
-               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
-		       bc -l)
-               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
-		       bc -l)
-       fi
-
-       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
-       diff=${diff#-}
-
-       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
-       check_err $? "Too large discrepancy between expected and measured ratios"
-       log_test "$desc"
-       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
-}
-
 multipath4_test()
 {
        local desc="$1"
@@ -204,13 +166,11 @@ multipath4_test()
        local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
-       local hash_policy
 
        # Transmit multiple flows from h1 to h2 and make sure they are
        # distributed between both multipath links (rp12 and rp13)
        # according to the configured weights.
-       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
-       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       sysctl_set net.ipv4.fib_multipath_hash_policy 1
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
                nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
                nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
@@ -232,7 +192,7 @@ multipath4_test()
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
                nexthop via 169.254.2.22 dev $rp12 \
                nexthop via 169.254.3.23 dev $rp13
-       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+       sysctl_restore net.ipv4.fib_multipath_hash_policy
 }
 
 multipath6_l4_test()
@@ -242,13 +202,11 @@ multipath6_l4_test()
        local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
-       local hash_policy
 
        # Transmit multiple flows from h1 to h2 and make sure they are
        # distributed between both multipath links (rp12 and rp13)
        # according to the configured weights.
-       hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
-       sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+       sysctl_set net.ipv6.fib_multipath_hash_policy 1
 
        ip route replace 2001:db8:2::/64 vrf vrf-r1 \
 	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
@@ -271,7 +229,7 @@ multipath6_l4_test()
 	       nexthop via fe80:2::22 dev $rp12 \
 	       nexthop via fe80:3::23 dev $rp13
 
-       sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+       sysctl_restore net.ipv6.fib_multipath_hash_policy
 }
 
 multipath6_test()
@@ -364,13 +322,21 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
-multipath_test
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 3a6385ebd5d0..813d02d1939d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+	mirred_egress_mirror_test gact_trap_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -111,6 +113,10 @@ gact_trap_test()
 {
 	RET=0
 
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
 	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
 		skip_hw dst_ip 192.0.2.2 action drop
 	tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
@@ -179,24 +185,29 @@ cleanup()
 	ip link set $swp1 address $swp1origmac
 }
 
+mirred_egress_redirect_test()
+{
+	mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+	mirred_egress_test "mirror"
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-gact_drop_and_ok_test
-mirred_egress_test "redirect"
-mirred_egress_test "mirror"
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	gact_drop_and_ok_test
-	mirred_egress_test "redirect"
-	mirred_egress_test "mirror"
-	gact_trap_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2fd15226974b..2934fb5ed2a2 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test create_destroy_chain \
+	   template_filter_fits"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -79,6 +81,87 @@ gact_goto_chain_test()
 	log_test "gact goto chain ($tcflags)"
 }
 
+create_destroy_chain()
+{
+	RET=0
+
+	tc chain add dev $h2 ingress
+	check_err $? "Failed to create default chain"
+
+	output="$(tc -j chain get dev $h2 ingress)"
+	check_err $? "Failed to get default chain"
+
+	echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+	check_err $? "Unexpected output for default chain"
+
+	tc chain add dev $h2 ingress chain 1
+	check_err $? "Failed to create chain 1"
+
+	output="$(tc -j chain get dev $h2 ingress chain 1)"
+	check_err $? "Failed to get chain 1"
+
+	echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+	check_err $? "Unexpected output for chain 1"
+
+	output="$(tc -j chain show dev $h2 ingress)"
+	check_err $? "Failed to dump chains"
+
+	echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+	check_err $? "Can't find default chain in dump"
+
+	echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+	check_err $? "Can't find chain 1 in dump"
+
+	tc chain del dev $h2 ingress
+	check_err $? "Failed to destroy default chain"
+
+	tc chain del dev $h2 ingress chain 1
+	check_err $? "Failed to destroy chain 1"
+
+	log_test "create destroy chain"
+}
+
+template_filter_fits()
+{
+	RET=0
+
+	tc chain add dev $h2 ingress protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+	tc chain add dev $h2 ingress chain 1 protocol ip \
+		flower src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower dst_mac $h2mac action drop
+	check_err $? "Failed to insert filter which fits template"
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
+		flower src_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeded to insert filter which does not template"
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower src_mac $h2mac action drop
+	check_err $? "Failed to insert filter which fits template"
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+		flower dst_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeded to insert filter which does not template"
+
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+		flower &> /dev/null
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower &> /dev/null
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1102 \
+		flower &> /dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower &> /dev/null
+
+	tc chain del dev $h2 ingress chain 1
+	tc chain del dev $h2 ingress
+
+	log_test "template filter fits"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -102,21 +185,21 @@ cleanup()
 	vrf_cleanup
 }
 
+check_tc_chain_support
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-unreachable_chain_test
-gact_goto_chain_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	unreachable_chain_test
-	gact_goto_chain_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882adfc0..20d1077e5a3d 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+	match_src_ip_test match_ip_flags_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -149,6 +151,74 @@ match_src_ip_test()
 	log_test "src_ip match ($tcflags)"
 }
 
+match_ip_flags_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags ip_flags frag action continue
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags ip_flags firstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags ip_flags nofirstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+		$tcflags ip_flags nofrag action drop
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on wrong frag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Did not match on nofrag filter (nofrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0,mf" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on frag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match fistfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256,mf" -q
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "Did not match on frag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 3
+	check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on nofrag filter (no1stfrag)"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+	log_test "ip_flags match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -177,20 +247,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-match_dst_mac_test
-match_src_mac_test
-match_dst_ip_test
-match_src_ip_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	match_dst_mac_test
-	match_src_mac_test
-	match_dst_ip_test
-	match_src_ip_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 077b98048ef4..9826a446e2c0 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="shared_block_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -104,19 +105,21 @@ cleanup()
 	ip link set $swp2 address $swp2origmac
 }
 
+check_tc_shblock_support
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-shared_block_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	shared_block_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/ip6_gre_headroom.sh b/tools/testing/selftests/net/ip6_gre_headroom.sh
new file mode 100755
index 000000000000..5b41e8bb6e2d
--- /dev/null
+++ b/tools/testing/selftests/net/ip6_gre_headroom.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that enough headroom is reserved for the first packet passing through an
+# IPv6 GRE-like netdevice.
+
+setup_prepare()
+{
+	ip link add h1 type veth peer name swp1
+	ip link add h3 type veth peer name swp3
+
+	ip link set dev h1 up
+	ip address add 192.0.2.1/28 dev h1
+
+	ip link add dev vh3 type vrf table 20
+	ip link set dev h3 master vh3
+	ip link set dev vh3 up
+	ip link set dev h3 up
+
+	ip link set dev swp3 up
+	ip address add dev swp3 2001:db8:2::1/64
+	ip address add dev swp3 2001:db8:2::3/64
+
+	ip link set dev swp1 up
+	tc qdisc add dev swp1 clsact
+
+	ip link add name er6 type ip6erspan \
+	   local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123
+	ip link set dev er6 up
+
+	ip link add name gt6 type ip6gretap \
+	   local 2001:db8:2::3 remote 2001:db8:2::4
+	ip link set dev gt6 up
+
+	sleep 1
+}
+
+cleanup()
+{
+	ip link del dev gt6
+	ip link del dev er6
+	ip link del dev swp1
+	ip link del dev swp3
+	ip link del dev vh3
+}
+
+test_headroom()
+{
+	local type=$1; shift
+	local tundev=$1; shift
+
+	tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+		action mirred egress mirror dev $tundev
+	ping -I h1 192.0.2.2 -c 1 -w 2 &> /dev/null
+	tc filter del dev swp1 ingress pref 1000
+
+	# If it doesn't panic, it passes.
+	printf "TEST: %-60s  [PASS]\n" "$type headroom"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+test_headroom ip6gretap gt6
+test_headroom ip6erspan er6
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
new file mode 100644
index 000000000000..61ae2782388e
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+static bool		cfg_do_ipv4;
+static bool		cfg_do_ipv6;
+static bool		cfg_verbose;
+static bool		cfg_overlap;
+static unsigned short	cfg_port = 9000;
+
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+
+#define IP4_HLEN	(sizeof(struct iphdr))
+#define IP6_HLEN	(sizeof(struct ip6_hdr))
+#define UDP_HLEN	(sizeof(struct udphdr))
+
+/* IPv6 fragment header lenth. */
+#define FRAG_HLEN	8
+
+static int payload_len;
+static int max_frag_len;
+
+#define MSG_LEN_MAX	60000	/* Max UDP payload length. */
+
+#define IP4_MF		(1u << 13)  /* IPv4 MF flag. */
+#define IP6_MF		(1)  /* IPv6 MF flag. */
+
+#define CSUM_MANGLED_0 (0xffff)
+
+static uint8_t udp_payload[MSG_LEN_MAX];
+static uint8_t ip_frame[IP_MAXPACKET];
+static uint32_t ip_id = 0xabcd;
+static int msg_counter;
+static int frag_counter;
+static unsigned int seed;
+
+/* Receive a UDP packet. Validate it matches udp_payload. */
+static void recv_validate_udp(int fd_udp)
+{
+	ssize_t ret;
+	static uint8_t recv_buff[MSG_LEN_MAX];
+
+	ret = recv(fd_udp, recv_buff, payload_len, 0);
+	msg_counter++;
+
+	if (cfg_overlap) {
+		if (ret != -1)
+			error(1, 0, "recv: expected timeout; got %d",
+				(int)ret);
+		if (errno != ETIMEDOUT && errno != EAGAIN)
+			error(1, errno, "recv: expected timeout: %d",
+				 errno);
+		return;  /* OK */
+	}
+
+	if (ret == -1)
+		error(1, errno, "recv: payload_len = %d max_frag_len = %d",
+			payload_len, max_frag_len);
+	if (ret != payload_len)
+		error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len);
+	if (memcmp(udp_payload, recv_buff, payload_len))
+		error(1, 0, "recv: wrong data");
+}
+
+static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum)
+{
+	int i;
+
+	for (i = 0; i < (len & ~1U); i += 2) {
+		sum += (u_int16_t)ntohs(*((u_int16_t *)(buf + i)));
+		if (sum > 0xffff)
+			sum -= 0xffff;
+	}
+
+	if (i < len) {
+		sum += buf[i] << 8;
+		if (sum > 0xffff)
+			sum -= 0xffff;
+	}
+
+	return sum;
+}
+
+static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr)
+{
+	uint32_t sum = 0;
+	uint16_t res;
+
+	sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src),
+				IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len));
+	sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+	sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+	res = 0xffff & ~sum;
+	if (res)
+		return htons(res);
+	else
+		return CSUM_MANGLED_0;
+}
+
+static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr)
+{
+	uint32_t sum = 0;
+	uint16_t res;
+
+	sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src),
+				IPPROTO_UDP);
+	sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum);
+	sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+	sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+	res = 0xffff & ~sum;
+	if (res)
+		return htons(res);
+	else
+		return CSUM_MANGLED_0;
+}
+
+static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
+				int offset, bool ipv6)
+{
+	int frag_len;
+	int res;
+	int payload_offset = offset > 0 ? offset - UDP_HLEN : 0;
+	uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN :
+					ip_frame + IP4_HLEN;
+
+	if (offset == 0) {
+		struct udphdr udphdr;
+		udphdr.source = htons(cfg_port + 1);
+		udphdr.dest = htons(cfg_port);
+		udphdr.len = htons(UDP_HLEN + payload_len);
+		udphdr.check = 0;
+		if (ipv6)
+			udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr);
+		else
+			udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr);
+		memcpy(frag_start, &udphdr, UDP_HLEN);
+	}
+
+	if (ipv6) {
+		struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+		struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+		if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+			/* This is the last fragment. */
+			frag_len = FRAG_HLEN + payload_len - payload_offset;
+			fraghdr->ip6f_offlg = htons(offset);
+		} else {
+			frag_len = FRAG_HLEN + max_frag_len;
+			fraghdr->ip6f_offlg = htons(offset | IP6_MF);
+		}
+		ip6hdr->ip6_plen = htons(frag_len);
+		if (offset == 0)
+			memcpy(frag_start + UDP_HLEN, udp_payload,
+				frag_len - FRAG_HLEN - UDP_HLEN);
+		else
+			memcpy(frag_start, udp_payload + payload_offset,
+				frag_len - FRAG_HLEN);
+		frag_len += IP6_HLEN;
+	} else {
+		struct ip *iphdr = (struct ip *)ip_frame;
+		if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+			/* This is the last fragment. */
+			frag_len = IP4_HLEN + payload_len - payload_offset;
+			iphdr->ip_off = htons(offset / 8);
+		} else {
+			frag_len = IP4_HLEN + max_frag_len;
+			iphdr->ip_off = htons(offset / 8 | IP4_MF);
+		}
+		iphdr->ip_len = htons(frag_len);
+		if (offset == 0)
+			memcpy(frag_start + UDP_HLEN, udp_payload,
+				frag_len - IP4_HLEN - UDP_HLEN);
+		else
+			memcpy(frag_start, udp_payload + payload_offset,
+				frag_len - IP4_HLEN);
+	}
+
+	res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+	if (res < 0)
+		error(1, errno, "send_fragment");
+	if (res != frag_len)
+		error(1, 0, "send_fragment: %d vs %d", res, frag_len);
+
+	frag_counter++;
+}
+
+static void send_udp_frags(int fd_raw, struct sockaddr *addr,
+				socklen_t alen, bool ipv6)
+{
+	struct ip *iphdr = (struct ip *)ip_frame;
+	struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+	int res;
+	int offset;
+	int frag_len;
+
+	/* Send the UDP datagram using raw IP fragments: the 0th fragment
+	 * has the UDP header; other fragments are pieces of udp_payload
+	 * split in chunks of frag_len size.
+	 *
+	 * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then
+	 * even fragments (0th, 2nd, etc.) are sent out.
+	 */
+	if (ipv6) {
+		struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+		((struct sockaddr_in6 *)addr)->sin6_port = 0;
+		memset(ip6hdr, 0, sizeof(*ip6hdr));
+		ip6hdr->ip6_flow = htonl(6<<28);  /* Version. */
+		ip6hdr->ip6_nxt = IPPROTO_FRAGMENT;
+		ip6hdr->ip6_hops = 255;
+		ip6hdr->ip6_src = addr6;
+		ip6hdr->ip6_dst = addr6;
+		fraghdr->ip6f_nxt = IPPROTO_UDP;
+		fraghdr->ip6f_reserved = 0;
+		fraghdr->ip6f_ident = htonl(ip_id++);
+	} else {
+		memset(iphdr, 0, sizeof(*iphdr));
+		iphdr->ip_hl = 5;
+		iphdr->ip_v = 4;
+		iphdr->ip_tos = 0;
+		iphdr->ip_id = htons(ip_id++);
+		iphdr->ip_ttl = 0x40;
+		iphdr->ip_p = IPPROTO_UDP;
+		iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
+		iphdr->ip_dst = addr4;
+		iphdr->ip_sum = 0;
+	}
+
+	/* Odd fragments. */
+	offset = max_frag_len;
+	while (offset < (UDP_HLEN + payload_len)) {
+		send_fragment(fd_raw, addr, alen, offset, ipv6);
+		offset += 2 * max_frag_len;
+	}
+
+	if (cfg_overlap) {
+		/* Send an extra random fragment. */
+		offset = rand() % (UDP_HLEN + payload_len - 1);
+		/* sendto() returns EINVAL if offset + frag_len is too small. */
+		if (ipv6) {
+			struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+			frag_len = max_frag_len + rand() % 256;
+			/* In IPv6 if !!(frag_len % 8), the fragment is dropped. */
+			frag_len &= ~0x7;
+			fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF);
+			ip6hdr->ip6_plen = htons(frag_len);
+			frag_len += IP6_HLEN;
+		} else {
+			frag_len = IP4_HLEN + UDP_HLEN + rand() % 256;
+			iphdr->ip_off = htons(offset / 8 | IP4_MF);
+			iphdr->ip_len = htons(frag_len);
+		}
+		res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+		if (res < 0)
+			error(1, errno, "sendto overlap");
+		if (res != frag_len)
+			error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
+		frag_counter++;
+	}
+
+	/* Event fragments. */
+	offset = 0;
+	while (offset < (UDP_HLEN + payload_len)) {
+		send_fragment(fd_raw, addr, alen, offset, ipv6);
+		offset += 2 * max_frag_len;
+	}
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6)
+{
+	int fd_tx_raw, fd_rx_udp;
+	struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 };
+	int idx;
+	int min_frag_len = ipv6 ? 1280 : 8;
+
+	/* Initialize the payload. */
+	for (idx = 0; idx < MSG_LEN_MAX; ++idx)
+		udp_payload[idx] = idx % 256;
+
+	/* Open sockets. */
+	fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW);
+	if (fd_tx_raw == -1)
+		error(1, errno, "socket tx_raw");
+
+	fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd_rx_udp == -1)
+		error(1, errno, "socket rx_udp");
+	if (bind(fd_rx_udp, addr, alen))
+		error(1, errno, "bind");
+	/* Fail fast. */
+	if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX;
+			payload_len += (rand() % 4096)) {
+		if (cfg_verbose)
+			printf("payload_len: %d\n", payload_len);
+		max_frag_len = min_frag_len;
+		do {
+			send_udp_frags(fd_tx_raw, addr, alen, ipv6);
+			recv_validate_udp(fd_rx_udp);
+			max_frag_len += 8 * (rand() % 8);
+		} while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len);
+	}
+
+	/* Cleanup. */
+	if (close(fd_tx_raw))
+		error(1, errno, "close tx_raw");
+	if (close(fd_rx_udp))
+		error(1, errno, "close rx_udp");
+
+	if (cfg_verbose)
+		printf("processed %d messages, %d fragments\n",
+			msg_counter, frag_counter);
+
+	fprintf(stderr, "PASS\n");
+}
+
+
+static void run_test_v4(void)
+{
+	struct sockaddr_in addr = {0};
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(cfg_port);
+	addr.sin_addr = addr4;
+
+	run_test((void *)&addr, sizeof(addr), false /* !ipv6 */);
+}
+
+static void run_test_v6(void)
+{
+	struct sockaddr_in6 addr = {0};
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_port = htons(cfg_port);
+	addr.sin6_addr = addr6;
+
+	run_test((void *)&addr, sizeof(addr), true /* ipv6 */);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46ov")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'o':
+			cfg_overlap = true;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	seed = time(NULL);
+	srand(seed);
+	/* Print the seed to track/reproduce potential failures. */
+	printf("seed = %d\n", seed);
+
+	if (cfg_do_ipv4)
+		run_test_v4();
+	if (cfg_do_ipv6)
+		run_test_v6();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh
new file mode 100755
index 000000000000..f34672796044
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a couple of IP defragmentation tests.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 >/dev/null 2>&1
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 >/dev/null 2>&1
+	ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 >/dev/null 2>&1
+	ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+echo "ipv4 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -4
+
+
+echo "ipv4 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -4o
+
+echo "ipv6 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -6
+
+echo "ipv6 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -6o
+
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index d571d213418d..c43c6debda06 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -21,6 +21,14 @@ readonly DADDR6='fd::2'
 
 readonly path_sysctl_mem="net.core.optmem_max"
 
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+	$0 4 tcp -t 1
+	$0 6 tcp -t 1
+	echo "OK. All tests passed"
+	exit 0
+fi
+
 # Argument parsing
 if [[ "$#" -lt "2" ]]; then
 	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 903679e0ff31..e3afcb424710 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -8,6 +8,9 @@
 # if not they probably have failed earlier in the boot process and their logged error will be catched by another test
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # this function will try to up the interface
 # if already up, nothing done
 # arg1: network interface name
@@ -18,7 +21,7 @@ kci_net_start()
 	ip link show "$netdev" |grep -q UP
 	if [ $? -eq 0 ];then
 		echo "SKIP: $netdev: interface already up"
-		return 0
+		return $ksft_skip
 	fi
 
 	ip link set "$netdev" up
@@ -61,12 +64,12 @@ kci_net_setup()
 	ip address show "$netdev" |grep '^[[:space:]]*inet'
 	if [ $? -eq 0 ];then
 		echo "SKIP: $netdev: already have an IP"
-		return 0
+		return $ksft_skip
 	fi
 
 	# TODO what ipaddr to set ? DHCP ?
 	echo "SKIP: $netdev: set IP address"
-	return 0
+	return $ksft_skip
 }
 
 # test an ethtool command
@@ -84,6 +87,7 @@ kci_netdev_ethtool_test()
 	if [ $ret -ne 0 ];then
 		if [ $ret -eq "$1" ];then
 			echo "SKIP: $netdev: ethtool $2 not supported"
+			return $ksft_skip
 		else
 			echo "FAIL: $netdev: ethtool $2"
 			return 1
@@ -104,7 +108,7 @@ kci_netdev_ethtool()
 	ethtool --version 2>/dev/null >/dev/null
 	if [ $? -ne 0 ];then
 		echo "SKIP: ethtool not present"
-		return 1
+		return $ksft_skip
 	fi
 
 	TMP_ETHTOOL_FEATURES="$(mktemp)"
@@ -176,13 +180,13 @@ kci_test_netdev()
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 ip link show 2>/dev/null >/dev/null
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without the ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 TMP_LIST_NETDEV="$(mktemp)"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 1e428781a625..a369d616b390 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -6,6 +6,26 @@
 #
 # Tests currently implemented:
 #
+# - pmtu_ipv4
+#	Set up two namespaces, A and B, with two paths between them over routers
+#	R1 and R2 (also implemented with namespaces), with different MTUs:
+#
+#	  segment a_r1    segment b_r1		a_r1: 2000
+#	.--------------R1--------------.	a_r2: 1500
+#	A                               B	a_r3: 2000
+#	'--------------R2--------------'	a_r4: 1400
+#	  segment a_r2    segment b_r2
+#
+#	Check that PMTU exceptions with the correct PMTU are created. Then
+#	decrease and increase the MTU of the local link for one of the paths,
+#	A to R1, checking that route exception PMTU changes accordingly over
+#	this path. Also check that locked exceptions are created when an ICMP
+#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
+#	received
+#
+# - pmtu_ipv6
+#	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
+#
 # - pmtu_vti4_exception
 #	Set up vti tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is not
@@ -43,7 +63,15 @@
 #	that MTU is properly calculated instead when MTU is not configured from
 #	userspace
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
 tests="
+	pmtu_ipv4_exception		ipv4: PMTU exceptions
+	pmtu_ipv6_exception		ipv6: PMTU exceptions
 	pmtu_vti6_exception		vti6: PMTU exceptions
 	pmtu_vti4_exception		vti4: PMTU exceptions
 	pmtu_vti4_default_mtu		vti4: default MTU assignment
@@ -54,8 +82,45 @@ tests="
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
+NS_R1="ns-$(mktemp -u XXXXXX)"
+NS_R2="ns-$(mktemp -u XXXXXX)"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
+ns_r1="ip netns exec ${NS_R1}"
+ns_r2="ip netns exec ${NS_R2}"
+
+# Addressing and routing for tests with routers: four network segments, with
+# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
+# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
+# Addresses are:
+# - IPv4: PREFIX4.SEGMENT.ID (/24)
+# - IPv6: PREFIX6:SEGMENT::ID (/64)
+prefix4="192.168"
+prefix6="fd00"
+a_r1=1
+a_r2=2
+b_r1=3
+b_r2=4
+#	ns	peer	segment
+routing_addrs="
+	A	R1	${a_r1}
+	A	R2	${a_r2}
+	B	R1	${b_r1}
+	B	R2	${b_r2}
+"
+# Traffic from A to B goes through R1 by default, and through R2, if destined to
+# B's address on the b_r2 segment.
+# Traffic from B to A goes through R1.
+#	ns	destination		gateway
+routes="
+	A	default			${prefix4}.${a_r1}.2
+	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
+	B	default			${prefix4}.${b_r1}.2
+
+	A	default			${prefix6}:${a_r1}::2
+	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
+	B	default			${prefix6}:${b_r1}::2
+"
 
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
@@ -77,6 +142,7 @@ dummy6_mask="64"
 
 cleanup_done=1
 err_buf=
+tcpdump_pids=
 
 err() {
 	err_buf="${err_buf}${1}
@@ -88,9 +154,15 @@ err_flush() {
 	err_buf=
 }
 
+# Find the auto-generated name for this namespace
+nsname() {
+	eval echo \$NS_$1
+}
+
 setup_namespaces() {
-	ip netns add ${NS_A} || return 1
-	ip netns add ${NS_B}
+	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+		ip netns add ${n} || return 1
+	done
 }
 
 setup_veth() {
@@ -161,8 +233,51 @@ setup_xfrm6() {
 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
+setup_routing() {
+	for i in ${NS_R1} ${NS_R2}; do
+		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
+		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
+	done
+
+	for i in ${routing_addrs}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
+		[ "${segment}" = "" ]	&& segment="${i}"
+
+		ns_name="$(nsname ${ns})"
+		peer_name="$(nsname ${peer})"
+		if="veth_${ns}-${peer}"
+		ifpeer="veth_${peer}-${ns}"
+
+		# Create veth links
+		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
+		ip -n ${peer_name} link set dev ${ifpeer} up
+
+		# Add addresses
+		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
+		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}
+
+		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
+		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
+
+		ns=""; peer=""; segment=""
+	done
+
+	for i in ${routes}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
+		[ "${gw}" = "" ]	&& gw="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} route add ${addr} via ${gw}
+
+		ns=""; addr=""; gw=""
+	done
+}
+
 setup() {
-	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return 1
+	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
 	cleanup_done=0
 	for arg do
@@ -170,10 +285,28 @@ setup() {
 	done
 }
 
+trace() {
+	[ $tracing -eq 0 ] && return
+
+	for arg do
+		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
+		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+		tcpdump_pids="${tcpdump_pids} $!"
+		ns_cmd=
+	done
+	sleep 1
+}
+
 cleanup() {
+	for pid in ${tcpdump_pids}; do
+		kill ${pid}
+	done
+	tcpdump_pids=
+
 	[ ${cleanup_done} -eq 1 ] && return
-	ip netns del ${NS_A} 2 > /dev/null
-	ip netns del ${NS_B} 2 > /dev/null
+	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+		ip netns del ${n} 2> /dev/null
+	done
 	cleanup_done=1
 }
 
@@ -190,7 +323,9 @@ mtu_parse() {
 
 	next=0
 	for i in ${input}; do
+		[ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
 		[ ${next} -eq 1 ] && echo "${i}" && return
+		[ ${next} -eq 2 ] && echo "lock ${i}" && return
 		[ "${i}" = "mtu" ] && next=1
 	done
 }
@@ -223,8 +358,117 @@ route_get_dst_pmtu_from_exception() {
 	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
 }
 
+check_pmtu_value() {
+	expected="${1}"
+	value="${2}"
+	event="${3}"
+
+	[ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
+	[ "${value}" = "${expected}" ] && return 0
+	[ -z "${value}" ] &&    err "  PMTU exception wasn't created after ${event}" && return 1
+	[ -z "${expected}" ] && err "  PMTU exception shouldn't exist after ${event}" && return 1
+	err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
+	return 1
+}
+
+test_pmtu_ipvX() {
+	family=${1}
+
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst1="${prefix4}.${b_r1}.1"
+		dst2="${prefix4}.${b_r2}.1"
+	else
+		ping=${ping6}
+		dst1="${prefix6}:${b_r1}::1"
+		dst2="${prefix6}:${b_r2}::1"
+	fi
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	# Create route exceptions
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
+	mtu "${ns_a}"  veth_A-R1 1300
+	mtu "${ns_r1}" veth_R1-A 1300
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
+	# Second exception shouldn't be modified
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+	# Increase MTU, check for PMTU increase in route exception
+	mtu "${ns_a}"  veth_A-R1 1700
+	mtu "${ns_r1}" veth_R1-A 1700
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
+	# Second exception shouldn't be modified
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+	# Skip PMTU locking tests for IPv6
+	[ $family -eq 6 ] && return 0
+
+	# Decrease remote MTU on path via R2, get new exception
+	mtu "${ns_r2}" veth_R2-B 400
+	mtu "${ns_b}"  veth_B-R2 400
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+
+	# Decrease local MTU below PMTU
+	mtu "${ns_a}"  veth_A-R2 500
+	mtu "${ns_r2}" veth_R2-A 500
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1
+
+	# Increase local MTU
+	mtu "${ns_a}"  veth_A-R2 1500
+	mtu "${ns_r2}" veth_R2-A 1500
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
+
+	# Get new exception
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+}
+
+test_pmtu_ipv4_exception() {
+	test_pmtu_ipvX 4
+}
+
+test_pmtu_ipv6_exception() {
+	test_pmtu_ipvX 6
+}
+
 test_pmtu_vti4_exception() {
 	setup namespaces veth vti4 xfrm4 || return 2
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
 
 	veth_mtu=1500
 	vti_mtu=$((veth_mtu - 20))
@@ -242,28 +486,19 @@ test_pmtu_vti4_exception() {
 	# exception is created
 	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
-	if [ "${pmtu}" != "" ]; then
-		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
-		return 1
-	fi
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
 
 	# Now exceed link layer MTU by one byte, check that exception is created
+	# with the right PMTU value
 	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
-	if [ "${pmtu}" = "" ]; then
-		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
-		return 1
-	fi
-
-	# ...with the right PMTU value
-	if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
-		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
-		return 1
-	fi
+	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
 }
 
 test_pmtu_vti6_exception() {
 	setup namespaces veth vti6 xfrm6 || return 2
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
 	fail=0
 
 	# Create route exception by exceeding link layer MTU
@@ -271,28 +506,21 @@ test_pmtu_vti6_exception() {
 	mtu "${ns_b}" veth_b 4000
 	mtu "${ns_a}" vti6_a 5000
 	mtu "${ns_b}" vti6_b 5000
-	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
 
 	# Check that exception was created
-	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
-		err "  tunnel exceeding link layer MTU didn't create route exception"
-		return 1
-	fi
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
 
 	# Decrease tunnel MTU, check for PMTU decrease in route exception
 	mtu "${ns_a}" vti6_a 3000
-
-	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
-		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
-		fail=1
-	fi
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
 
 	# Increase tunnel MTU, check for PMTU increase in route exception
 	mtu "${ns_a}" vti6_a 9000
-	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
-		err "  increasing tunnel MTU didn't increase route exception PMTU"
-		fail=1
-	fi
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
 
 	return ${fail}
 }
@@ -331,7 +559,7 @@ test_pmtu_vti4_link_add_mtu() {
 	fail=0
 
 	min=68
-	max=$((65528 - 20))
+	max=$((65535 - 20))
 	# Check invalid values first
 	for v in $((min - 1)) $((max + 1)); do
 		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
@@ -368,7 +596,7 @@ test_pmtu_vti6_link_add_mtu() {
 
 	fail=0
 
-	min=1280
+	min=68			# vti6 can carry IPv4 packets too
 	max=$((65535 - 40))
 	# Check invalid values first
 	for v in $((min - 1)) $((max + 1)); do
@@ -384,7 +612,7 @@ test_pmtu_vti6_link_add_mtu() {
 	done
 
 	# Now check valid values
-	for v in 1280 1300 $((65535 - 40)); do
+	for v in 68 1280 1300 $((65535 - 40)); do
 		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
 		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 		${ns_a} ip link del vti6_a
@@ -439,15 +667,56 @@ test_pmtu_vti6_link_change_mtu() {
 	return ${fail}
 }
 
-trap cleanup EXIT
+usage() {
+	echo
+	echo "$0 [OPTIONS] [TEST]..."
+	echo "If no TEST argument is given, all tests will be run."
+	echo
+	echo "Options"
+	echo "  --trace: capture traffic to TEST_INTERFACE.pcap"
+	echo
+	echo "Available tests${tests}"
+	exit 1
+}
 
 exitcode=0
 desc=0
 IFS="	
 "
+
+tracing=0
+for arg do
+	if [ "${arg}" != "${arg#--*}" ]; then
+		opt="${arg#--}"
+		if [ "${opt}" = "trace" ]; then
+			if which tcpdump > /dev/null 2>&1; then
+				tracing=1
+			else
+				echo "=== tcpdump not available, tracing disabled"
+			fi
+		else
+			usage
+		fi
+	else
+		# Check first that all requested tests are available before
+		# running any
+		command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+	fi
+done
+
+trap cleanup EXIT
+
 for t in ${tests}; do
 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
 
+	run_this=1
+	for arg do
+		[ "${arg}" != "${arg#--*}" ] && continue
+		[ "${arg}" = "${name}" ] && run_this=1 && break
+		run_this=0
+	done
+	[ $run_this -eq 0 ] && continue
+
 	(
 		unset IFS
 		eval test_${name}
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000000..7d15e10a9fb6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool	cfg_use_bind;
+static bool	cfg_use_csum_off;
+static bool	cfg_use_csum_off_bad;
+static bool	cfg_use_dgram;
+static bool	cfg_use_gso;
+static bool	cfg_use_qdisc_bypass;
+static bool	cfg_use_vlan;
+static bool	cfg_use_vnet;
+
+static char	*cfg_ifname = "lo";
+static int	cfg_mtu	= 1500;
+static int	cfg_payload_len = DATA_LEN;
+static int	cfg_truncate_len = INT_MAX;
+static uint16_t	cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ	(sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int build_vnet_header(void *header)
+{
+	struct virtio_net_hdr *vh = header;
+
+	vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+	if (cfg_use_csum_off) {
+		vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+		vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+		/* position check field exactly one byte beyond end of packet */
+		if (cfg_use_csum_off_bad)
+			vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+					  vh->csum_offset - 1;
+	}
+
+	if (cfg_use_gso) {
+		vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+		vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+	}
+
+	return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+	struct ethhdr *eth = header;
+
+	if (cfg_use_vlan) {
+		uint16_t *tag = header + ETH_HLEN;
+
+		eth->h_proto = htons(ETH_P_8021Q);
+		tag[1] = htons(ETH_P_IP);
+		return ETH_HLEN + 4;
+	}
+
+	eth->h_proto = htons(ETH_P_IP);
+	return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+	iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+	return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+	const int alen = sizeof(uint32_t);
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(9);
+	udph->dest = htons(cfg_port);
+	udph->len = htons(len);
+
+	if (cfg_use_csum_off)
+		udph->check = build_ip_csum(header - (2 * alen), alen,
+					    htons(IPPROTO_UDP) + udph->len);
+	else
+		udph->check = 0;
+
+	return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+	int off = 0;
+
+	off += build_vnet_header(tbuf);
+	off += build_eth_header(tbuf + off);
+	off += build_ipv4_header(tbuf + off, payload_len);
+	off += build_udp_header(tbuf + off, payload_len);
+
+	if (off + payload_len > sizeof(tbuf))
+		error(1, 0, "payload length exceeds max");
+
+	memset(tbuf + off, DATA_CHAR, payload_len);
+
+	return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+	struct sockaddr_ll laddr = {0};
+
+	laddr.sll_family = AF_PACKET;
+	laddr.sll_protocol = htons(ETH_P_IP);
+	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+	if (!laddr.sll_ifindex)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&laddr, sizeof(laddr)))
+		error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+	int ret;
+
+	if (!cfg_use_vnet) {
+		buf += sizeof(struct virtio_net_hdr);
+		len -= sizeof(struct virtio_net_hdr);
+	}
+	if (cfg_use_dgram) {
+		buf += ETH_HLEN;
+		len -= ETH_HLEN;
+	}
+
+	if (cfg_use_bind) {
+		ret = write(fd, buf, len);
+	} else {
+		struct sockaddr_ll laddr = {0};
+
+		laddr.sll_protocol = htons(ETH_P_IP);
+		laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+		if (!laddr.sll_ifindex)
+			error(1, errno, "if_nametoindex");
+
+		ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+	}
+
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret != len)
+		error(1, 0, "write: %u %u", ret, len);
+
+	fprintf(stderr, "tx: %u\n", ret);
+}
+
+static int do_tx(void)
+{
+	const int one = 1;
+	int fd, len;
+
+	fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (cfg_use_bind)
+		do_bind(fd);
+
+	if (cfg_use_qdisc_bypass &&
+	    setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+		error(1, errno, "setsockopt qdisc bypass");
+
+	if (cfg_use_vnet &&
+	    setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+		error(1, errno, "setsockopt vnet");
+
+	len = build_packet(cfg_payload_len);
+
+	if (cfg_truncate_len < len)
+		len = cfg_truncate_len;
+
+	do_send(fd, tbuf, len);
+
+	if (close(fd))
+		error(1, errno, "close t");
+
+	return len;
+}
+
+static int setup_rx(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	struct sockaddr_in raddr = {0};
+	int fd;
+
+	fd = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	raddr.sin_family = AF_INET;
+	raddr.sin_port = htons(cfg_port);
+	raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	if (bind(fd, (void *)&raddr, sizeof(raddr)))
+		error(1, errno, "bind r");
+
+	return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+	int ret;
+
+	ret = recv(fd, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != expected_len)
+		error(1, 0, "recv: %u != %u", ret, expected_len);
+
+	if (memcmp(rbuf, expected, ret))
+		error(1, 0, "recv: data mismatch");
+
+	fprintf(stderr, "rx: %u\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket p");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	pair_udp_setfilter(fd);
+	do_bind(fd);
+
+	return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+		switch (c) {
+		case 'b':
+			cfg_use_bind = true;
+			break;
+		case 'c':
+			cfg_use_csum_off = true;
+			break;
+		case 'C':
+			cfg_use_csum_off_bad = true;
+			break;
+		case 'd':
+			cfg_use_dgram = true;
+			break;
+		case 'g':
+			cfg_use_gso = true;
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'q':
+			cfg_use_qdisc_bypass = true;
+			break;
+		case 't':
+			cfg_truncate_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'v':
+			cfg_use_vnet = true;
+			break;
+		case 'V':
+			cfg_use_vlan = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+
+	if (cfg_use_vlan && cfg_use_dgram)
+		error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+	if (cfg_use_csum_off && !cfg_use_vnet)
+		error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+	if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+		error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+	if (cfg_use_gso && !cfg_use_csum_off)
+		error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+	int fdr, fds, total_len;
+
+	fdr = setup_rx();
+	fds = setup_sniffer();
+
+	total_len = do_tx();
+
+	/* BPF filter accepts only this length, vlan changes MAC */
+	if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+		do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+		      tbuf + sizeof(struct virtio_net_hdr));
+
+	do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+	if (close(fds))
+		error(1, errno, "close s");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (system("ip link set dev lo mtu 1500"))
+		error(1, errno, "ip link set mtu");
+	if (system("ip addr add dev lo 172.17.0.1/24"))
+		error(1, errno, "ip addr add");
+
+	run_test();
+
+	fprintf(stderr, "OK\n\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000000..6331d91b86a6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: implies -l, because with gso len must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 7f6cd9fdacf3..7ec4fa4d55dc 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -60,6 +60,8 @@
 
 #include "psock_lib.h"
 
+#include "../kselftest.h"
+
 #ifndef bug_on
 # define bug_on(cond)		assert(!(cond))
 #endif
@@ -825,7 +827,7 @@ static int test_tpacket(int version, int type)
 		fprintf(stderr, "test: skip %s %s since user and kernel "
 			"space have different bit width\n",
 			tpacket_str[version], type_str[type]);
-		return 0;
+		return KSFT_SKIP;
 	}
 
 	sock = pfsocket(version);
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index cad14cd0ea92..b5277106df1f 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -437,14 +437,19 @@ void enable_fastopen(void)
 	}
 }
 
-static struct rlimit rlim_old, rlim_new;
+static struct rlimit rlim_old;
 
 static  __attribute__((constructor)) void main_ctor(void)
 {
 	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
-	rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
-	rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
-	setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+
+	if (rlim_old.rlim_cur != RLIM_INFINITY) {
+		struct rlimit rlim_new;
+
+		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+	}
 }
 
 static __attribute__((destructor)) void main_dtor(void)
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index e6f485235435..e101af52d1d6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 #
 # This test is for checking rtnetlink callpaths, and get as much coverage as possible.
 #
@@ -7,6 +7,9 @@
 devdummy="test-dummy0"
 ret=0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # set global exit status, but never reset nonzero one.
 check_err()
 {
@@ -333,7 +336,7 @@ kci_test_vrf()
 	ip link show type vrf 2>/dev/null
 	if [ $? -ne 0 ]; then
 		echo "SKIP: vrf: iproute2 too old"
-		return 0
+		return $ksft_skip
 	fi
 
 	ip link add "$vrfname" type vrf table 10
@@ -409,7 +412,7 @@ kci_test_encap_fou()
 	ip fou help 2>&1 |grep -q 'Usage: ip fou'
 	if [ $? -ne 0 ];then
 		echo "SKIP: fou: iproute2 too old"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
@@ -444,7 +447,7 @@ kci_test_encap()
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP encap tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns exec "$testns" ip link set lo up
@@ -469,7 +472,7 @@ kci_test_macsec()
 	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
 	if [ $? -ne 0 ]; then
 		echo "SKIP: macsec: iproute2 too old"
-		return 0
+		return $ksft_skip
 	fi
 
 	ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
@@ -502,6 +505,225 @@ kci_test_macsec()
 	echo "PASS: macsec"
 }
 
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+#    ip x s update
+#    ip x s allocspi
+#    ip x s deleteall
+#    ip x p update
+#    ip x p deleteall
+#    ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+	ret=0
+	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+	srcip=192.168.123.1
+	dstip=192.168.123.2
+	spi=7
+
+	ip addr add $srcip dev $devdummy
+
+	# flush to be sure there's nothing configured
+	ip x s flush ; ip x p flush
+	check_err $?
+
+	# start the monitor in the background
+	tmpfile=`mktemp /var/run/ipsectestXXX`
+	mpid=`(ip x m > $tmpfile & echo $!) 2>/dev/null`
+	sleep 0.2
+
+	ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+	ip x s add $ipsecid \
+            mode transport reqid 0x07 replay-window 32 \
+            $algo sel src $srcip/24 dst $dstip/24
+	check_err $?
+
+	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x s count | grep -q "SAD count 1"
+	check_err $?
+
+	lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x s delete $ipsecid
+	check_err $?
+
+	lines=`ip x s list | wc -l`
+	test $lines -eq 0
+	check_err $?
+
+	ipsecsel="dir out src $srcip/24 dst $dstip/24"
+	ip x p add $ipsecsel \
+		    tmpl proto esp src $srcip dst $dstip \
+		    spi 0x07 mode transport reqid 0x07
+	check_err $?
+
+	lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x p count | grep -q "SPD IN  0 OUT 1 FWD 0"
+	check_err $?
+
+	lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x p delete $ipsecsel
+	check_err $?
+
+	lines=`ip x p list | wc -l`
+	test $lines -eq 0
+	check_err $?
+
+	# check the monitor results
+	kill $mpid
+	lines=`wc -l $tmpfile | cut "-d " -f1`
+	test $lines -eq 20
+	check_err $?
+	rm -rf $tmpfile
+
+	# clean up any leftovers
+	ip x s flush
+	check_err $?
+	ip x p flush
+	check_err $?
+	ip addr del $srcip/32 dev $devdummy
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec"
+		return 1
+	fi
+	echo "PASS: ipsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24
+#            offload dev sim1 dir out
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+kci_test_ipsec_offload()
+{
+	ret=0
+	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+	srcip=192.168.123.3
+	dstip=192.168.123.4
+	dev=simx1
+	sysfsd=/sys/kernel/debug/netdevsim/$dev
+	sysfsf=$sysfsd/ipsec
+
+	# setup netdevsim since dummydev doesn't have offload support
+	modprobe netdevsim
+	check_err $?
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec_offload can't load netdevsim"
+		return 1
+	fi
+
+	ip link add $dev type netdevsim
+	ip addr add $srcip dev $dev
+	ip link set $dev up
+	if [ ! -d $sysfsd ] ; then
+		echo "FAIL: ipsec_offload can't create device $dev"
+		return 1
+	fi
+	if [ ! -f $sysfsf ] ; then
+		echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+		return 1
+	fi
+
+	# flush to be sure there's nothing configured
+	ip x s flush ; ip x p flush
+
+	# create offloaded SAs, both in and out
+	ip x p add dir out src $srcip/24 dst $dstip/24 \
+	    tmpl proto esp src $srcip dst $dstip spi 9 \
+	    mode transport reqid 42
+	check_err $?
+	ip x p add dir out src $dstip/24 dst $srcip/24 \
+	    tmpl proto esp src $dstip dst $srcip spi 9 \
+	    mode transport reqid 42
+	check_err $?
+
+	ip x s add proto esp src $srcip dst $dstip spi 9 \
+	    mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
+	    offload dev $dev dir out
+	check_err $?
+	ip x s add proto esp src $dstip dst $srcip spi 9 \
+	    mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
+	    offload dev $dev dir in
+	check_err $?
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec_offload can't create SA"
+		return 1
+	fi
+
+	# does offload show up in ip output
+	lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
+	if [ $lines -ne 2 ] ; then
+		echo "FAIL: ipsec_offload SA offload missing from list output"
+		check_err 1
+	fi
+
+	# use ping to exercise the Tx path
+	ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
+
+	# does driver have correct offload info
+	diff $sysfsf - << EOF
+SA count=2 tx=3
+sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
+sa[0]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[0]    key=0x34333231 38373635 32313039 36353433
+sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
+sa[1]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[1]    key=0x34333231 38373635 32313039 36353433
+EOF
+	if [ $? -ne 0 ] ; then
+		echo "FAIL: ipsec_offload incorrect driver data"
+		check_err 1
+	fi
+
+	# does offload get removed from driver
+	ip x s flush
+	ip x p flush
+	lines=`grep -c "SA count=0" $sysfsf`
+	if [ $lines -ne 1 ] ; then
+		echo "FAIL: ipsec_offload SA not removed from driver"
+		check_err 1
+	fi
+
+	# clean up any leftovers
+	ip link del $dev
+	rmmod netdevsim
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec_offload"
+		return 1
+	fi
+	echo "PASS: ipsec_offload"
+}
+
 kci_test_gretap()
 {
 	testns="testns"
@@ -511,14 +733,14 @@ kci_test_gretap()
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP gretap tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip link help gretap 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: gretap: iproute2 too old"
 		ip netns del "$testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel
@@ -561,14 +783,14 @@ kci_test_ip6gretap()
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP ip6gretap tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: ip6gretap: iproute2 too old"
 		ip netns del "$testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel
@@ -611,13 +833,13 @@ kci_test_erspan()
 	ip link help erspan 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: erspan: iproute2 too old"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP erspan tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel erspan v1
@@ -676,13 +898,13 @@ kci_test_ip6erspan()
 	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: ip6erspan: iproute2 too old"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP ip6erspan tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel ip6erspan v1
@@ -755,6 +977,8 @@ kci_test_rtnl()
 	kci_test_vrf
 	kci_test_encap
 	kci_test_macsec
+	kci_test_ipsec
+	kci_test_ipsec_offload
 
 	kci_del_dummy
 }
@@ -762,14 +986,14 @@ kci_test_rtnl()
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 for x in ip tc;do
 	$x -Version 2>/dev/null >/dev/null
 	if [ $? -ne 0 ];then
 		echo "SKIP: Could not run test without the $x tool"
-		exit 0
+		exit $ksft_skip
 	fi
 done
 
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000000..d044b29ddabc
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (family) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr4->sin_port = htons(port);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_loopback;
+		addr6->sin6_port = htons(port);
+		break;
+	default:
+		error(1, 0, "illegal family");
+	}
+}
+
+void *start_server(void *arg)
+{
+	int server_fd = (int)(unsigned long)arg;
+	struct sockaddr_in addr;
+	socklen_t addrlen = sizeof(addr);
+	char *buf;
+	int fd;
+	int r;
+
+	buf = malloc(BUF_SIZE);
+
+	for (;;) {
+		fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+		if (fd == -1) {
+			perror("accept");
+			break;
+		}
+		do {
+			r = send(fd, buf, BUF_SIZE, 0);
+		} while (r < 0 && errno == EINTR);
+		if (r < 0)
+			perror("send");
+		if (r != BUF_SIZE)
+			fprintf(stderr, "can only send %d bytes\n", r);
+		/* TCP_INQ can overestimate in-queue by one byte if we send
+		 * the FIN packet. Sleep for 1 second, so that the client
+		 * likely invoked recvmsg().
+		 */
+		sleep(1);
+		close(fd);
+	}
+
+	free(buf);
+	close(server_fd);
+	pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listen_addr, addr;
+	int c, one = 1, inq = -1;
+	pthread_t server_thread;
+	char cmsgbuf[CMSG_SIZE];
+	struct iovec iov[1];
+	struct cmsghdr *cm;
+	struct msghdr msg;
+	int server_fd, fd;
+	char *buf;
+
+	while ((c = getopt(argc, argv, "46p:")) != -1) {
+		switch (c) {
+		case '4':
+			family = PF_INET;
+			addr_len = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			family = PF_INET6;
+			addr_len = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			port = atoi(optarg);
+			break;
+		}
+	}
+
+	server_fd = socket(family, SOCK_STREAM, 0);
+	if (server_fd < 0)
+		error(1, errno, "server socket");
+	setup_loopback_addr(family, &listen_addr);
+	if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+		       &one, sizeof(one)) != 0)
+		error(1, errno, "setsockopt(SO_REUSEADDR)");
+	if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+		 addr_len) == -1)
+		error(1, errno, "bind");
+	if (listen(server_fd, 128) == -1)
+		error(1, errno, "listen");
+	if (pthread_create(&server_thread, NULL, start_server,
+			   (void *)(unsigned long)server_fd) != 0)
+		error(1, errno, "pthread_create");
+
+	fd = socket(family, SOCK_STREAM, 0);
+	if (fd < 0)
+		error(1, errno, "client socket");
+	setup_loopback_addr(family, &addr);
+	if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+		error(1, errno, "connect");
+	if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+		error(1, errno, "setsockopt(TCP_INQ)");
+
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+	msg.msg_flags = 0;
+
+	buf = malloc(BUF_SIZE);
+	iov[0].iov_base = buf;
+	iov[0].iov_len = BUF_SIZE / 2;
+
+	if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+		error(1, errno, "recvmsg");
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, 0, "control message is truncated");
+
+	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+		if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+			inq = *((int *) CMSG_DATA(cm));
+
+	if (inq != BUF_SIZE - iov[0].iov_len) {
+		fprintf(stderr, "unexpected inq: %d\n", inq);
+		exit(1);
+	}
+
+	printf("PASSED\n");
+	free(buf);
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000000..e8c5dff448eb
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ *  tcp_mmap -s -z &
+ *  tcp_mmap -H ::1 -z
+ *
+ *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ *      (4096 : page size on x86, 12: TCP TS option length)
+ *  tcp_mmap -s -z -M $((4096+12)) &
+ *  tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s &                                 # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ *  cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1   # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z &                              # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY    0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning.  Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option */
+static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size  = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+	asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
+void hash_zone(void *zone, unsigned int length)
+{
+	unsigned long temp = htotal;
+
+	while (length >= 8*sizeof(long)) {
+		prefetch(zone + 384);
+		temp ^= *(unsigned long *)zone;
+		temp ^= *(unsigned long *)(zone + sizeof(long));
+		temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+		zone += 8*sizeof(long);
+		length -= 8*sizeof(long);
+	}
+	while (length >= 1) {
+		temp ^= *(unsigned char *)zone;
+		zone += 1;
+		length--;
+	}
+	htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+	unsigned long total_mmap = 0, total = 0;
+	struct tcp_zerocopy_receive zc;
+	unsigned long delta_usec;
+	int flags = MAP_SHARED;
+	struct timeval t0, t1;
+	char *buffer = NULL;
+	void *addr = NULL;
+	double throughput;
+	struct rusage ru;
+	int lu, fd;
+
+	fd = (int)(unsigned long)arg;
+
+	gettimeofday(&t0, NULL);
+
+	fcntl(fd, F_SETFL, O_NDELAY);
+	buffer = malloc(chunk_size);
+	if (!buffer) {
+		perror("malloc");
+		goto error;
+	}
+	if (zflg) {
+		addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+		if (addr == (void *)-1)
+			zflg = 0;
+	}
+	while (1) {
+		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+		int sub;
+
+		poll(&pfd, 1, 10000);
+		if (zflg) {
+			socklen_t zc_len = sizeof(zc);
+			int res;
+
+			zc.address = (__u64)addr;
+			zc.length = chunk_size;
+			zc.recv_skip_hint = 0;
+			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+					 &zc, &zc_len);
+			if (res == -1)
+				break;
+
+			if (zc.length) {
+				assert(zc.length <= chunk_size);
+				total_mmap += zc.length;
+				if (xflg)
+					hash_zone(addr, zc.length);
+				total += zc.length;
+			}
+			if (zc.recv_skip_hint) {
+				assert(zc.recv_skip_hint <= chunk_size);
+				lu = read(fd, buffer, zc.recv_skip_hint);
+				if (lu > 0) {
+					if (xflg)
+						hash_zone(buffer, lu);
+					total += lu;
+				}
+			}
+			continue;
+		}
+		sub = 0;
+		while (sub < chunk_size) {
+			lu = read(fd, buffer + sub, chunk_size - sub);
+			if (lu == 0)
+				goto end;
+			if (lu < 0)
+				break;
+			if (xflg)
+				hash_zone(buffer + sub, lu);
+			total += lu;
+			sub += lu;
+		}
+	}
+end:
+	gettimeofday(&t1, NULL);
+	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+	throughput = 0;
+	if (delta_usec)
+		throughput = total * 8.0 / (double)delta_usec / 1000.0;
+	getrusage(RUSAGE_THREAD, &ru);
+	if (total > 1024*1024) {
+		unsigned long total_usec;
+		unsigned long mb = total >> 20;
+		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+				total / (1024.0 * 1024.0),
+				100.0*total_mmap/total,
+				(double)delta_usec / 1000000.0,
+				throughput,
+				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+				(double)total_usec/mb,
+				ru.ru_nvcsw);
+	}
+error:
+	free(buffer);
+	close(fd);
+	if (zflg)
+		munmap(addr, chunk_size);
+	pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+	if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+				 SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+		perror("setsockopt SO_RCVBUF");
+	}
+
+	if (sndbuf && setsockopt(fd, SOL_SOCKET,
+				 SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+		perror("setsockopt SO_SNDBUF");
+	}
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static void do_accept(int fdlisten)
+{
+	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+		       &chunk_size, sizeof(chunk_size)) == -1) {
+		perror("setsockopt SO_RCVLOWAT");
+	}
+
+	apply_rcvsnd_buf(fdlisten);
+
+	while (1) {
+		struct sockaddr_in addr;
+		socklen_t addrlen = sizeof(addr);
+		pthread_t th;
+		int fd, res;
+
+		fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+		if (fd == -1) {
+			perror("accept");
+			continue;
+		}
+		res = pthread_create(&th, NULL, child_thread,
+				     (void *)(unsigned long)fd);
+		if (res) {
+			errno = res;
+			perror("pthread_create");
+			close(fd);
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listenaddr, addr;
+	unsigned int max_pacing_rate = 0;
+	unsigned long total = 0;
+	char *host = NULL;
+	int fd, c, on = 1;
+	char *buffer;
+	int sflg = 0;
+	int mss = 0;
+
+	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			cfg_port = atoi(optarg);
+			break;
+		case 'H':
+			host = optarg;
+			break;
+		case 's': /* server : listen for incoming connections */
+			sflg++;
+			break;
+		case 'r':
+			rcvbuf = atoi(optarg);
+			break;
+		case 'w':
+			sndbuf = atoi(optarg);
+			break;
+		case 'z':
+			zflg = 1;
+			break;
+		case 'M':
+			mss = atoi(optarg);
+			break;
+		case 'x':
+			xflg = 1;
+			break;
+		case 'k':
+			keepflag = 1;
+			break;
+		case 'P':
+			max_pacing_rate = atoi(optarg) ;
+			break;
+		default:
+			exit(1);
+		}
+	}
+	if (sflg) {
+		int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+		if (fdlisten == -1) {
+			perror("socket");
+			exit(1);
+		}
+		apply_rcvsnd_buf(fdlisten);
+		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+		setup_sockaddr(cfg_family, host, &listenaddr);
+
+		if (mss &&
+		    setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+			       &mss, sizeof(mss)) == -1) {
+			perror("setsockopt TCP_MAXSEG");
+			exit(1);
+		}
+		if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+			perror("bind");
+			exit(1);
+		}
+		if (listen(fdlisten, 128) == -1) {
+			perror("listen");
+			exit(1);
+		}
+		do_accept(fdlisten);
+	}
+	buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buffer == (char *)-1) {
+		perror("mmap");
+		exit(1);
+	}
+
+	fd = socket(cfg_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+	apply_rcvsnd_buf(fd);
+
+	setup_sockaddr(cfg_family, host, &addr);
+
+	if (mss &&
+	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+		perror("setsockopt TCP_MAXSEG");
+		exit(1);
+	}
+	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+		perror("connect");
+		exit(1);
+	}
+	if (max_pacing_rate &&
+	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+		perror("setsockopt SO_MAX_PACING_RATE");
+
+	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+			       &on, sizeof(on)) == -1) {
+		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+		zflg = 0;
+	}
+	while (total < FILE_SZ) {
+		long wr = FILE_SZ - total;
+
+		if (wr > chunk_size)
+			wr = chunk_size;
+		/* Note : we just want to fill the pipe with 0 bytes */
+		wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+		if (wr <= 0)
+			break;
+		total += wr;
+	}
+	close(fd);
+	munmap(buffer, chunk_size);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
new file mode 100755
index 000000000000..2d442cdab11e
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Check FDB default-remote handling across "ip link set".
+
+check_remotes()
+{
+	local what=$1; shift
+	local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
+
+	echo -ne "expected two remotes after $what\t"
+	if [[ $N != 2 ]]; then
+		echo "[FAIL]"
+		EXIT_STATUS=1
+	else
+		echo "[ OK ]"
+	fi
+}
+
+ip link add name vx up type vxlan id 2000 dstport 4789
+bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
+bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
+check_remotes "fdb append"
+
+ip link set dev vx type vxlan remote 192.0.2.30
+check_remotes "link set"
+
+ip link del dev vx
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
new file mode 100644
index 000000000000..fac68d710f35
--- /dev/null
+++ b/tools/testing/selftests/net/tls.c
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/tls.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+
+#include <sys/types.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "../kselftest_harness.h"
+
+#define TLS_PAYLOAD_MAX_LEN 16384
+#define SOL_TLS 282
+
+FIXTURE(tls)
+{
+	int fd, cfd;
+	bool notls;
+};
+
+FIXTURE_SETUP(tls)
+{
+	struct tls12_crypto_info_aes_gcm_128 tls12;
+	struct sockaddr_in addr;
+	socklen_t len;
+	int sfd, ret;
+
+	self->notls = false;
+	len = sizeof(addr);
+
+	memset(&tls12, 0, sizeof(tls12));
+	tls12.info.version = TLS_1_2_VERSION;
+	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = 0;
+
+	self->fd = socket(AF_INET, SOCK_STREAM, 0);
+	sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+	ret = bind(sfd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+	ret = listen(sfd, 10);
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockname(sfd, &addr, &len);
+	ASSERT_EQ(ret, 0);
+
+	ret = connect(self->fd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (ret != 0) {
+		self->notls = true;
+		printf("Failure setting TCP_ULP, testing without tls\n");
+	}
+
+	if (!self->notls) {
+		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
+				 sizeof(tls12));
+		ASSERT_EQ(ret, 0);
+	}
+
+	self->cfd = accept(sfd, &addr, &len);
+	ASSERT_GE(self->cfd, 0);
+
+	if (!self->notls) {
+		ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
+				 sizeof("tls"));
+		ASSERT_EQ(ret, 0);
+
+		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
+				 sizeof(tls12));
+		ASSERT_EQ(ret, 0);
+	}
+
+	close(sfd);
+}
+
+FIXTURE_TEARDOWN(tls)
+{
+	close(self->fd);
+	close(self->cfd);
+}
+
+TEST_F(tls, sendfile)
+{
+	int filefd = open("/proc/self/exe", O_RDONLY);
+	struct stat st;
+
+	EXPECT_GE(filefd, 0);
+	fstat(filefd, &st);
+	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+}
+
+TEST_F(tls, send_then_sendfile)
+{
+	int filefd = open("/proc/self/exe", O_RDONLY);
+	char const *test_str = "test_send";
+	int to_send = strlen(test_str) + 1;
+	char recv_buf[10];
+	struct stat st;
+	char *buf;
+
+	EXPECT_GE(filefd, 0);
+	fstat(filefd, &st);
+	buf = (char *)malloc(st.st_size);
+
+	EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
+	EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send);
+	EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
+
+	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+	EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
+}
+
+TEST_F(tls, recv_max)
+{
+	unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+	char recv_mem[TLS_PAYLOAD_MAX_LEN];
+	char buf[TLS_PAYLOAD_MAX_LEN];
+
+	EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
+	EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recv_small)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10];
+
+	send_len = strlen(test_str) + 1;
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, msg_more)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10 * 2];
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL),
+		  send_len * 2);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_single)
+{
+	struct msghdr msg;
+
+	char const *test_str = "test_sendmsg";
+	size_t send_len = 13;
+	struct iovec vec;
+	char buf[13];
+
+	vec.iov_base = (char *)test_str;
+	vec.iov_len = send_len;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_large)
+{
+	void *mem = malloc(16384);
+	size_t send_len = 16384;
+	size_t sends = 128;
+	struct msghdr msg;
+	size_t recvs = 0;
+	size_t sent = 0;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+	while (sent++ < sends) {
+		struct iovec vec = { (void *)mem, send_len };
+
+		msg.msg_iov = &vec;
+		msg.msg_iovlen = 1;
+		EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+	}
+
+	while (recvs++ < sends)
+		EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+
+	free(mem);
+}
+
+TEST_F(tls, sendmsg_multiple)
+{
+	char const *test_str = "test_sendmsg_multiple";
+	struct iovec vec[5];
+	char *test_strs[5];
+	struct msghdr msg;
+	int total_len = 0;
+	int len_cmp = 0;
+	int iov_len = 5;
+	char *buf;
+	int i;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+	for (i = 0; i < iov_len; i++) {
+		test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+		snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+		vec[i].iov_base = (void *)test_strs[i];
+		vec[i].iov_len = strlen(test_strs[i]) + 1;
+		total_len += vec[i].iov_len;
+	}
+	msg.msg_iov = vec;
+	msg.msg_iovlen = iov_len;
+
+	EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+	buf = malloc(total_len);
+	EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+	for (i = 0; i < iov_len; i++) {
+		EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
+				 strlen(test_strs[i])),
+			  0);
+		len_cmp += strlen(buf + len_cmp) + 1;
+	}
+	for (i = 0; i < iov_len; i++)
+		free(test_strs[i]);
+	free(buf);
+}
+
+TEST_F(tls, sendmsg_multiple_stress)
+{
+	char const *test_str = "abcdefghijklmno";
+	struct iovec vec[1024];
+	char *test_strs[1024];
+	int iov_len = 1024;
+	int total_len = 0;
+	char buf[1 << 14];
+	struct msghdr msg;
+	int len_cmp = 0;
+	int i;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+	for (i = 0; i < iov_len; i++) {
+		test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+		snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+		vec[i].iov_base = (void *)test_strs[i];
+		vec[i].iov_len = strlen(test_strs[i]) + 1;
+		total_len += vec[i].iov_len;
+	}
+	msg.msg_iov = vec;
+	msg.msg_iovlen = iov_len;
+
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
+	EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
+
+	for (i = 0; i < iov_len; i++)
+		len_cmp += strlen(buf + len_cmp) + 1;
+
+	for (i = 0; i < iov_len; i++)
+		free(test_strs[i]);
+}
+
+TEST_F(tls, splice_from_pipe)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_GE(write(p[1], mem_send, send_len), 0);
+	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_from_pipe2)
+{
+	int send_len = 16000;
+	char mem_send[16000];
+	char mem_recv[16000];
+	int p2[2];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	ASSERT_GE(pipe(p2), 0);
+	EXPECT_GE(write(p[1], mem_send, 8000), 0);
+	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
+	EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
+	EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, send_and_splice)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	char const *test_str = "test_read";
+	int send_len2 = 10;
+	char buf[10];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
+	EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
+	EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
+
+	EXPECT_GE(write(p[1], mem_send, send_len), send_len);
+	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
+
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_to_pipe)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
+	EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
+	EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single)
+{
+	char const *test_str = "test_recvmsg_single";
+	int send_len = strlen(test_str) + 1;
+	char buf[20];
+	struct msghdr hdr;
+	struct iovec vec;
+
+	memset(&hdr, 0, sizeof(hdr));
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	vec.iov_base = (char *)buf;
+	vec.iov_len = send_len;
+	hdr.msg_iovlen = 1;
+	hdr.msg_iov = &vec;
+	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single_max)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char send_mem[TLS_PAYLOAD_MAX_LEN];
+	char recv_mem[TLS_PAYLOAD_MAX_LEN];
+	struct iovec vec;
+	struct msghdr hdr;
+
+	EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
+	vec.iov_base = (char *)recv_mem;
+	vec.iov_len = TLS_PAYLOAD_MAX_LEN;
+
+	hdr.msg_iovlen = 1;
+	hdr.msg_iov = &vec;
+	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+	EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_multiple)
+{
+	unsigned int msg_iovlen = 1024;
+	unsigned int len_compared = 0;
+	struct iovec vec[1024];
+	char *iov_base[1024];
+	unsigned int iov_len = 16;
+	int send_len = 1 << 14;
+	char buf[1 << 14];
+	struct msghdr hdr;
+	int i;
+
+	EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
+	for (i = 0; i < msg_iovlen; i++) {
+		iov_base[i] = (char *)malloc(iov_len);
+		vec[i].iov_base = iov_base[i];
+		vec[i].iov_len = iov_len;
+	}
+
+	hdr.msg_iovlen = msg_iovlen;
+	hdr.msg_iov = vec;
+	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+	for (i = 0; i < msg_iovlen; i++)
+		len_compared += iov_len;
+
+	for (i = 0; i < msg_iovlen; i++)
+		free(iov_base[i]);
+}
+
+TEST_F(tls, single_send_multiple_recv)
+{
+	unsigned int total_len = TLS_PAYLOAD_MAX_LEN * 2;
+	unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+	char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
+	char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+
+	EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
+	memset(recv_mem, 0, total_len);
+
+	EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+	EXPECT_NE(recv(self->cfd, recv_mem + send_len, send_len, 0), -1);
+	EXPECT_EQ(memcmp(send_mem, recv_mem, total_len), 0);
+}
+
+TEST_F(tls, multiple_send_single_recv)
+{
+	unsigned int total_len = 2 * 10;
+	unsigned int send_len = 10;
+	char recv_mem[2 * 10];
+	char send_mem[10];
+
+	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+	memset(recv_mem, 0, total_len);
+	EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len);
+
+	EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+	EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
+}
+
+TEST_F(tls, recv_partial)
+{
+	char const *test_str = "test_read_partial";
+	char const *test_str_first = "test_read";
+	char const *test_str_second = "_partial";
+	int send_len = strlen(test_str) + 1;
+	char recv_mem[18];
+
+	memset(recv_mem, 0, sizeof(recv_mem));
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1);
+	EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
+	memset(recv_mem, 0, sizeof(recv_mem));
+	EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1);
+	EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
+		  0);
+}
+
+TEST_F(tls, recv_nonblock)
+{
+	char buf[4096];
+	bool err;
+
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+	err = (errno == EAGAIN || errno == EWOULDBLOCK);
+	EXPECT_EQ(err, true);
+}
+
+TEST_F(tls, recv_peek)
+{
+	char const *test_str = "test_read_peek";
+	int send_len = strlen(test_str) + 1;
+	char buf[15];
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+	memset(buf, 0, sizeof(buf));
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple)
+{
+	char const *test_str = "test_read_peek";
+	int send_len = strlen(test_str) + 1;
+	unsigned int num_peeks = 100;
+	char buf[15];
+	int i;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	for (i = 0; i < num_peeks; i++) {
+		EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+		EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+		memset(buf, 0, sizeof(buf));
+	}
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple_records)
+{
+	char const *test_str = "test_read_peek_mult_recs";
+	char const *test_str_first = "test_read_peek";
+	char const *test_str_second = "_mult_recs";
+	int len;
+	char buf[64];
+
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = strlen(test_str_first);
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+	/* MSG_PEEK can only peek into the current record. */
+	len = strlen(test_str_first);
+	EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
+
+	len = strlen(test_str) + 1;
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
+
+	/* Non-MSG_PEEK will advance strparser (and therefore record)
+	 * however.
+	 */
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+
+	/* MSG_MORE will hold current record open, so later MSG_PEEK
+	 * will see everything.
+	 */
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = strlen(test_str) + 1;
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+TEST_F(tls, recv_peek_large_buf_mult_recs)
+{
+	char const *test_str = "test_read_peek_mult_recs";
+	char const *test_str_first = "test_read_peek";
+	char const *test_str_second = "_mult_recs";
+	int len;
+	char buf[64];
+
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = sizeof(buf);
+	memset(buf, 0, len);
+	EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
+
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+
+TEST_F(tls, pollin)
+{
+	char const *test_str = "test_poll";
+	struct pollfd fd = { 0, 0, 0 };
+	char buf[10];
+	int send_len = 10;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	fd.fd = self->cfd;
+	fd.events = POLLIN;
+
+	EXPECT_EQ(poll(&fd, 1, 20), 1);
+	EXPECT_EQ(fd.revents & POLLIN, 1);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
+	/* Test timing out */
+	EXPECT_EQ(poll(&fd, 1, 20), 0);
+}
+
+TEST_F(tls, poll_wait)
+{
+	char const *test_str = "test_poll_wait";
+	int send_len = strlen(test_str) + 1;
+	struct pollfd fd = { 0, 0, 0 };
+	char recv_mem[15];
+
+	fd.fd = self->cfd;
+	fd.events = POLLIN;
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	/* Set timeout to inf. secs */
+	EXPECT_EQ(poll(&fd, 1, -1), 1);
+	EXPECT_EQ(fd.revents & POLLIN, 1);
+	EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len);
+}
+
+TEST_F(tls, blocking)
+{
+	size_t data = 100000;
+	int res = fork();
+
+	EXPECT_NE(res, -1);
+
+	if (res) {
+		/* parent */
+		size_t left = data;
+		char buf[16384];
+		int status;
+		int pid2;
+
+		while (left) {
+			int res = send(self->fd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+
+		pid2 = wait(&status);
+		EXPECT_EQ(status, 0);
+		EXPECT_EQ(res, pid2);
+	} else {
+		/* child */
+		size_t left = data;
+		char buf[16384];
+
+		while (left) {
+			int res = recv(self->cfd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+	}
+}
+
+TEST_F(tls, nonblocking)
+{
+	size_t data = 100000;
+	int sendbuf = 100;
+	int flags;
+	int res;
+
+	flags = fcntl(self->fd, F_GETFL, 0);
+	fcntl(self->fd, F_SETFL, flags | O_NONBLOCK);
+	fcntl(self->cfd, F_SETFL, flags | O_NONBLOCK);
+
+	/* Ensure nonblocking behavior by imposing a small send
+	 * buffer.
+	 */
+	EXPECT_EQ(setsockopt(self->fd, SOL_SOCKET, SO_SNDBUF,
+			     &sendbuf, sizeof(sendbuf)), 0);
+
+	res = fork();
+	EXPECT_NE(res, -1);
+
+	if (res) {
+		/* parent */
+		bool eagain = false;
+		size_t left = data;
+		char buf[16384];
+		int status;
+		int pid2;
+
+		while (left) {
+			int res = send(self->fd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			if (res == -1 && errno == EAGAIN) {
+				eagain = true;
+				usleep(10000);
+				continue;
+			}
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+
+		EXPECT_TRUE(eagain);
+		pid2 = wait(&status);
+
+		EXPECT_EQ(status, 0);
+		EXPECT_EQ(res, pid2);
+	} else {
+		/* child */
+		bool eagain = false;
+		size_t left = data;
+		char buf[16384];
+
+		while (left) {
+			int res = recv(self->cfd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			if (res == -1 && errno == EAGAIN) {
+				eagain = true;
+				usleep(10000);
+				continue;
+			}
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+		EXPECT_TRUE(eagain);
+	}
+}
+
+TEST_F(tls, control_msg)
+{
+	if (self->notls)
+		return;
+
+	char cbuf[CMSG_SPACE(sizeof(char))];
+	char const *test_str = "test_read";
+	int cmsg_len = sizeof(char);
+	char record_type = 100;
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	int send_len = 10;
+	struct iovec vec;
+	char buf[10];
+
+	vec.iov_base = (char *)test_str;
+	vec.iov_len = 10;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_TLS;
+	/* test sending non-record types. */
+	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+	cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+	*CMSG_DATA(cmsg) = record_type;
+	msg.msg_controllen = cmsg->cmsg_len;
+
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+	/* Should fail because we didn't provide a control message */
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+	vec.iov_base = buf;
+	EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	EXPECT_NE(cmsg, NULL);
+	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+	record_type = *((unsigned char *)CMSG_DATA(cmsg));
+	EXPECT_EQ(record_type, 100);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000000..e279051bc631
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,693 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU	0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS	(1 << 6UL)
+#endif
+
+#define CONST_MTU_TEST	1500
+
+#define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6		(sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool		cfg_do_ipv4;
+static bool		cfg_do_ipv6;
+static bool		cfg_do_connected;
+static bool		cfg_do_connectionless;
+static bool		cfg_do_msgmore;
+static bool		cfg_do_setsockopt;
+static int		cfg_specific_test_id = -1;
+
+static const char	cfg_ifname[] = "lo";
+static unsigned short	cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+	int tlen;		/* send() buffer size, may exceed mss */
+	bool tfail;		/* send() call is expected to fail */
+	int gso_len;		/* mss after applying gso */
+	int r_num_mss;		/* recv(): number of calls of full mss */
+	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V4,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V4 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fail with GSO, because the segment
+		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+		 */
+		.tlen = CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send a single MSS + 1B */
+		.tlen = CONST_MSS_V4 + 1,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* send exactly 2 MSS */
+		.tlen = CONST_MSS_V4 * 2,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2 MSS + 1B */
+		.tlen = (CONST_MSS_V4 * 2) + 1,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send MAX segs */
+		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+	},
+
+	{
+		/* send MAX bytes */
+		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = CONST_MAX_SEGS_V4,
+		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+	},
+	{
+		/* send MAX + 1: fail */
+		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+		.gso_len = CONST_MSS_V4,
+		.tfail = true,
+	},
+	{
+		/* send a single 1B MSS: will fail, see single MSS above */
+		.tlen = 1,
+		.gso_len = 1,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
+	{
+		/* EOL */
+	}
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V6,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V6 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fail with GSO, because the segment
+		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+		 */
+		.tlen = CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send a single MSS + 1B */
+		.tlen = CONST_MSS_V6 + 1,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* send exactly 2 MSS */
+		.tlen = CONST_MSS_V6 * 2,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2 MSS + 1B */
+		.tlen = (CONST_MSS_V6 * 2) + 1,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send MAX segs */
+		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+	},
+
+	{
+		/* send MAX bytes */
+		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = CONST_MAX_SEGS_V6,
+		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+	},
+	{
+		/* send MAX + 1: fail */
+		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+		.gso_len = CONST_MSS_V6,
+		.tfail = true,
+	},
+	{
+		/* send a single 1B MSS: will fail, see single MSS above */
+		.tlen = 1,
+		.gso_len = 1,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
+	{
+		/* EOL */
+	}
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCGIFMTU, &ifr))
+		error(1, errno, "ioctl get mtu");
+
+	return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_mtu = mtu;
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCSIFMTU, &ifr))
+		error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+	int val;
+
+	val = get_device_mtu(fd, cfg_ifname);
+	fprintf(stderr, "device mtu (orig): %u\n", val);
+
+	__set_device_mtu(fd, cfg_ifname, mtu);
+	val = get_device_mtu(fd, cfg_ifname);
+	if (val != mtu)
+		error(1, 0, "unable to set device mtu to %u\n", val);
+
+	fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+	int level, name, val;
+
+	if (is_ipv4) {
+		level	= SOL_IP;
+		name	= IP_MTU_DISCOVER;
+		val	= IP_PMTUDISC_DO;
+	} else {
+		level	= SOL_IPV6;
+		name	= IPV6_MTU_DISCOVER;
+		val	= IPV6_PMTUDISC_DO;
+	}
+
+	if (setsockopt(fd, level, name, &val, sizeof(val)))
+		error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+	socklen_t vallen;
+	unsigned int mtu;
+	int ret;
+
+	vallen = sizeof(mtu);
+	if (is_ipv4)
+		ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+	else
+		ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+	if (ret)
+		error(1, errno, "getsockopt mtu");
+
+
+	fprintf(stderr, "path mtu (read):  %u\n", mtu);
+	return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	struct rtmsg *rt;
+	char data[NLMSG_ALIGN(sizeof(*nh)) +
+		  NLMSG_ALIGN(sizeof(*rt)) +
+		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+	int fd, ret, alen, off = 0;
+
+	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (fd == -1)
+		error(1, errno, "socket netlink");
+
+	memset(data, 0, sizeof(data));
+
+	nh = (void *)data;
+	nh->nlmsg_type = RTM_NEWROUTE;
+	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+	off += NLMSG_ALIGN(sizeof(*nh));
+
+	rt = (void *)(data + off);
+	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+	rt->rtm_table = RT_TABLE_MAIN;
+	rt->rtm_dst_len = alen << 3;
+	rt->rtm_protocol = RTPROT_BOOT;
+	rt->rtm_scope = RT_SCOPE_UNIVERSE;
+	rt->rtm_type = RTN_UNICAST;
+	off += NLMSG_ALIGN(sizeof(*rt));
+
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_DST;
+	rta->rta_len = RTA_LENGTH(alen);
+	if (is_ipv4)
+		memcpy(RTA_DATA(rta), &addr4, alen);
+	else
+		memcpy(RTA_DATA(rta), &addr6, alen);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_OIF;
+	rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* MTU is a subtype in a metrics type */
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_METRICS;
+	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* now fill MTU subtype. Note that it fits within above rta_len */
+	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+	rta->rta_type = RTAX_MTU;
+	rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = mtu;
+
+	nh->nlmsg_len = off;
+
+	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+	if (ret != off)
+		error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+	if (close(fd))
+		error(1, errno, "close netlink");
+
+	fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+	int ret;
+
+	ret = sendmsg(fd, msg, flags);
+	if (ret == -1 &&
+	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
+		return false;
+	if (ret == -1)
+		error(1, errno, "sendmsg");
+	if (ret != msg->msg_iov->iov_len)
+		error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+	if (msg->msg_flags)
+		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+	return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+		     struct sockaddr *addr, socklen_t alen)
+{
+	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	msg.msg_name = addr;
+	msg.msg_namelen = alen;
+
+	if (gso_len && !cfg_do_setsockopt) {
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_UDP;
+		cm->cmsg_type = UDP_SEGMENT;
+		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+		*((uint16_t *) CMSG_DATA(cm)) = gso_len;
+	}
+
+	/* If MSG_MORE, send 1 byte followed by remainder */
+	if (cfg_do_msgmore && len > 1) {
+		iov.iov_len = 1;
+		if (!__send_one(fd, &msg, MSG_MORE))
+			error(1, 0, "send 1B failed");
+
+		iov.iov_base++;
+		iov.iov_len = len - 1;
+	}
+
+	return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+	int ret;
+
+	ret = recv(fd, buf, sizeof(buf), flags);
+	if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+		return 0;
+	if (ret == -1)
+		error(1, errno, "recv");
+
+	return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+		    struct sockaddr *addr, socklen_t alen)
+{
+	int i, ret, val, mss;
+	bool sent;
+
+	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+			addr->sa_family == AF_INET ? 4 : 6,
+			test->tlen, test->gso_len,
+			test->tfail ? "(fail)" : "");
+
+	val = test->gso_len;
+	if (cfg_do_setsockopt) {
+		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+			error(1, errno, "setsockopt udp segment");
+	}
+
+	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+	if (sent && test->tfail)
+		error(1, 0, "send succeeded while expecting failure");
+	if (!sent && !test->tfail)
+		error(1, 0, "send failed while expecting success");
+	if (!sent)
+		return;
+
+	if (test->gso_len)
+		mss = test->gso_len;
+	else
+		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+
+	/* Recv all full MSS datagrams */
+	for (i = 0; i < test->r_num_mss; i++) {
+		ret = recv_one(fdr, 0);
+		if (ret != mss)
+			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+	}
+
+	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
+	if (test->r_len_last) {
+		ret = recv_one(fdr, 0);
+		if (ret != test->r_len_last)
+			error(1, 0, "recv.%d: %d != %d (last)",
+			      i, ret, test->r_len_last);
+	}
+
+	/* Verify received all data */
+	ret = recv_one(fdr, MSG_DONTWAIT);
+	if (ret)
+		error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+	struct testcase *tests, *test;
+
+	tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+	for (test = tests; test->tlen; test++) {
+		/* if a specific test is given, then skip all others */
+		if (cfg_specific_test_id == -1 ||
+		    cfg_specific_test_id == test - tests)
+			run_one(test, fdt, fdr, addr, alen);
+	}
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fdr, fdt, val;
+
+	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	if (bind(fdr, addr, alen))
+		error(1, errno, "bind");
+
+	/* Have tests fail quickly instead of hang */
+	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	/* Do not fragment these datagrams: only succeed if GSO works */
+	set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+	if (cfg_do_connectionless) {
+		set_device_mtu(fdt, CONST_MTU_TEST);
+		run_all(fdt, fdr, addr, alen);
+	}
+
+	if (cfg_do_connected) {
+		set_device_mtu(fdt, CONST_MTU_TEST + 100);
+		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+		if (connect(fdt, addr, alen))
+			error(1, errno, "connect");
+
+		val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+		if (val != CONST_MTU_TEST)
+			error(1, 0, "bad path mtu %u\n", val);
+
+		run_all(fdt, fdr, addr, 0 /* use connected addr */);
+	}
+
+	if (close(fdt))
+		error(1, errno, "close t");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+	struct sockaddr_in addr = {0};
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(cfg_port);
+	addr.sin_addr = addr4;
+
+	run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+	struct sockaddr_in6 addr = {0};
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_port = htons(cfg_port);
+	addr.sin6_addr = addr6;
+
+	run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'c':
+			cfg_do_connected = true;
+			break;
+		case 'C':
+			cfg_do_connectionless = true;
+			break;
+		case 'm':
+			cfg_do_msgmore = true;
+			break;
+		case 's':
+			cfg_do_setsockopt = true;
+			break;
+		case 't':
+			cfg_specific_test_id = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_do_ipv4)
+		run_test_v4();
+	if (cfg_do_ipv6)
+		run_test_v6();
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000000..fec24f584fe9
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000000..99e537ab5ad9
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+	local -r jobs="$(jobs -p)"
+
+	if [[ "${jobs}" != "" ]]; then
+		kill -1 ${jobs} 2>/dev/null
+	fi
+}
+trap wake_children EXIT
+
+run_one() {
+	local -r args=$@
+
+	./udpgso_bench_rx &
+	./udpgso_bench_rx -t &
+
+	./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+	local -r args=$@
+
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+	local -r args=$@
+
+	echo "udp"
+	run_in_netns ${args}
+
+	echo "udp gso"
+	run_in_netns ${args} -S
+}
+
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp"
+	run_in_netns ${args} -t
+
+	echo "tcp zerocopy"
+	run_in_netns ${args} -t -z
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+	local -r ipv6_args="${core_args} -6 -D ::1"
+
+	echo "ipv4"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv4_args}"
+
+	echo "ipv6"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000000..727cf67a3f75
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int  cfg_port		= 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+	if (signum == SIGINT)
+		interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	do {
+		ret = poll(&pfd, 1, 10);
+		if (ret == -1)
+			error(1, errno, "poll");
+		if (ret == 0)
+			continue;
+		if (pfd.revents != POLLIN)
+			error(1, errno, "poll: 0x%x expected 0x%x\n",
+					pfd.revents, POLLIN);
+	} while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+	struct sockaddr_in6 addr = {0};
+	int fd, val;
+
+	fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	val = 1 << 21;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+	val = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+		error(1, errno, "setsockopt reuseport");
+
+	addr.sin6_family =	PF_INET6;
+	addr.sin6_port =	htons(cfg_port);
+	addr.sin6_addr =	in6addr_any;
+	if (bind(fd, (void *) &addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	if (do_tcp) {
+		int accept_fd = fd;
+
+		if (listen(accept_fd, 1))
+			error(1, errno, "listen");
+
+		do_poll(accept_fd);
+
+		fd = accept(accept_fd, NULL, NULL);
+		if (fd == -1)
+			error(1, errno, "accept");
+		if (close(accept_fd))
+			error(1, errno, "close accept fd");
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+	int ret;
+
+	while (true) {
+		/* MSG_TRUNC flushes up to len bytes */
+		ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "flush");
+		if (ret == 0) {
+			/* client detached */
+			exit(0);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+
+}
+
+static char sanitized_char(char val)
+{
+	return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+	char cur = data[0];
+	int i;
+
+	/* verify contents */
+	if (cur < 'a' || cur > 'z')
+		error(1, 0, "data initial byte out of range");
+
+	for (i = 1; i < len; i++) {
+		if (cur == 'z')
+			cur = 'a';
+		else
+			cur++;
+
+		if (data[i] != cur)
+			error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+			      i, len,
+			      sanitized_char(data[i]), data[i],
+			      sanitized_char(cur), cur);
+	}
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+	static char rbuf[ETH_DATA_LEN];
+	int ret, len, budget = 256;
+
+	len = cfg_verify ? sizeof(rbuf) : 0;
+	while (budget--) {
+		/* MSG_TRUNC will make return value full datagram length */
+		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (len) {
+			if (ret == 0)
+				error(1, errno, "recv: 0 byte datagram\n");
+
+			do_verify_udp(rbuf, ret);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "ptv")) != -1) {
+		switch (c) {
+		case 'p':
+			cfg_port = htons(strtoul(optarg, NULL, 0));
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'v':
+			cfg_verify = true;
+			break;
+		}
+	}
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_tcp && cfg_verify)
+		error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+	unsigned long tnow, treport;
+	int fd;
+
+	fd = do_socket(cfg_tcp);
+
+	treport = gettimeofday_ms() + 1000;
+	do {
+		do_poll(fd);
+
+		if (cfg_tcp)
+			do_flush_tcp(fd);
+		else
+			do_flush_udp(fd);
+
+		tnow = gettimeofday_ms();
+		if (tnow > treport) {
+			if (packets)
+				fprintf(stderr,
+					"%s rx: %6lu MB/s %8lu calls/s\n",
+					cfg_tcp ? "tcp" : "udp",
+					bytes >> 20, packets);
+			bytes = packets = 0;
+			treport = tnow + 1000;
+		}
+
+	} while (!interrupted);
+
+	if (close(fd))
+		error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	signal(SIGINT, sigint_handler);
+
+	do_recv();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000000..e821564053cf
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY	60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY	0x4000000
+#endif
+
+#define NUM_PKT		100
+
+static bool	cfg_cache_trash;
+static int	cfg_cpu		= -1;
+static int	cfg_connected	= true;
+static int	cfg_family	= PF_UNSPEC;
+static uint16_t	cfg_mss;
+static int	cfg_payload_len	= (1472 * 42);
+static int	cfg_port	= 8000;
+static int	cfg_runtime_ms	= -1;
+static bool	cfg_segment;
+static bool	cfg_sendmmsg;
+static bool	cfg_tcp;
+static bool	cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+	if (signum == SIGINT)
+		interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		error(1, 0, "setaffinity %d", cpu);
+
+	return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static void flush_zerocopy(int fd)
+{
+	struct msghdr msg = {0};	/* flush */
+	int ret;
+
+	while (1) {
+		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret == -1)
+			error(1, errno, "errqueue");
+		if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+		msg.msg_flags = 0;
+	}
+}
+
+static int send_tcp(int fd, char *data)
+{
+	int ret, done = 0, count = 0;
+
+	while (done < cfg_payload_len) {
+		ret = send(fd, data + done, cfg_payload_len - done,
+			   cfg_zerocopy ? MSG_ZEROCOPY : 0);
+		if (ret == -1)
+			error(1, errno, "write");
+
+		done += ret;
+		count++;
+	}
+
+	return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+	int ret, total_len, len, count = 0;
+
+	total_len = cfg_payload_len;
+
+	while (total_len) {
+		len = total_len < cfg_mss ? total_len : cfg_mss;
+
+		ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+			     cfg_connected ? NULL : (void *)&cfg_dst_addr,
+			     cfg_connected ? 0 : cfg_alen);
+		if (ret == -1)
+			error(1, errno, "write");
+		if (ret != len)
+			error(1, errno, "write: %uB != %uB\n", ret, len);
+
+		total_len -= len;
+		count++;
+	}
+
+	return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+	const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+	struct mmsghdr mmsgs[max_nr_msg];
+	struct iovec iov[max_nr_msg];
+	unsigned int off = 0, left;
+	int i = 0, ret;
+
+	memset(mmsgs, 0, sizeof(mmsgs));
+
+	left = cfg_payload_len;
+	while (left) {
+		if (i == max_nr_msg)
+			error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+		iov[i].iov_base = data + off;
+		iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+		mmsgs[i].msg_hdr.msg_iov = iov + i;
+		mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+		off += iov[i].iov_len;
+		left -= iov[i].iov_len;
+		i++;
+	}
+
+	ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+	if (ret == -1)
+		error(1, errno, "sendmmsg");
+
+	return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+	uint16_t *valp;
+
+	cm->cmsg_level = SOL_UDP;
+	cm->cmsg_type = UDP_SEGMENT;
+	cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+	valp = (void *)CMSG_DATA(cm);
+	*valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+	char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	int ret;
+
+	iov.iov_base = data;
+	iov.iov_len = cfg_payload_len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+	msg.msg_name = (void *)&cfg_dst_addr;
+	msg.msg_namelen = cfg_alen;
+
+	ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+	if (ret == -1)
+		error(1, errno, "sendmsg");
+	if (ret != iov.iov_len)
+		error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+	return 1;
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+		    filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int max_len, hdrlen;
+	int c;
+
+	while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'c':
+			cfg_cache_trash = true;
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'D':
+			setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+			break;
+		case 'l':
+			cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'm':
+			cfg_sendmmsg = true;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			cfg_segment = true;
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'u':
+			cfg_connected = false;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_family == PF_UNSPEC)
+		error(1, 0, "must pass one of -4 or -6");
+	if (cfg_tcp && !cfg_connected)
+		error(1, 0, "connectionless tcp makes no sense");
+	if (cfg_segment && cfg_sendmmsg)
+		error(1, 0, "cannot combine segment offload and sendmmsg");
+
+	if (cfg_family == PF_INET)
+		hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+	else
+		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+	cfg_mss = ETH_DATA_LEN - hdrlen;
+	max_len = ETH_MAX_MTU - hdrlen;
+
+	if (cfg_payload_len > max_len)
+		error(1, 0, "payload length %u exceeds max %u",
+		      cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+	int level, name, val;
+
+	if (is_ipv4) {
+		level	= SOL_IP;
+		name	= IP_MTU_DISCOVER;
+		val	= IP_PMTUDISC_DO;
+	} else {
+		level	= SOL_IPV6;
+		name	= IPV6_MTU_DISCOVER;
+		val	= IPV6_PMTUDISC_DO;
+	}
+
+	if (setsockopt(fd, level, name, &val, sizeof(val)))
+		error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long num_msgs, num_sends;
+	unsigned long tnow, treport, tstop;
+	int fd, i, val;
+
+	parse_opts(argc, argv);
+
+	if (cfg_cpu > 0)
+		set_cpu(cfg_cpu);
+
+	for (i = 0; i < sizeof(buf[0]); i++)
+		buf[0][i] = 'a' + (i % 26);
+	for (i = 1; i < NUM_PKT; i++)
+		memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+	signal(SIGINT, sigint_handler);
+
+	fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	if (cfg_zerocopy) {
+		val = 1;
+		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+			error(1, errno, "setsockopt zerocopy");
+	}
+
+	if (cfg_connected &&
+	    connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+		error(1, errno, "connect");
+
+	if (cfg_segment)
+		set_pmtu_discover(fd, cfg_family == PF_INET);
+
+	num_msgs = num_sends = 0;
+	tnow = gettimeofday_ms();
+	tstop = tnow + cfg_runtime_ms;
+	treport = tnow + 1000;
+
+	i = 0;
+	do {
+		if (cfg_tcp)
+			num_sends += send_tcp(fd, buf[i]);
+		else if (cfg_segment)
+			num_sends += send_udp_segment(fd, buf[i]);
+		else if (cfg_sendmmsg)
+			num_sends += send_udp_sendmmsg(fd, buf[i]);
+		else
+			num_sends += send_udp(fd, buf[i]);
+		num_msgs++;
+
+		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+			flush_zerocopy(fd);
+
+		tnow = gettimeofday_ms();
+		if (tnow > treport) {
+			fprintf(stderr,
+				"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+				cfg_tcp ? "tcp" : "udp",
+				(num_msgs * cfg_payload_len) >> 20,
+				num_sends, num_msgs);
+			num_msgs = num_sends = 0;
+			treport = tnow + 1000;
+		}
+
+		/* cold cache when writing buffer */
+		if (cfg_cache_trash)
+			i = ++i < NUM_PKT ? i : 0;
+
+	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
new file mode 100644
index 000000000000..47ed6cef93fb
--- /dev/null
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for netfilter selftests
+
+TEST_PROGS := nft_trans_stress.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
new file mode 100644
index 000000000000..1017313e41a8
--- /dev/null
+++ b/tools/testing/selftests/netfilter/config
@@ -0,0 +1,2 @@
+CONFIG_NET_NS=y
+NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
new file mode 100755
index 000000000000..f1affd12c4b1
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# This test is for stress-testing the nf_tables config plane path vs.
+# packet path processing: Make sure we never release rules that are
+# still visible to other cpus.
+#
+# set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+testns=testns1
+tables="foo bar baz quux"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+tmp=$(mktemp)
+
+for table in $tables; do
+	echo add table inet "$table" >> "$tmp"
+	echo flush table inet "$table" >> "$tmp"
+
+	echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
+	echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		echo "add chain inet $table $chain" >> "$tmp"
+	done
+
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		for BASE in INPUT OUTPUT; do
+			echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
+		done
+		echo "add rule inet $table $chain counter return" >> "$tmp"
+	done
+done
+
+ip netns add "$testns"
+ip -netns "$testns" link set lo up
+
+lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
+cpunum=$((cpunum-1))
+for i in $(seq 0 $cpunum);do
+	mask=$(printf 0x%x $((1<<$i)))
+        ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
+        ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
+done)
+
+sleep 1
+
+for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
+
+for table in $tables;do
+	randsleep=$((RANDOM%10))
+	sleep $randsleep
+	ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+done
+
+randsleep=$((RANDOM%10))
+sleep $randsleep
+
+pkill -9 ping
+
+wait
+
+rm -f "$tmp"
+ip netns del "$testns"
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
index a728040edbe1..14cfcf006936 100644
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -5,6 +5,7 @@ TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
 
 all: $(TEST_PROGS)
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 clean:
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index f6b1338730db..b3ad909aefbc 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,7 +17,6 @@ SUB_DIRS = alignment		\
 	   benchmarks		\
 	   cache_shape		\
 	   copyloops		\
-	   context_switch	\
 	   dscr			\
 	   mm			\
 	   pmu			\
@@ -29,7 +28,8 @@ SUB_DIRS = alignment		\
 	   tm			\
 	   vphn         \
 	   math		\
-	   ptrace
+	   ptrace	\
+	   security
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
index 1d980e3d7039..6d4fd014511c 100644
--- a/tools/testing/selftests/powerpc/alignment/.gitignore
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -1,5 +1,2 @@
-copy_unaligned
 copy_first_unaligned
-paste_unaligned
-paste_last_unaligned
-copy_paste_unaligned_common
+alignment_handler
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index 083a48a008b4..d056486f49de 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -1,6 +1,6 @@
-TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \
-	paste_last_unaligned alignment_handler
+TEST_GEN_PROGS := copy_first_unaligned alignment_handler
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
-$(TEST_GEN_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0f2698f9fd6d..169a8b9719fb 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -40,6 +40,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -48,6 +49,8 @@
 #include <setjmp.h>
 #include <signal.h>
 
+#include <asm/cputable.h>
+
 #include "utils.h"
 
 int bufsize;
@@ -191,7 +194,7 @@ int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
  */
 int do_test(char *test_name, void (*test_func)(char *, char *))
 {
-	int offset, width, fd, rc = 0, r;
+	int offset, width, fd, rc, r;
 	void *mem0, *mem1, *ci0, *ci1;
 
 	printf("\tDoing %s:\t", test_name);
@@ -199,8 +202,8 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 	fd = open("/dev/fb0", O_RDWR);
 	if (fd < 0) {
 		printf("\n");
-		perror("Can't open /dev/fb0");
-		SKIP_IF(1);
+		perror("Can't open /dev/fb0 now?");
+		return 1;
 	}
 
 	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
@@ -226,6 +229,7 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 		return rc;
 	}
 
+	rc = 0;
 	/* offset = 0 no alignment fault, so skip */
 	for (offset = 1; offset < 16; offset++) {
 		width = 16; /* vsx == 16 bytes */
@@ -244,32 +248,51 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
 		r |= test_memcpy(mem1, mem0, width, offset, test_func);
 		if (r && !debug) {
 			printf("FAILED: Got signal");
+			rc = 1;
 			break;
 		}
 
 		r |= test_memcmp(mem1, ci1, width, offset, test_name);
-		rc |= r;
 		if (r && !debug) {
 			printf("FAILED: Wrong Data");
+			rc = 1;
 			break;
 		}
 	}
-	if (!r)
+
+	if (rc == 0)
 		printf("PASSED");
+
 	printf("\n");
 
 	munmap(ci0, bufsize);
 	munmap(ci1, bufsize);
 	free(mem0);
 	free(mem1);
+	close(fd);
 
 	return rc;
 }
 
+static bool can_open_fb0(void)
+{
+	int fd;
+
+	fd = open("/dev/fb0", O_RDWR);
+	if (fd < 0)
+		return false;
+
+	close(fd);
+	return true;
+}
+
 int test_alignment_handler_vsx_206(void)
 {
 	int rc = 0;
 
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
 	printf("VSX: 2.06B\n");
 	LOAD_VSX_XFORM_TEST(lxvd2x);
 	LOAD_VSX_XFORM_TEST(lxvw4x);
@@ -285,6 +308,9 @@ int test_alignment_handler_vsx_207(void)
 {
 	int rc = 0;
 
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	printf("VSX: 2.07B\n");
 	LOAD_VSX_XFORM_TEST(lxsspx);
 	LOAD_VSX_XFORM_TEST(lxsiwax);
@@ -298,6 +324,8 @@ int test_alignment_handler_vsx_300(void)
 {
 	int rc = 0;
 
+	SKIP_IF(!can_open_fb0());
+
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
 	printf("VSX: 3.00B\n");
 	LOAD_VMX_DFORM_TEST(lxsd);
@@ -328,6 +356,8 @@ int test_alignment_handler_integer(void)
 {
 	int rc = 0;
 
+	SKIP_IF(!can_open_fb0());
+
 	printf("Integer\n");
 	LOAD_DFORM_TEST(lbz);
 	LOAD_DFORM_TEST(lbzu);
@@ -354,7 +384,6 @@ int test_alignment_handler_integer(void)
 	LOAD_DFORM_TEST(ldu);
 	LOAD_XFORM_TEST(ldx);
 	LOAD_XFORM_TEST(ldux);
-	LOAD_XFORM_TEST(ldbrx);
 	LOAD_DFORM_TEST(lmw);
 	STORE_DFORM_TEST(stb);
 	STORE_XFORM_TEST(stbx);
@@ -374,8 +403,23 @@ int test_alignment_handler_integer(void)
 	STORE_XFORM_TEST(stdx);
 	STORE_DFORM_TEST(stdu);
 	STORE_XFORM_TEST(stdux);
-	STORE_XFORM_TEST(stdbrx);
 	STORE_DFORM_TEST(stmw);
+
+	return rc;
+}
+
+int test_alignment_handler_integer_206(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	printf("Integer: 2.06\n");
+
+	LOAD_XFORM_TEST(ldbrx);
+	STORE_XFORM_TEST(stdbrx);
+
 	return rc;
 }
 
@@ -383,6 +427,9 @@ int test_alignment_handler_vmx(void)
 {
 	int rc = 0;
 
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
+
 	printf("VMX\n");
 	LOAD_VMX_XFORM_TEST(lvx);
 
@@ -408,23 +455,19 @@ int test_alignment_handler_fp(void)
 {
 	int rc = 0;
 
+	SKIP_IF(!can_open_fb0());
+
 	printf("Floating point\n");
 	LOAD_FLOAT_DFORM_TEST(lfd);
 	LOAD_FLOAT_XFORM_TEST(lfdx);
-	LOAD_FLOAT_DFORM_TEST(lfdp);
-	LOAD_FLOAT_XFORM_TEST(lfdpx);
 	LOAD_FLOAT_DFORM_TEST(lfdu);
 	LOAD_FLOAT_XFORM_TEST(lfdux);
 	LOAD_FLOAT_DFORM_TEST(lfs);
 	LOAD_FLOAT_XFORM_TEST(lfsx);
 	LOAD_FLOAT_DFORM_TEST(lfsu);
 	LOAD_FLOAT_XFORM_TEST(lfsux);
-	LOAD_FLOAT_XFORM_TEST(lfiwzx);
-	LOAD_FLOAT_XFORM_TEST(lfiwax);
 	STORE_FLOAT_DFORM_TEST(stfd);
 	STORE_FLOAT_XFORM_TEST(stfdx);
-	STORE_FLOAT_DFORM_TEST(stfdp);
-	STORE_FLOAT_XFORM_TEST(stfdpx);
 	STORE_FLOAT_DFORM_TEST(stfdu);
 	STORE_FLOAT_XFORM_TEST(stfdux);
 	STORE_FLOAT_DFORM_TEST(stfs);
@@ -436,6 +479,38 @@ int test_alignment_handler_fp(void)
 	return rc;
 }
 
+int test_alignment_handler_fp_205(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
+
+	printf("Floating point: 2.05\n");
+
+	LOAD_FLOAT_DFORM_TEST(lfdp);
+	LOAD_FLOAT_XFORM_TEST(lfdpx);
+	LOAD_FLOAT_XFORM_TEST(lfiwax);
+	STORE_FLOAT_DFORM_TEST(stfdp);
+	STORE_FLOAT_XFORM_TEST(stfdpx);
+
+	return rc;
+}
+
+int test_alignment_handler_fp_206(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	printf("Floating point: 2.06\n");
+
+	LOAD_FLOAT_XFORM_TEST(lfiwzx);
+
+	return rc;
+}
+
 void usage(char *prog)
 {
 	printf("Usage: %s [options]\n", prog);
@@ -483,9 +558,15 @@ int main(int argc, char *argv[])
 			   "test_alignment_handler_vsx_300");
 	rc |= test_harness(test_alignment_handler_integer,
 			   "test_alignment_handler_integer");
+	rc |= test_harness(test_alignment_handler_integer_206,
+			   "test_alignment_handler_integer_206");
 	rc |= test_harness(test_alignment_handler_vmx,
 			   "test_alignment_handler_vmx");
 	rc |= test_harness(test_alignment_handler_fp,
 			   "test_alignment_handler_fp");
+	rc |= test_harness(test_alignment_handler_fp_205,
+			   "test_alignment_handler_fp_205");
+	rc |= test_harness(test_alignment_handler_fp_206,
+			   "test_alignment_handler_fp_206");
 	return rc;
 }
diff --git a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
index 47b73b3a08bd..5a9589987702 100644
--- a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
+++ b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
@@ -11,15 +11,46 @@
  *
  */
 
+#include <signal.h>
 #include <string.h>
 #include <unistd.h>
 #include "utils.h"
 #include "instructions.h"
-#include "copy_paste_unaligned_common.h"
 
 unsigned int expected_instruction = PPC_INST_COPY_FIRST;
 unsigned int instruction_mask = 0xfc2007fe;
 
+void signal_action_handler(int signal_num, siginfo_t *info, void *ptr)
+{
+	ucontext_t *ctx = ptr;
+#ifdef __powerpc64__
+	unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP];
+#else
+	unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP];
+#endif
+
+	/*
+	 * Check that the signal was on the correct instruction, using a
+	 * mask because the compiler assigns the register at RB.
+	 */
+	if ((*pc & instruction_mask) == expected_instruction)
+		_exit(0); /* We hit the right instruction */
+
+	_exit(1);
+}
+
+void setup_signal_handler(void)
+{
+	struct sigaction signal_action;
+
+	memset(&signal_action, 0, sizeof(signal_action));
+	signal_action.sa_sigaction = signal_action_handler;
+	signal_action.sa_flags = SA_SIGINFO;
+	sigaction(SIGBUS, &signal_action, NULL);
+}
+
+char cacheline_buf[128] __cacheline_aligned;
+
 int test_copy_first_unaligned(void)
 {
 	/* Only run this test on a P9 or later */
diff --git a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c
deleted file mode 100644
index d35fa5f5d2d3..000000000000
--- a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2016, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Common code for copy, copy_first, paste and paste_last unaligned
- * tests.
- *
- */
-
-#include <signal.h>
-#include <string.h>
-#include <unistd.h>
-#include "utils.h"
-#include "instructions.h"
-#include "copy_paste_unaligned_common.h"
-
-unsigned int expected_instruction;
-unsigned int instruction_mask;
-
-char cacheline_buf[128] __cacheline_aligned;
-
-void signal_action_handler(int signal_num, siginfo_t *info, void *ptr)
-{
-	ucontext_t *ctx = ptr;
-#if defined(__powerpc64__)
-	unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP];
-#else
-	unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP];
-#endif
-
-	/*
-	 * Check that the signal was on the correct instruction, using a
-	 * mask because the compiler assigns the register at RB.
-	 */
-	if ((*pc & instruction_mask) == expected_instruction)
-		_exit(0); /* We hit the right instruction */
-
-	_exit(1);
-}
-
-void setup_signal_handler(void)
-{
-	struct sigaction signal_action;
-
-	memset(&signal_action, 0, sizeof(signal_action));
-	signal_action.sa_sigaction = signal_action_handler;
-	signal_action.sa_flags = SA_SIGINFO;
-	sigaction(SIGBUS, &signal_action, NULL);
-}
diff --git a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h
deleted file mode 100644
index 053899fe506e..000000000000
--- a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2016, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Declarations for common code for copy, copy_first, paste and
- * paste_last unaligned tests.
- *
- */
-
-#ifndef _SELFTESTS_POWERPC_COPY_PASTE_H
-#define _SELFTESTS_POWERPC_COPY_PASTE_H
-
-#include <signal.h>
-
-int main(int argc, char *argv[]);
-void signal_action_handler(int signal_num, siginfo_t *info, void *ptr);
-void setup_signal_handler(void);
-extern char cacheline_buf[128] __cacheline_aligned;
-extern unsigned int expected_instruction;
-extern unsigned int instruction_mask;
-
-#endif /* _SELFTESTS_POWERPC_COPY_PASTE_H */
diff --git a/tools/testing/selftests/powerpc/alignment/copy_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_unaligned.c
deleted file mode 100644
index 3a4e26461554..000000000000
--- a/tools/testing/selftests/powerpc/alignment/copy_unaligned.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2016, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Calls to copy which are not 128-byte aligned should be caught
- * and sent a SIGBUS.
- *
- */
-
-#include <string.h>
-#include <unistd.h>
-#include "utils.h"
-#include "instructions.h"
-#include "copy_paste_unaligned_common.h"
-
-unsigned int expected_instruction = PPC_INST_COPY;
-unsigned int instruction_mask = 0xfc0007fe;
-
-int test_copy_unaligned(void)
-{
-	/* Only run this test on a P9 or later */
-	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
-
-	/* Register our signal handler with SIGBUS */
-	setup_signal_handler();
-
-	/* +1 makes buf unaligned */
-	copy(cacheline_buf+1);
-
-	/* We should not get here */
-	return 1;
-}
-
-int main(int argc, char *argv[])
-{
-	return test_harness(test_copy_unaligned, "test_copy_unaligned");
-}
diff --git a/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c b/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c
deleted file mode 100644
index 6e0ad045fcc3..000000000000
--- a/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2016, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Calls to paste_last which are not 128-byte aligned should be
- * caught and sent a SIGBUS.
- *
- */
-
-#include <string.h>
-#include <unistd.h>
-#include "utils.h"
-#include "instructions.h"
-#include "copy_paste_unaligned_common.h"
-
-unsigned int expected_instruction = PPC_INST_PASTE_LAST;
-unsigned int instruction_mask = 0xfc2007ff;
-
-int test_paste_last_unaligned(void)
-{
-	/* Only run this test on a P9 or later */
-	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
-
-	/* Register our signal handler with SIGBUS */
-	setup_signal_handler();
-
-	copy(cacheline_buf);
-
-	/* +1 makes buf unaligned */
-	paste_last(cacheline_buf+1);
-
-	/* We should not get here */
-	return 1;
-}
-
-int main(int argc, char *argv[])
-{
-	return test_harness(test_paste_last_unaligned, "test_paste_last_unaligned");
-}
diff --git a/tools/testing/selftests/powerpc/alignment/paste_unaligned.c b/tools/testing/selftests/powerpc/alignment/paste_unaligned.c
deleted file mode 100644
index 6f982b45e4bd..000000000000
--- a/tools/testing/selftests/powerpc/alignment/paste_unaligned.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2016, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Calls to paste which are not 128-byte aligned should be caught
- * and sent a SIGBUS.
- *
- */
-
-#include <string.h>
-#include <unistd.h>
-#include "utils.h"
-#include "instructions.h"
-#include "copy_paste_unaligned_common.h"
-
-unsigned int expected_instruction = PPC_INST_PASTE;
-unsigned int instruction_mask = 0xfc0007fe;
-
-int test_paste_unaligned(void)
-{
-	/* Only run this test on a P9 or later */
-	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
-
-	/* Register our signal handler with SIGBUS */
-	setup_signal_handler();
-
-	copy(cacheline_buf);
-
-	/* +1 makes buf unaligned */
-	paste(cacheline_buf+1);
-
-	/* We should not get here */
-	return 1;
-}
-
-int main(int argc, char *argv[])
-{
-	return test_harness(test_paste_unaligned, "test_paste_unaligned");
-}
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index b4d7432a0ecd..d40300a65b42 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -4,6 +4,7 @@ TEST_GEN_FILES := exec_target
 
 CFLAGS += -O2
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
index 3c9c144192be..c14b0fc1edde 100644
--- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -6,8 +6,11 @@
  * Copyright 2018, Anton Blanchard, IBM Corp.
  */
 
-void _exit(int);
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
 void _start(void)
 {
-	_exit(0);
+	syscall(SYS_exit, 0);
 }
diff --git a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
index 2fc711d9150d..d58e4dc50fcd 100644
--- a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
+++ b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
@@ -38,5 +38,6 @@ int test_futex(void)
 
 int main(void)
 {
+	test_harness_set_timeout(300);
 	return test_harness(test_futex, "futex_bench");
 }
diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
index 7a0a462a2272..033de0560d99 100644
--- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
+++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
@@ -84,5 +84,7 @@ int main(int argc, char *argv[])
 			exit(1);
 		}
 	}
+
+	test_harness_set_timeout(300);
 	return test_harness(test_mmap, "mmap_bench");
 }
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index 1be547434a49..689f6c8ebcd8 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := cache_shape
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cache_shape
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore
deleted file mode 100644
index c1431af7b51c..000000000000
--- a/tools/testing/selftests/powerpc/context_switch/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-cp_abort
diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile
deleted file mode 100644
index e9351bb4285d..000000000000
--- a/tools/testing/selftests/powerpc/context_switch/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-TEST_GEN_PROGS := cp_abort
-
-include ../../lib.mk
-
-$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
deleted file mode 100644
index 5a5b55afda0e..000000000000
--- a/tools/testing/selftests/powerpc/context_switch/cp_abort.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Adapted from Anton Blanchard's context switch microbenchmark.
- *
- * Copyright 2009, Anton Blanchard, IBM Corporation.
- * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This program tests the copy paste abort functionality of a P9
- * (or later) by setting up two processes on the same CPU, one
- * which executes the copy instruction and the other which
- * executes paste.
- *
- * The paste instruction should never succeed, as the cp_abort
- * instruction is called by the kernel during a context switch.
- *
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include "utils.h"
-#include <sched.h>
-
-#define READ_FD 0
-#define WRITE_FD 1
-
-#define NUM_LOOPS 1000
-
-/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
-#define PASTE(RA, RB, L, RC) \
-	.long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
-
-int paste(void *i)
-{
-	int cr;
-
-	asm volatile(str(PASTE(0, %1, 1, 1))";"
-			"mfcr %0;"
-			: "=r" (cr)
-			: "b" (i)
-			: "memory"
-		    );
-	return cr;
-}
-
-/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
-#define COPY(RA, RB, L) \
-	.long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
-
-void copy(void *i)
-{
-	asm volatile(str(COPY(0, %0, 1))";"
-			:
-			: "b" (i)
-			: "memory"
-		    );
-}
-
-int test_cp_abort(void)
-{
-	/* 128 bytes for a full cache line */
-	char buf[128] __cacheline_aligned;
-	cpu_set_t cpuset;
-	int fd1[2], fd2[2], pid;
-	char c;
-
-	/* only run this test on a P9 or later */
-	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
-
-	/*
-	 * Run both processes on the same CPU, so that copy is more likely
-	 * to leak into a paste.
-	 */
-	CPU_ZERO(&cpuset);
-	CPU_SET(pick_online_cpu(), &cpuset);
-	FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset));
-
-	FAIL_IF(pipe(fd1) || pipe(fd2));
-
-	pid = fork();
-	FAIL_IF(pid < 0);
-
-	if (!pid) {
-		for (int i = 0; i < NUM_LOOPS; i++) {
-			FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1);
-			FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1);
-			/* A paste succeeds if CR0 EQ bit is set */
-			FAIL_IF(paste(buf) & 0x20000000);
-		}
-	} else {
-		for (int i = 0; i < NUM_LOOPS; i++) {
-			FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1);
-			copy(buf);
-			FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1));
-		}
-	}
-	return 0;
-
-}
-
-int main(int argc, char *argv[])
-{
-	return test_harness(test_cp_abort, "cp_abort");
-}
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index 25a192f62c4d..ce12cd0e2967 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -1,4 +1,13 @@
-copyuser_64
-copyuser_power7
-memcpy_64
-memcpy_power7
+copyuser_64_t0
+copyuser_64_t1
+copyuser_64_t2
+copyuser_power7_t0
+copyuser_power7_t1
+memcpy_64_t0
+memcpy_64_t1
+memcpy_64_t2
+memcpy_power7_t0
+memcpy_power7_t1
+copyuser_64_exc_t0
+copyuser_64_exc_t1
+copyuser_64_exc_t2
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index eedce3366f64..44574f3818b3 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -4,18 +4,50 @@ CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 CFLAGS += -D SELFTEST
 CFLAGS += -maltivec
+CFLAGS += -mcpu=power4
 
 # Use our CFLAGS for the implicit .S rule & set the asm machine type
 ASFLAGS = $(CFLAGS) -Wa,-mpower4
 
-TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
-EXTRA_SOURCES := validate.c ../harness.c
+TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
+		copyuser_p7_t0 copyuser_p7_t1 \
+		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
+		memcpy_p7_t0 memcpy_p7_t1 \
+		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
 
+EXTRA_SOURCES := validate.c ../harness.c stubs.S
+
+top_srcdir = ../../../../..
 include ../../lib.mk
 
-$(OUTPUT)/copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
-$(OUTPUT)/copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
-$(OUTPUT)/memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
-$(OUTPUT)/memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
+$(OUTPUT)/copyuser_64_t%:	copyuser_64.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test___copy_tofrom_user_base \
+		-D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
+		-o $@ $^
+
+$(OUTPUT)/copyuser_p7_t%:	copyuser_power7.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test___copy_tofrom_user_power7 \
+		-D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
+		-o $@ $^
+
+# Strictly speaking, we only need the memcpy_64 test cases for big-endian
+$(OUTPUT)/memcpy_64_t%:	memcpy_64.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test_memcpy \
+		-D SELFTEST_CASE=$(subst memcpy_64_t,,$(notdir $@)) \
+		-o $@ $^
+
+$(OUTPUT)/memcpy_p7_t%:	memcpy_power7.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test_memcpy_power7 \
+		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
+		-o $@ $^
 
-$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
+$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
+		copy_tofrom_user_reference.S stubs.S
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test___copy_tofrom_user_base \
+		-D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \
+		-o $@ $^
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index 5ffe04d802c9..0605df807593 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -1,4 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SELFTESTS_POWERPC_PPC_ASM_H
+#define __SELFTESTS_POWERPC_PPC_ASM_H
 #include <ppc-asm.h>
 
 #define CONFIG_ALTIVEC
@@ -26,34 +28,20 @@
 
 #define PPC_MTOCRF(A, B)	mtocrf A, B
 
-#define EX_TABLE(x, y)
+#define EX_TABLE(x, y)			\
+	.section __ex_table,"a";	\
+	.8byte	x, y;			\
+	.previous
 
-FUNC_START(enter_vmx_usercopy)
-	li	r3,1
-	blr
+#define BEGIN_FTR_SECTION		.if test_feature
+#define FTR_SECTION_ELSE		.else
+#define ALT_FTR_SECTION_END_IFCLR(x)	.endif
+#define ALT_FTR_SECTION_END_IFSET(x)	.endif
+#define ALT_FTR_SECTION_END(x, y)	.endif
+#define END_FTR_SECTION_IFCLR(x)	.endif
+#define END_FTR_SECTION_IFSET(x)	.endif
 
-FUNC_START(exit_vmx_usercopy)
-	li	r3,0
-	blr
+/* Default to taking the first of any alternative feature sections */
+test_feature = 1
 
-FUNC_START(enter_vmx_copy)
-	li	r3,1
-	blr
-
-FUNC_START(exit_vmx_copy)
-	blr
-
-FUNC_START(memcpy_power7)
-	blr
-
-FUNC_START(__copy_tofrom_user_power7)
-	blr
-
-FUNC_START(__copy_tofrom_user_base)
-	blr
-
-#define BEGIN_FTR_SECTION
-#define FTR_SECTION_ELSE
-#define ALT_FTR_SECTION_END_IFCLR(x)
-#define ALT_FTR_SECTION_END(x, y)
-#define END_FTR_SECTION_IFCLR(x)
+#endif /* __SELFTESTS_POWERPC_PPC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
new file mode 100644
index 000000000000..3363b86407d6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
@@ -0,0 +1,24 @@
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_tofrom_user_reference)
+	cmpdi	r5,0
+	beq	4f
+
+	mtctr	r5
+
+1:	lbz	r6,0(r4)
+2:	stb	r6,0(r3)
+	addi	r3,r3,1
+	addi	r4,r4,1
+	bdnz	1b
+
+3:	mfctr	r3
+	blr
+
+4:	mr	r3,r5
+	blr
+
+.section __ex_table,"a"
+	.llong	1b,3b
+	.llong	2b,3b
+.text
diff --git a/tools/testing/selftests/powerpc/copyloops/exc_validate.c b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
new file mode 100644
index 000000000000..c896ea9a763c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
@@ -0,0 +1,124 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "utils.h"
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+	ucontext_t *uc = (ucontext_t *)ptr;
+	unsigned long addr = (unsigned long)info->si_addr;
+	unsigned long *ip = &UCONTEXT_NIA(uc);
+	unsigned long *ex_p = (unsigned long *)__start___ex_table;
+
+	while (ex_p < (unsigned long *)__stop___ex_table) {
+		unsigned long insn, fixup;
+
+		insn = *ex_p++;
+		fixup = *ex_p++;
+
+		if (insn == *ip) {
+			*ip = fixup;
+			return;
+		}
+	}
+
+	printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+	abort();
+}
+
+static void setup_segv_handler(void)
+{
+	struct sigaction action;
+
+	memset(&action, 0, sizeof(action));
+	action.sa_sigaction = segv_handler;
+	action.sa_flags = SA_SIGINFO;
+	sigaction(SIGSEGV, &action, NULL);
+}
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+unsigned long test_copy_tofrom_user_reference(void *to, const void *from, unsigned long size);
+
+static int total_passed;
+static int total_failed;
+
+static void do_one_test(char *dstp, char *srcp, unsigned long len)
+{
+	unsigned long got, expected;
+
+	got = COPY_LOOP(dstp, srcp, len);
+	expected = test_copy_tofrom_user_reference(dstp, srcp, len);
+
+	if (got != expected) {
+		total_failed++;
+		printf("FAIL from=%p to=%p len=%ld returned %ld, expected %ld\n",
+		       srcp, dstp, len, got, expected);
+		//abort();
+	} else
+		total_passed++;
+}
+
+//#define MAX_LEN 512
+#define MAX_LEN 16
+
+int test_copy_exception(void)
+{
+	int page_size;
+	static char *p, *q;
+	unsigned long src, dst, len;
+
+	page_size = getpagesize();
+	p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	memset(p, 0, page_size);
+
+	setup_segv_handler();
+
+	if (mprotect(p + page_size, page_size, PROT_NONE)) {
+		perror("mprotect");
+		exit(1);
+	}
+
+	q = p + page_size - MAX_LEN;
+
+	for (src = 0; src < MAX_LEN; src++) {
+		for (dst = 0; dst < MAX_LEN; dst++) {
+			for (len = 0; len < MAX_LEN+1; len++) {
+				// printf("from=%p to=%p len=%ld\n", q+dst, q+src, len);
+				do_one_test(q+dst, q+src, len);
+			}
+		}
+	}
+
+	printf("Totals:\n");
+	printf("  Pass: %d\n", total_passed);
+	printf("  Fail: %d\n", total_failed);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_copy_exception, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S
new file mode 100644
index 000000000000..ec8bcf2bf1c2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
@@ -0,0 +1,19 @@
+#include <asm/ppc_asm.h>
+
+FUNC_START(enter_vmx_usercopy)
+	li	r3,1
+	blr
+
+FUNC_START(exit_vmx_usercopy)
+	li	r3,0
+	blr
+
+FUNC_START(enter_vmx_ops)
+	li	r3,1
+	blr
+
+FUNC_START(exit_vmx_ops)
+	blr
+
+FUNC_START(__copy_tofrom_user_base)
+	blr
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 55d7db7a616b..5df476364b4d 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -3,6 +3,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test	\
 	      dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test	\
 	      dscr_sysfs_thread_test
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 08a8b95e3bc1..c8c240accc0c 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -5,8 +5,8 @@
  * verifies that the child is using the changed DSCR using mfspr.
  *
  * When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
  * directly without any emulation if the HW supports them. Else
  * they also get emulated by the kernel.
  *
@@ -19,7 +19,7 @@
  */
 #include "dscr.h"
 
-static char prog[LEN_MAX];
+static char *prog;
 
 static void do_exec(unsigned long parent_dscr)
 {
@@ -104,6 +104,6 @@ int main(int argc, char *argv[])
 		exit(1);
 	}
 
-	strncpy(prog, argv[0], strlen(argv[0]));
+	prog = argv[0];
 	return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
 }
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 66d31de60b9a..9d7166dfad1e 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -85,13 +85,13 @@ wait:
 	return status;
 }
 
-static void alarm_handler(int signum)
+static void sig_handler(int signum)
 {
-	/* Jut wake us up from waitpid */
+	/* Just wake us up from waitpid */
 }
 
-static struct sigaction alarm_action = {
-	.sa_handler = alarm_handler,
+static struct sigaction sig_action = {
+	.sa_handler = sig_handler,
 };
 
 void test_harness_set_timeout(uint64_t time)
@@ -106,8 +106,14 @@ int test_harness(int (test_function)(void), char *name)
 	test_start(name);
 	test_set_git_version(GIT_VERSION);
 
-	if (sigaction(SIGALRM, &alarm_action, NULL)) {
-		perror("sigaction");
+	if (sigaction(SIGINT, &sig_action, NULL)) {
+		perror("sigaction (sigint)");
+		test_error(name);
+		return 1;
+	}
+
+	if (sigaction(SIGALRM, &sig_action, NULL)) {
+		perror("sigaction (sigalrm)");
 		test_error(name);
 		return 1;
 	}
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 4afdebcce4cd..52b4710469d2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -17,6 +17,7 @@
 				    : "memory")
 
 #define mb()		asm volatile("sync" : : : "memory");
+#define barrier()	asm volatile("" : : : "memory");
 
 #define SPRN_MMCR2     769
 #define SPRN_MMCRA     770
@@ -54,6 +55,7 @@
 #define SPRN_DSCR_PRIV 0x11	/* Privilege State DSCR */
 #define SPRN_DSCR      0x03	/* Data Stream Control Register */
 #define SPRN_PPR       896	/* Program Priority Register */
+#define SPRN_AMR       13	/* Authority Mask Register - problem state */
 
 /* TEXASR register bits */
 #define TEXASR_FC	0xFE00000000000000
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 735815b3ad7f..49621822d7c3 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -11,6 +11,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <linux/auxvec.h>
+#include <linux/perf_event.h>
 #include "reg.h"
 
 /* Avoid headaches with PRI?64 - just use %ll? always */
@@ -31,6 +32,15 @@ void *get_auxv_entry(int type);
 
 int pick_online_cpu(void);
 
+int read_debugfs_file(char *debugfs_file, int *result);
+int write_debugfs_file(char *debugfs_file, int result);
+void set_dscr(unsigned long val);
+int perf_event_open_counter(unsigned int type,
+			    unsigned long config, int group_fd);
+int perf_event_enable(int fd);
+int perf_event_disable(int fd);
+int perf_event_reset(int fd);
+
 static inline bool have_hwcap(unsigned long ftr)
 {
 	return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -48,6 +58,8 @@ static inline bool have_hwcap2(unsigned long ftr2)
 }
 #endif
 
+bool is_ppc64le(void);
+
 /* Yes, this is evil */
 #define FAIL_IF(x)						\
 do {								\
@@ -78,4 +90,12 @@ do {								\
 #define PPC_FEATURE2_ARCH_3_00 0x00800000
 #endif
 
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
 #endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 0dd3a01fdab9..11a10d7a2bbd 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 7d7c42ed6de9..ba919308fe30 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,4 +2,5 @@ hugetlb_vs_thp_test
 subpage_prot
 tempfile
 prot_sao
-segv_errors
-\ No newline at end of file
+segv_errors
+wild_bctr
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 8ebbe96d80a8..43d68420e363 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,15 +2,18 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
 TEST_GEN_FILES := tempfile
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
 
 $(OUTPUT)/prot_sao: ../utils.c
 
+$(OUTPUT)/wild_bctr: CFLAGS += -m64
+
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1
 
diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
new file mode 100644
index 000000000000..f2fa101c5a6a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#define BAD_NIP	0x788c545a18000000ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+	save_regs(ctxt_v);
+	longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+	save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+	printf("Everything is OK in here.\n");
+	return 0;
+}
+
+#define REG_POISON	0x5a5a
+#define POISONED_REG(n)	((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \
+			 (((unsigned long)REG_POISON) << 16) | (n))
+
+static inline void poison_regs(void)
+{
+	#define POISON_REG(n)	\
+	  "lis  " __stringify(n) "," __stringify(REG_POISON) ";" \
+	  "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+	  "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+	  "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \
+	  "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+	asm (POISON_REG(15)
+	     POISON_REG(16)
+	     POISON_REG(17)
+	     POISON_REG(18)
+	     POISON_REG(19)
+	     POISON_REG(20)
+	     POISON_REG(21)
+	     POISON_REG(22)
+	     POISON_REG(23)
+	     POISON_REG(24)
+	     POISON_REG(25)
+	     POISON_REG(26)
+	     POISON_REG(27)
+	     POISON_REG(28)
+	     POISON_REG(29)
+	     : // inputs
+	     : // outputs
+	     : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+	       "26", "27", "28", "29"
+	);
+	#undef POISON_REG
+}
+
+static int check_regs(void)
+{
+	unsigned long i;
+
+	for (i = 15; i <= 29; i++)
+		FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+	printf("Regs OK\n");
+	return 0;
+}
+
+static void dump_regs(void)
+{
+	for (int i = 0; i < 32; i += 4) {
+		printf("r%02d 0x%016lx  r%02d 0x%016lx  " \
+		       "r%02d 0x%016lx  r%02d 0x%016lx\n",
+		       i, signal_regs.gpr[i],
+		       i+1, signal_regs.gpr[i+1],
+		       i+2, signal_regs.gpr[i+2],
+		       i+3, signal_regs.gpr[i+3]);
+	}
+}
+
+#ifdef _CALL_AIXDESC
+struct opd {
+	unsigned long ip;
+	unsigned long toc;
+	unsigned long env;
+};
+static struct opd bad_opd = {
+	.ip = BAD_NIP,
+};
+#define BAD_FUNC (&bad_opd)
+#else
+#define BAD_FUNC BAD_NIP
+#endif
+
+int test_wild_bctr(void)
+{
+	int (*func_ptr)(void);
+	struct sigaction segv = {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO
+	};
+	struct sigaction usr2 = {
+		.sa_sigaction = usr2_handler,
+		.sa_flags = SA_SIGINFO
+	};
+
+	FAIL_IF(sigaction(SIGSEGV, &segv, NULL));
+	FAIL_IF(sigaction(SIGUSR2, &usr2, NULL));
+
+	bzero(&signal_regs, sizeof(signal_regs));
+
+	if (setjmp(setjmp_env) == 0) {
+		func_ptr = ok;
+		func_ptr();
+
+		kill(getpid(), SIGUSR2);
+		printf("Regs before:\n");
+		dump_regs();
+		bzero(&signal_regs, sizeof(signal_regs));
+
+		poison_regs();
+
+		func_ptr = (int (*)(void))BAD_FUNC;
+		func_ptr();
+
+		FAIL_IF(1); /* we didn't segv? */
+	}
+
+	FAIL_IF(signal_regs.nip != BAD_NIP);
+
+	printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP);
+
+	dump_regs();
+	FAIL_IF(check_regs());
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_wild_bctr, "wild_bctr");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 6e1629bf5b09..19046db995fe 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -5,6 +5,7 @@ noarg:
 TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 all: $(TEST_GEN_PROGS) ebb
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c4e64bc2e265..23f4caf48ffc 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -5,6 +5,9 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
+# Toolchains may build PIE by default which breaks the assembly
+LDFLAGS += -no-pie
+
 TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 cycles_with_freeze_test pmc56_overflow_test		\
 	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
@@ -17,6 +20,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 lost_exception_test no_handler_test			\
 	 cycles_with_mmcr2_test
 
+top_srcdir = ../../../../../..
 include ../../../lib.mk
 
 $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
index ae9a79086111..35a3426e341c 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
@@ -162,5 +162,6 @@ int instruction_count(void)
 
 int main(void)
 {
+	test_harness_set_timeout(300);
 	return test_harness(instruction_count, "instruction_count");
 }
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index eb8acb78bc6c..2ed7ad33f7a3 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -98,5 +98,6 @@ static int lost_exception(void)
 
 int main(void)
 {
+	test_harness_set_timeout(300);
 	return test_harness(lost_exception, "lost_exception");
 }
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
index 175366db7be8..ea2b7bd09e36 100644
--- a/tools/testing/selftests/powerpc/primitives/Makefile
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -2,6 +2,7 @@ CFLAGS += -I$(CURDIR)
 
 TEST_GEN_PROGS := load_unaligned_zeropad
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-const.h b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
new file mode 120000
index 000000000000..18d8be13e67f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/asm-const.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
new file mode 120000
index 000000000000..8dc6d4d46e8e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/feature-fixups.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
index ed3239bbfae2..ee1e9ca22f0d 100644
--- a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -65,14 +65,6 @@ static int unprotect_region(void)
 extern char __start___ex_table[];
 extern char __stop___ex_table[];
 
-#if defined(__powerpc64__)
-#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
-#elif defined(__powerpc__)
-#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
-#else
-#error implement UCONTEXT_NIA
-#endif
-
 struct extbl_entry {
 	int insn;
 	int fixup;
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 349acfafc95b..07ec449a2767 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -8,3 +8,5 @@ ptrace-vsx
 ptrace-tm-vsx
 ptrace-tm-spd-vsx
 ptrace-tm-spr
+ptrace-hwbreak
+perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 480305266504..8d3f006c98cc 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,15 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
-              ptrace-tm-spd-vsx ptrace-tm-spr
+              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
+              perf-hwbreak ptrace-syscall
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
-all: $(TEST_PROGS)
-
 CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
-$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 000000000000..d7275b7b33dc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Helper functions to sync execution between parent and child processes.
+ *
+ * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <semaphore.h>
+
+/*
+ * Information in a shared memory location for synchronization between child and
+ * parent.
+ */
+struct child_sync {
+	/* The parent waits on this semaphore. */
+	sem_t sem_parent;
+
+	/* If true, the child should give up as well. */
+	bool parent_gave_up;
+
+	/* The child waits on this semaphore. */
+	sem_t sem_child;
+
+	/* If true, the parent should give up as well. */
+	bool child_gave_up;
+};
+
+#define CHILD_FAIL_IF(x, sync)						\
+	do {								\
+		if (x) {						\
+			fprintf(stderr,					\
+				"[FAIL] Test FAILED on line %d\n", __LINE__); \
+			(sync)->child_gave_up = true;			\
+			prod_parent(sync);				\
+			return 1;					\
+		}							\
+	} while (0)
+
+#define PARENT_FAIL_IF(x, sync)						\
+	do {								\
+		if (x) {						\
+			fprintf(stderr,					\
+				"[FAIL] Test FAILED on line %d\n", __LINE__); \
+			(sync)->parent_gave_up = true;			\
+			prod_child(sync);				\
+			return 1;					\
+		}							\
+	} while (0)
+
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync)				\
+	do {								\
+		if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
+			(sync)->parent_gave_up = true;			\
+			prod_child(sync);				\
+			SKIP_IF(1);					\
+		}							\
+	} while (0)
+
+int init_child_sync(struct child_sync *sync)
+{
+	int ret;
+
+	ret = sem_init(&sync->sem_parent, 1, 0);
+	if (ret) {
+		perror("Semaphore initialization failed");
+		return 1;
+	}
+
+	ret = sem_init(&sync->sem_child, 1, 0);
+	if (ret) {
+		perror("Semaphore initialization failed");
+		return 1;
+	}
+
+	return 0;
+}
+
+void destroy_child_sync(struct child_sync *sync)
+{
+	sem_destroy(&sync->sem_parent);
+	sem_destroy(&sync->sem_child);
+}
+
+int wait_child(struct child_sync *sync)
+{
+	int ret;
+
+	/* Wait until the child prods us. */
+	ret = sem_wait(&sync->sem_parent);
+	if (ret) {
+		perror("Error waiting for child");
+		return 1;
+	}
+
+	return sync->child_gave_up;
+}
+
+int prod_child(struct child_sync *sync)
+{
+	int ret;
+
+	/* Unblock the child now. */
+	ret = sem_post(&sync->sem_child);
+	if (ret) {
+		perror("Error prodding child");
+		return 1;
+	}
+
+	return 0;
+}
+
+int wait_parent(struct child_sync *sync)
+{
+	int ret;
+
+	/* Wait until the parent prods us. */
+	ret = sem_wait(&sync->sem_child);
+	if (ret) {
+		perror("Error waiting for parent");
+		return 1;
+	}
+
+	return sync->parent_gave_up;
+}
+
+int prod_parent(struct child_sync *sync)
+{
+	int ret;
+
+	/* Unblock the parent now. */
+	ret = sem_post(&sync->sem_parent);
+	if (ret) {
+		perror("Error prodding parent");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 000000000000..e23e2e199eb4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include <limits.h>
+#include <linux/kernel.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc		384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free		385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY		0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+#define CORE_FILE_LIMIT	(5 * 1024 * 1024)	/* 5 MB should be enough */
+
+static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
+
+static const char user_write[] = "[User Write (Running)]";
+static const char core_read_running[] = "[Core Read (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+	struct child_sync child_sync;
+
+	/* AMR value the parent expects to read in the core file. */
+	unsigned long amr;
+
+	/* IAMR value the parent expects to read in the core file. */
+	unsigned long iamr;
+
+	/* UAMOR value the parent expects to read in the core file. */
+	unsigned long uamor;
+
+	/* When the child crashed. */
+	time_t core_time;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+	return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+static int increase_core_file_limit(void)
+{
+	struct rlimit rlim;
+	int ret;
+
+	ret = getrlimit(RLIMIT_CORE, &rlim);
+	FAIL_IF(ret);
+
+	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+		rlim.rlim_cur = CORE_FILE_LIMIT;
+
+		if (rlim.rlim_max != RLIM_INFINITY &&
+		    rlim.rlim_max < CORE_FILE_LIMIT)
+			rlim.rlim_max = CORE_FILE_LIMIT;
+
+		ret = setrlimit(RLIMIT_CORE, &rlim);
+		FAIL_IF(ret);
+	}
+
+	ret = getrlimit(RLIMIT_FSIZE, &rlim);
+	FAIL_IF(ret);
+
+	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+		rlim.rlim_cur = CORE_FILE_LIMIT;
+
+		if (rlim.rlim_max != RLIM_INFINITY &&
+		    rlim.rlim_max < CORE_FILE_LIMIT)
+			rlim.rlim_max = CORE_FILE_LIMIT;
+
+		ret = setrlimit(RLIMIT_FSIZE, &rlim);
+		FAIL_IF(ret);
+	}
+
+	return TEST_PASS;
+}
+
+static int child(struct shared_info *info)
+{
+	bool disable_execute = true;
+	int pkey1, pkey2, pkey3;
+	int *ptr, ret;
+
+	/* Wait until parent fills out the initial register values. */
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	ret = increase_core_file_limit();
+	FAIL_IF(ret);
+
+	/* Get some pkeys so that we can change their bits in the AMR. */
+	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+	if (pkey1 < 0) {
+		pkey1 = sys_pkey_alloc(0, 0);
+		FAIL_IF(pkey1 < 0);
+
+		disable_execute = false;
+	}
+
+	pkey2 = sys_pkey_alloc(0, 0);
+	FAIL_IF(pkey2 < 0);
+
+	pkey3 = sys_pkey_alloc(0, 0);
+	FAIL_IF(pkey3 < 0);
+
+	info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
+
+	if (disable_execute)
+		info->iamr |= 1ul << pkeyshift(pkey1);
+	else
+		info->iamr &= ~(1ul << pkeyshift(pkey1));
+
+	info->iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
+
+	info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);
+
+	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+	       user_write, info->amr, pkey1, pkey2, pkey3);
+
+	mtspr(SPRN_AMR, info->amr);
+
+	/*
+	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
+	 * permissions after a key is freed.
+	 */
+	sys_pkey_free(pkey3);
+
+	info->core_time = time(NULL);
+
+	/* Crash. */
+	ptr = 0;
+	*ptr = 1;
+
+	/* Shouldn't get here. */
+	FAIL_IF(true);
+
+	return TEST_FAIL;
+}
+
+/* Return file size if filename exists and pass sanity check, or zero if not. */
+static off_t try_core_file(const char *filename, struct shared_info *info,
+			   pid_t pid)
+{
+	struct stat buf;
+	int ret;
+
+	ret = stat(filename, &buf);
+	if (ret == -1)
+		return TEST_FAIL;
+
+	/* Make sure we're not using a stale core file. */
+	return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
+}
+
+static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
+{
+	return (void *) nhdr + sizeof(*nhdr) +
+		__ALIGN_KERNEL(nhdr->n_namesz, 4) +
+		__ALIGN_KERNEL(nhdr->n_descsz, 4);
+}
+
+static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
+			   off_t core_size)
+{
+	unsigned long *regs;
+	Elf64_Phdr *phdr;
+	Elf64_Nhdr *nhdr;
+	size_t phdr_size;
+	void *p = ehdr, *note;
+	int ret;
+
+	ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
+	FAIL_IF(ret);
+
+	FAIL_IF(ehdr->e_type != ET_CORE);
+	FAIL_IF(ehdr->e_machine != EM_PPC64);
+	FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);
+
+	/*
+	 * e_phnum is at most 65535 so calculating the size of the
+	 * program header cannot overflow.
+	 */
+	phdr_size = sizeof(*phdr) * ehdr->e_phnum;
+
+	/* Sanity check the program header table location. */
+	FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
+	FAIL_IF(ehdr->e_phoff + phdr_size > core_size);
+
+	/* Find the PT_NOTE segment. */
+	for (phdr = p + ehdr->e_phoff;
+	     (void *) phdr < p + ehdr->e_phoff + phdr_size;
+	     phdr += ehdr->e_phentsize)
+		if (phdr->p_type == PT_NOTE)
+			break;
+
+	FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);
+
+	/* Find the NT_PPC_PKEY note. */
+	for (nhdr = p + phdr->p_offset;
+	     (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
+	     nhdr = next_note(nhdr))
+		if (nhdr->n_type == NT_PPC_PKEY)
+			break;
+
+	FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
+	FAIL_IF(nhdr->n_descsz == 0);
+
+	p = nhdr;
+	note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);
+
+	regs = (unsigned long *) note;
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       core_read_running, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(regs[0] != info->amr);
+	FAIL_IF(regs[1] != info->iamr);
+	FAIL_IF(regs[2] != info->uamor);
+
+	return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+	char *filenames, *filename[3];
+	int fd, i, ret, status;
+	unsigned long regs[3];
+	off_t core_size;
+	void *core;
+
+	/*
+	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
+	 * to the child.
+	 */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	info->amr = regs[0];
+	info->iamr = regs[1];
+	info->uamor = regs[2];
+
+	/* Wake up child so that it can set itself up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait(&status);
+	if (ret != pid) {
+		printf("Child's exit status not captured\n");
+		return TEST_FAIL;
+	} else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
+		printf("Child didn't dump core\n");
+		return TEST_FAIL;
+	}
+
+	/* Construct array of core file names to try. */
+
+	filename[0] = filenames = malloc(PATH_MAX);
+	if (!filenames) {
+		perror("Error allocating memory");
+		return TEST_FAIL;
+	}
+
+	ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
+	if (ret < 0 || ret >= PATH_MAX) {
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	filename[1] = filename[0] + ret + 1;
+	ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
+	if (ret < 0 || ret >= PATH_MAX - ret - 1) {
+		ret = TEST_FAIL;
+		goto out;
+	}
+	filename[2] = "core";
+
+	for (i = 0; i < 3; i++) {
+		core_size = try_core_file(filename[i], info, pid);
+		if (core_size != TEST_FAIL)
+			break;
+	}
+
+	if (i == 3) {
+		printf("Couldn't find core file\n");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	fd = open(filename[i], O_RDONLY);
+	if (fd == -1) {
+		perror("Error opening core file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (core == (void *) -1) {
+		perror("Error mmaping core file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	ret = check_core_file(info, core, core_size);
+
+	munmap(core, core_size);
+	close(fd);
+	unlink(filename[i]);
+
+ out:
+	free(filenames);
+
+	return ret;
+}
+
+static int write_core_pattern(const char *core_pattern)
+{
+	size_t len = strlen(core_pattern), ret;
+	FILE *f;
+
+	f = fopen(core_pattern_file, "w");
+	if (!f) {
+		perror("Error writing to core_pattern file");
+		return TEST_FAIL;
+	}
+
+	ret = fwrite(core_pattern, 1, len, f);
+	fclose(f);
+	if (ret != len) {
+		perror("Error writing to core_pattern file");
+		return TEST_FAIL;
+	}
+
+	return TEST_PASS;
+}
+
+static int setup_core_pattern(char **core_pattern_, bool *changed_)
+{
+	FILE *f;
+	char *core_pattern;
+	int ret;
+
+	core_pattern = malloc(PATH_MAX);
+	if (!core_pattern) {
+		perror("Error allocating memory");
+		return TEST_FAIL;
+	}
+
+	f = fopen(core_pattern_file, "r");
+	if (!f) {
+		perror("Error opening core_pattern file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	ret = fread(core_pattern, 1, PATH_MAX, f);
+	fclose(f);
+	if (!ret) {
+		perror("Error reading core_pattern file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	/* Check whether we can predict the name of the core file. */
+	if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
+		*changed_ = false;
+	else {
+		ret = write_core_pattern("core-pkey.%p");
+		if (ret)
+			goto out;
+
+		*changed_ = true;
+	}
+
+	*core_pattern_ = core_pattern;
+	ret = TEST_PASS;
+
+ out:
+	if (ret)
+		free(core_pattern);
+
+	return ret;
+}
+
+static int core_pkey(void)
+{
+	char *core_pattern;
+	bool changed_core_pattern;
+	struct shared_info *info;
+	int shm_id;
+	int ret;
+	pid_t pid;
+
+	ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
+	if (ret)
+		return ret;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+	info = shmat(shm_id, NULL, 0);
+
+	ret = init_child_sync(&info->child_sync);
+	if (ret)
+		return ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		ret = TEST_FAIL;
+	} else if (pid == 0)
+		ret = child(info);
+	else
+		ret = parent(info, pid);
+
+	shmdt(info);
+
+	if (pid) {
+		destroy_child_sync(&info->child_sync);
+		shmctl(shm_id, IPC_RMID, NULL);
+
+		if (changed_core_pattern)
+			write_core_pattern(core_pattern);
+	}
+
+	free(core_pattern);
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(core_pkey, "core_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
new file mode 100644
index 000000000000..60df0b5e628a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -0,0 +1,195 @@
+/*
+ * perf events self profiling example test case for hw breakpoints.
+ *
+ * This tests perf PERF_TYPE_BREAKPOINT parameters
+ * 1) tests all variants of the break on read/write flags
+ * 2) tests exclude_user == 0 and 1
+ * 3) test array matches (if DAWR is supported))
+ * 4) test different numbers of breakpoints matches
+ *
+ * Configure this breakpoint, then read and write the data a number of
+ * times. Then check the output count from perf is as expected.
+ *
+ * Based on:
+ *   http://ozlabs.org/~anton/junkcode/perf_events_example1.c
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <elf.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include "utils.h"
+
+#define MAX_LOOPS 10000
+
+#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
+
+static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+				      int cpu, int group_fd,
+				      unsigned long flags)
+{
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static inline bool breakpoint_test(int len)
+{
+	struct perf_event_attr attr;
+	int fd;
+
+	/* setup counters */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.bp_type = HW_BREAKPOINT_R;
+	/* bp_addr can point anywhere but needs to be aligned */
+	attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
+	attr.bp_len = len;
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (fd < 0)
+		return false;
+	close(fd);
+	return true;
+}
+
+static inline bool perf_breakpoint_supported(void)
+{
+	return breakpoint_test(4);
+}
+
+static inline bool dawr_supported(void)
+{
+	return breakpoint_test(DAWR_LENGTH_MAX);
+}
+
+static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
+{
+	int i,j;
+	struct perf_event_attr attr;
+	size_t res;
+	unsigned long long breaks, needed;
+	int readint;
+	int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)];
+	int *readintalign;
+	volatile int *ptr;
+	int break_fd;
+	int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
+	volatile int *k;
+
+	/* align to 0x400 boundary as required by DAWR */
+	readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
+			       0xfffffffffffff800);
+
+	ptr = &readint;
+	if (arraytest)
+		ptr = &readintalign[0];
+
+	/* setup counters */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.bp_type = readwriteflag;
+	attr.bp_addr = (__u64)ptr;
+	attr.bp_len = sizeof(int);
+	if (arraytest)
+		attr.bp_len = DAWR_LENGTH_MAX;
+	attr.exclude_user = exclude_user;
+	break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (break_fd < 0) {
+		perror("sys_perf_event_open");
+		exit(1);
+	}
+
+	/* start counters */
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+
+	/* Test a bunch of reads and writes */
+	k = &readint;
+	for (i = 0; i < loop_num; i++) {
+		if (arraytest)
+			k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]);
+
+		j = *k;
+		*k = j;
+	}
+
+	/* stop counters */
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+
+	/* read and check counters */
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	/* we read and write each loop, so subtract the ones we are counting */
+	needed = 0;
+	if (readwriteflag & HW_BREAKPOINT_R)
+		needed += loop_num;
+	if (readwriteflag & HW_BREAKPOINT_W)
+		needed += loop_num;
+	needed = needed * (1 - exclude_user);
+	printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n",
+	       (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest);
+	if (breaks != needed) {
+		printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n",
+		       (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user);
+		return 1;
+	}
+	close(break_fd);
+
+	return 0;
+}
+
+static int runtest(void)
+{
+	int rwflag;
+	int exclude_user;
+	int ret;
+
+	/*
+	 * perf defines rwflag as two bits read and write and at least
+	 * one must be set.  So range 1-3.
+	 */
+	for (rwflag = 1 ; rwflag < 4; rwflag++) {
+		for (exclude_user = 0 ; exclude_user < 2; exclude_user++) {
+			ret = runtestsingle(rwflag, exclude_user, 0);
+			if (ret)
+				return ret;
+
+			/* if we have the dawr, we can do an array test */
+			if (!dawr_supported())
+				continue;
+			ret = runtestsingle(rwflag, exclude_user, 1);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+
+static int perf_hwbreak(void)
+{
+	srand ( time(NULL) );
+
+	SKIP_IF(!perf_breakpoint_supported());
+
+	return runtest();
+}
+
+int main(int argc, char *argv[], char **envp)
+{
+	return test_harness(perf_hwbreak, "perf_hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
new file mode 100644
index 000000000000..3066d310f32b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Ptrace test for hw breakpoints
+ *
+ * Based on tools/testing/selftests/breakpoints/breakpoint_test.c
+ *
+ * This test forks and the parent then traces the child doing various
+ * types of ptrace enabled breakpoints
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ */
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+/* Breakpoint access modes */
+enum {
+	BP_X = 1,
+	BP_RW = 2,
+	BP_W = 4,
+};
+
+static pid_t child_pid;
+static struct ppc_debug_info dbginfo;
+
+static void get_dbginfo(void)
+{
+	int ret;
+
+	ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+	if (ret) {
+		perror("Can't get breakpoint info\n");
+		exit(-1);
+	}
+}
+
+static bool hwbreak_present(void)
+{
+	return (dbginfo.num_data_bps != 0);
+}
+
+static bool dawr_present(void)
+{
+	return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
+}
+
+static void set_breakpoint_addr(void *addr)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
+	if (ret) {
+		perror("Can't set breakpoint addr\n");
+		exit(-1);
+	}
+}
+
+static int set_hwbreakpoint_addr(void *addr, int range)
+{
+	int ret;
+
+	struct ppc_hw_breakpoint info;
+
+	info.version = 1;
+	info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+	info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+	if (range > 0)
+		info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+	info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+	info.addr = (__u64)addr;
+	info.addr2 = (__u64)addr + range;
+	info.condition_value = 0;
+
+	ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+	if (ret < 0) {
+		perror("Can't set breakpoint\n");
+		exit(-1);
+	}
+	return ret;
+}
+
+static int del_hwbreakpoint_addr(int watchpoint_handle)
+{
+	int ret;
+
+	ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
+	if (ret < 0) {
+		perror("Can't delete hw breakpoint\n");
+		exit(-1);
+	}
+	return ret;
+}
+
+#define DAWR_LENGTH_MAX 512
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long
+	dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
+	__attribute__((aligned(512)));
+static unsigned long long *dummy_var = dummy_array;
+
+static void write_var(int len)
+{
+	long long *plval;
+	char *pcval;
+	short *psval;
+	int *pival;
+
+	switch (len) {
+	case 1:
+		pcval = (char *)dummy_var;
+		*pcval = 0xff;
+		break;
+	case 2:
+		psval = (short *)dummy_var;
+		*psval = 0xffff;
+		break;
+	case 4:
+		pival = (int *)dummy_var;
+		*pival = 0xffffffff;
+		break;
+	case 8:
+		plval = (long long *)dummy_var;
+		*plval = 0xffffffffffffffffLL;
+		break;
+	}
+}
+
+static void read_var(int len)
+{
+	char cval __attribute__((unused));
+	short sval __attribute__((unused));
+	int ival __attribute__((unused));
+	long long lval __attribute__((unused));
+
+	switch (len) {
+	case 1:
+		cval = *(char *)dummy_var;
+		break;
+	case 2:
+		sval = *(short *)dummy_var;
+		break;
+	case 4:
+		ival = *(int *)dummy_var;
+		break;
+	case 8:
+		lval = *(long long *)dummy_var;
+		break;
+	}
+}
+
+/*
+ * Do the r/w accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+	int len, ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		perror("Can't be traced?\n");
+		return;
+	}
+
+	/* Wake up father so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		write_var(len);
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		read_var(len);
+
+	/* Test when breakpoint is unset */
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		write_var(len);
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		read_var(len);
+}
+
+static void check_success(const char *msg)
+{
+	const char *msg2;
+	int status;
+
+	/* Wait for the child to SIGTRAP */
+	wait(&status);
+
+	msg2 = "Failed";
+
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		msg2 = "Child process hit the breakpoint";
+	}
+
+	printf("%s Result: [%s]\n", msg, msg2);
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+			       struct ppc_debug_info *dbginfo, bool dawr)
+{
+	const char *mode_str;
+	unsigned long data = (unsigned long)(dummy_var);
+	int wh, range;
+
+	data &= ~0x7UL;
+
+	if (mode == BP_W) {
+		data |= (1UL << 1);
+		mode_str = "write";
+	} else {
+		data |= (1UL << 0);
+		data |= (1UL << 1);
+		mode_str = "read";
+	}
+
+	/* Set DABR_TRANSLATION bit */
+	data |= (1UL << 2);
+
+	/* use PTRACE_SET_DEBUGREG breakpoints */
+	set_breakpoint_addr((void *)data);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	set_breakpoint_addr(NULL);
+
+	data = (data & ~7); /* remove dabr control bits */
+
+	/* use PPC_PTRACE_SETHWDEBUG breakpoint */
+	if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+		return; /* not supported */
+	wh = set_hwbreakpoint_addr((void *)data, 0);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	del_hwbreakpoint_addr(wh);
+
+	/* try a wider range */
+	range = 8;
+	if (dawr)
+		range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
+	wh = set_hwbreakpoint_addr((void *)data, range);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	del_hwbreakpoint_addr(wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static int launch_tests(bool dawr)
+{
+	char buf[1024];
+	int len, i, status;
+
+	struct ppc_debug_info dbginfo;
+
+	i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+	if (i) {
+		perror("Can't set breakpoint info\n");
+		exit(-1);
+	}
+	if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+		printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
+
+	/* Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+
+	/* Read-Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+	/*
+	 * Now we have unregistered the breakpoint, access by child
+	 * should not cause SIGTRAP.
+	 */
+
+	wait(&status);
+
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		printf("FAIL: Child process hit the breakpoint, which is not expected\n");
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		return TEST_FAIL;
+	}
+
+	if (WIFEXITED(status))
+		printf("Child exited normally\n");
+
+	return TEST_PASS;
+}
+
+static int ptrace_hwbreak(void)
+{
+	pid_t pid;
+	int ret;
+	bool dawr;
+
+	pid = fork();
+	if (!pid) {
+		trigger_tests();
+		return 0;
+	}
+
+	wait(NULL);
+
+	child_pid = pid;
+
+	get_dbginfo();
+	SKIP_IF(!hwbreak_present());
+	dawr = dawr_present();
+
+	ret = launch_tests(dawr);
+
+	wait(NULL);
+
+	return ret;
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 000000000000..bdbbbe8431e0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc		384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free		385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY		0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+static const char user_read[] = "[User Read (Running)]";
+static const char user_write[] = "[User Write (Running)]";
+static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
+static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+	struct child_sync child_sync;
+
+	/* AMR value the parent expects to read from the child. */
+	unsigned long amr1;
+
+	/* AMR value the parent is expected to write to the child. */
+	unsigned long amr2;
+
+	/* AMR value that ptrace should refuse to write to the child. */
+	unsigned long amr3;
+
+	/* IAMR value the parent expects to read from the child. */
+	unsigned long expected_iamr;
+
+	/* UAMOR value the parent expects to read from the child. */
+	unsigned long expected_uamor;
+
+	/*
+	 * IAMR and UAMOR values that ptrace should refuse to write to the child
+	 * (even though they're valid ones) because userspace doesn't have
+	 * access to those registers.
+	 */
+	unsigned long new_iamr;
+	unsigned long new_uamor;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+	return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+static int child(struct shared_info *info)
+{
+	unsigned long reg;
+	bool disable_execute = true;
+	int pkey1, pkey2, pkey3;
+	int ret;
+
+	/* Wait until parent fills out the initial register values. */
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Get some pkeys so that we can change their bits in the AMR. */
+	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+	if (pkey1 < 0) {
+		pkey1 = sys_pkey_alloc(0, 0);
+		CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
+
+		disable_execute = false;
+	}
+
+	pkey2 = sys_pkey_alloc(0, 0);
+	CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
+
+	pkey3 = sys_pkey_alloc(0, 0);
+	CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
+
+	info->amr1 |= 3ul << pkeyshift(pkey1);
+	info->amr2 |= 3ul << pkeyshift(pkey2);
+	info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+
+	if (disable_execute)
+		info->expected_iamr |= 1ul << pkeyshift(pkey1);
+	else
+		info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
+
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
+
+	info->expected_uamor |= 3ul << pkeyshift(pkey1) |
+				3ul << pkeyshift(pkey2);
+	info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
+	info->new_uamor |= 3ul << pkeyshift(pkey1);
+
+	/*
+	 * We won't use pkey3. We just want a plausible but invalid key to test
+	 * whether ptrace will let us write to AMR bits we are not supposed to.
+	 *
+	 * This also tests whether the kernel restores the UAMOR permissions
+	 * after a key is freed.
+	 */
+	sys_pkey_free(pkey3);
+
+	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+	       user_write, info->amr1, pkey1, pkey2, pkey3);
+
+	mtspr(SPRN_AMR, info->amr1);
+
+	/* Wait for parent to read our AMR value and write a new one. */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/*
+	 * Wait for parent to try to write an invalid AMR value.
+	 */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/*
+	 * Wait for parent to try to write an IAMR and a UAMOR value. We can't
+	 * verify them, but we can verify that the AMR didn't change.
+	 */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/* Now let parent now that we are finished. */
+
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+	unsigned long regs[3];
+	int ret, status;
+
+	/*
+	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
+	 * to the child.
+	 */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	info->amr1 = info->amr2 = info->amr3 = regs[0];
+	info->expected_iamr = info->new_iamr = regs[1];
+	info->expected_uamor = info->new_uamor = regs[2];
+
+	/* Wake up child so that it can set itself up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Verify that we can read the pkey registers from the child. */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
+	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+	/* Write valid AMR value in child. */
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);
+
+	/* Wake up child so that it can verify it changed. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Write invalid AMR value in child. */
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+
+	/* Wake up child so that it can verify it didn't change. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Try to write to IAMR. */
+	regs[0] = info->amr1;
+	regs[1] = info->new_iamr;
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
+	PARENT_FAIL_IF(!ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx\n",
+	       ptrace_write_running, regs[0], regs[1]);
+
+	/* Try to write to IAMR and UAMOR. */
+	regs[2] = info->new_uamor;
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(!ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_write_running, regs[0], regs[1], regs[2]);
+
+	/* Verify that all registers still have their expected values. */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
+	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+	/* Wake up child so that it can verify AMR didn't change and wrap up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait(&status);
+	if (ret != pid) {
+		printf("Child's exit status not captured\n");
+		ret = TEST_PASS;
+	} else if (!WIFEXITED(status)) {
+		printf("Child exited abnormally\n");
+		ret = TEST_FAIL;
+	} else
+		ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;
+
+	return ret;
+}
+
+static int ptrace_pkey(void)
+{
+	struct shared_info *info;
+	int shm_id;
+	int ret;
+	pid_t pid;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+	info = shmat(shm_id, NULL, 0);
+
+	ret = init_child_sync(&info->child_sync);
+	if (ret)
+		return ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		ret = TEST_FAIL;
+	} else if (pid == 0)
+		ret = child(info);
+	else
+		ret = parent(info, pid);
+
+	shmdt(info);
+
+	if (pid) {
+		destroy_child_sync(&info->child_sync);
+		shmctl(shm_id, IPC_RMID, NULL);
+	}
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_pkey, "ptrace_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
new file mode 100644
index 000000000000..3353210dcdbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A ptrace test for testing PTRACE_SYSEMU, PTRACE_SETREGS and
+ * PTRACE_GETREG.  This test basically create a child process that executes
+ * syscalls and the parent process check if it is being traced appropriated.
+ *
+ * This test is heavily based on tools/testing/selftests/x86/ptrace_syscall.c
+ * test, and it was adapted to run on Powerpc by
+ * Breno Leitao <leitao@debian.org>
+ */
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include "utils.h"
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#define user_syscall_nr	gpr[0]
+#define user_arg0		gpr[3]
+#define user_arg1		gpr[4]
+#define user_arg2		gpr[5]
+#define user_arg3		gpr[6]
+#define user_arg4		gpr[7]
+#define user_arg5		gpr[8]
+#define user_ip		nip
+
+#define PTRACE_SYSEMU		0x1d
+
+static int nerrs;
+
+static void wait_trap(pid_t chld)
+{
+	siginfo_t si;
+
+	if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+		err(1, "waitid");
+	if (si.si_pid != chld)
+		errx(1, "got unexpected pid in event\n");
+	if (si.si_code != CLD_TRAPPED)
+		errx(1, "got unexpected event type %d\n", si.si_code);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+	int status;
+	struct pt_regs regs;
+	pid_t chld;
+
+	printf("[RUN]\tptrace-induced syscall restart\n");
+
+	chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	/*
+	 * Child process is running 4 syscalls after ptrace.
+	 *
+	 * 1) getpid()
+	 * 2) gettid()
+	 * 3) tgkill() -> Send SIGSTOP
+	 * 4) gettid() -> Where the tests will happen essentially
+	 */
+	if (chld == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+		printf("\tChild will make one syscall\n");
+		syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+		syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+		_exit(0);
+	}
+	/* Parent process below */
+
+	/* Wait for SIGSTOP sent by tgkill above. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	printf("[RUN]\tSYSEMU\n");
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	/*
+	 * Ptrace trapped prior to executing the syscall, thus r3 still has
+	 * the syscall number instead of the sys_gettid() result
+	 */
+	if (regs.user_syscall_nr != SYS_gettid ||
+	    regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+	    regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+	    regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+		printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+			(unsigned long)regs.user_syscall_nr,
+			(unsigned long)regs.user_arg0,
+			(unsigned long)regs.user_arg1,
+			(unsigned long)regs.user_arg2,
+			(unsigned long)regs.user_arg3,
+			(unsigned long)regs.user_arg4,
+			(unsigned long)regs.user_arg5);
+		 nerrs++;
+	} else {
+		printf("[OK]\tInitial nr and args are correct\n"); }
+
+	printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+	       (unsigned long)regs.user_ip);
+
+	/*
+	 * Rewind to retry the same syscall again. This will basically test
+	 * the rewind process together with PTRACE_SETREGS and PTRACE_GETREGS.
+	 */
+	regs.user_ip -= 4;
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_gettid ||
+	    regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+	    regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+	    regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+		printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+			(unsigned long)regs.user_syscall_nr,
+			(unsigned long)regs.user_arg0,
+			(unsigned long)regs.user_arg1,
+			(unsigned long)regs.user_arg2,
+			(unsigned long)regs.user_arg3,
+			(unsigned long)regs.user_arg4,
+			(unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tRestarted nr and args are correct\n");
+	}
+
+	printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+	       (unsigned long)regs.user_ip);
+
+	/*
+	 * Inject a new syscall (getpid) in the same place the previous
+	 * syscall (gettid), rewind and re-execute.
+	 */
+	regs.user_syscall_nr = SYS_getpid;
+	regs.user_arg0 = 20;
+	regs.user_arg1 = 21;
+	regs.user_arg2 = 22;
+	regs.user_arg3 = 23;
+	regs.user_arg4 = 24;
+	regs.user_arg5 = 25;
+	regs.user_ip -= 4;
+
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	/* Check that ptrace stopped at the new syscall that was
+	 * injected, and guarantee that it haven't executed, i.e, user_args
+	 * contain the arguments and not the syscall return value, for
+	 * instance.
+	 */
+	if (regs.user_syscall_nr != SYS_getpid
+		|| regs.user_arg0 != 20 || regs.user_arg1 != 21
+		|| regs.user_arg2 != 22 || regs.user_arg3 != 23
+		|| regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+
+		printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+			(unsigned long)regs.user_syscall_nr,
+			(unsigned long)regs.user_arg0,
+			(unsigned long)regs.user_arg1,
+			(unsigned long)regs.user_arg2,
+			(unsigned long)regs.user_arg3,
+			(unsigned long)regs.user_arg4,
+			(unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tReplacement nr and args are correct\n");
+	}
+
+	if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+		err(1, "PTRACE_CONT");
+
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+
+	/* Guarantee that the process executed properly, returning 0 */
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		printf("[FAIL]\tChild failed\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tChild exited cleanly\n");
+	}
+}
+
+int ptrace_syscall(void)
+{
+	test_ptrace_syscall_restart();
+
+	return nerrs;
+}
+
+int main(void)
+{
+	return test_harness(ptrace_syscall, "ptrace_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 327fa943c7f3..dbdffa2e2c82 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -67,8 +67,8 @@ trans:
 		"3: ;"
 		: [res] "=r" (result), [texasr] "=r" (texasr)
 		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
-		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a),
-		[flt_2] "r" (&b), [flt_4] "r" (&d)
+		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+		[flt_4] "b" (&d)
 		: "memory", "r5", "r6", "r7",
 		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
 		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
index 19fb825270a1..34201cfa8335 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -102,6 +102,44 @@ int cont_trace(pid_t child)
 	return TEST_PASS;
 }
 
+int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
+		     int n)
+{
+	struct iovec iov;
+	long ret;
+
+	FAIL_IF(start_trace(child));
+
+	iov.iov_base = regs;
+	iov.iov_len = n * sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+	if (ret)
+		return ret;
+
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
+		       int n)
+{
+	struct iovec iov;
+	long ret;
+
+	FAIL_IF(start_trace(child));
+
+	iov.iov_base = regs;
+	iov.iov_len = n * sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_SETREGSET, child, type, &iov);
+
+	FAIL_IF(stop_trace(child));
+
+	return ret;
+}
+
 /* TAR, PPR, DSCR */
 int show_tar_registers(pid_t child, unsigned long *out)
 {
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
new file mode 100644
index 000000000000..85861c46b445
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+TEST_GEN_PROGS := rfi_flush
+top_srcdir = ../../../../..
+
+CFLAGS += -I../../../../../usr/include
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
new file mode 100644
index 000000000000..0a7d0afb26b8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+struct perf_event_read {
+	__u64 nr;
+	__u64 l1d_misses;
+};
+
+static inline __u64 load(void *addr)
+{
+	__u64 tmp;
+
+	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+	return tmp;
+}
+
+static void syscall_loop(char *p, unsigned long iterations,
+			 unsigned long zero_size)
+{
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		getppid();
+	}
+}
+
+int rfi_flush_test(void)
+{
+	char *p;
+	int repetitions = 10;
+	int fd, passes = 0, iter, rc = 0;
+	struct perf_event_read v;
+	__u64 l1d_misses_total = 0;
+	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
+	int rfi_flush_org, rfi_flush;
+
+	SKIP_IF(geteuid() != 0);
+
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+		perror("Unable to read powerpc/rfi_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	rfi_flush = rfi_flush_org;
+
+	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	FAIL_IF(fd < 0);
+
+	p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+	FAIL_IF(perf_event_enable(fd));
+
+	set_dscr(1);
+
+	iter = repetitions;
+
+	/*
+	 * We expect to see l1d miss for each cacheline access when rfi_flush
+	 * is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+	FAIL_IF(perf_event_reset(fd));
+
+	syscall_loop(p, iterations, zero_size);
+
+	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+	if (rfi_flush && v.l1d_misses >= l1d_misses_expected)
+		passes++;
+	else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2))
+		passes++;
+
+	l1d_misses_total += v.l1d_misses;
+
+	while (--iter)
+		goto again;
+
+	if (passes < repetitions) {
+		printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+		       rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
+		       rfi_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       repetitions - passes, repetitions);
+		rc = 1;
+	} else
+		printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+		       rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
+		       rfi_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       passes, repetitions);
+
+	if (rfi_flush == rfi_flush_org) {
+		rfi_flush = !rfi_flush_org;
+		if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
+			perror("error writing to powerpc/rfi_flush debugfs file");
+			return 1;
+		}
+		iter = repetitions;
+		l1d_misses_total = 0;
+		passes = 0;
+		goto again;
+	}
+
+	perf_event_disable(fd);
+	close(fd);
+
+	set_dscr(0);
+
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+		return 1;
+	}
+
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(rfi_flush_test, "rfi_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index a7cbd5082e27..209a958dca12 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,14 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := signal signal_tm
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c signal.S
+TEST_GEN_PROGS := signal signal_tm
 
 CFLAGS += -maltivec
-signal_tm: CFLAGS += -mhtm
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 1125e489055e..7fc0623d85c3 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -1,11 +1,35 @@
 # SPDX-License-Identifier: GPL-2.0
 # The loops are all 64-bit code
-CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 
-TEST_GEN_PROGS := memcmp
-EXTRA_SOURCES := memcmp_64.S ../harness.c
+EXTRA_SOURCES := ../harness.c
 
+build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi)
+
+TEST_GEN_PROGS := memcmp_64 strlen
+
+$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/memcmp_32: memcmp.c
+$(OUTPUT)/memcmp_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += memcmp_32
+endif
+
+$(OUTPUT)/strlen: strlen.c string.c
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/strlen_32: strlen.c
+$(OUTPUT)/strlen_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += strlen_32
+endif
+
+ASFLAGS = $(CFLAGS)
+
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/cache.h b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
new file mode 100644
index 000000000000..8a2840831122
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
@@ -0,0 +1 @@
+#define	IFETCH_ALIGN_BYTES 4
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
new file mode 100644
index 000000000000..9de413c0c2cb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * provides masks and opcode images for use by code generation, emulation
+ * and for instructions that older assemblers might not know about
+ */
+#ifndef _ASM_POWERPC_PPC_OPCODE_H
+#define _ASM_POWERPC_PPC_OPCODE_H
+
+
+#  define stringify_in_c(...)	__VA_ARGS__
+#  define ASM_CONST(x)		x
+
+
+#define PPC_INST_VCMPEQUD_RC		0x100000c7
+#define PPC_INST_VCMPEQUB_RC		0x10000006
+
+#define __PPC_RC21     (0x1 << 10)
+
+/* macros to insert fields into opcodes */
+#define ___PPC_RA(a)	(((a) & 0x1f) << 16)
+#define ___PPC_RB(b)	(((b) & 0x1f) << 11)
+#define ___PPC_RS(s)	(((s) & 0x1f) << 21)
+#define ___PPC_RT(t)	___PPC_RS(t)
+
+#define VCMPEQUD_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_INST_VCMPEQUD_RC | \
+			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+			      ___PPC_RB(vrb) | __PPC_RC21)
+
+#define VCMPEQUB_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_INST_VCMPEQUB_RC | \
+			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+			      ___PPC_RB(vrb) | __PPC_RC21)
+
+#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
index 136242ec4b0e..d2c0a911f55e 100644
--- a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -1,4 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_ASM_H
+#define __PPC_ASM_H
 #include <ppc-asm.h>
 
 #ifndef r1
@@ -6,3 +8,26 @@
 #endif
 
 #define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) FUNC_START(test_ ## A)
+
+#define CONFIG_ALTIVEC
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+#define BEGIN_FTR_SECTION
+#define END_FTR_SECTION_IFSET(val)
+#endif
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index 8250db25b379..b1fa7546957f 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,20 +2,40 @@
 #include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include "utils.h"
 
 #define SIZE 256
 #define ITERATIONS 10000
 
+#define LARGE_SIZE (5 * 1024)
+#define LARGE_ITERATIONS 1000
+#define LARGE_MAX_OFFSET 32
+#define LARGE_SIZE_START 4096
+
+#define MAX_OFFSET_DIFF_S1_S2 48
+
+int vmx_count;
+int enter_vmx_ops(void)
+{
+	vmx_count++;
+	return 1;
+}
+
+void exit_vmx_ops(void)
+{
+	vmx_count--;
+}
 int test_memcmp(const void *s1, const void *s2, size_t n);
 
 /* test all offsets and lengths */
-static void test_one(char *s1, char *s2)
+static void test_one(char *s1, char *s2, unsigned long max_offset,
+		unsigned long size_start, unsigned long max_size)
 {
 	unsigned long offset, size;
 
-	for (offset = 0; offset < SIZE; offset++) {
-		for (size = 0; size < (SIZE-offset); size++) {
+	for (offset = 0; offset < max_offset; offset++) {
+		for (size = size_start; size < (max_size - offset); size++) {
 			int x, y;
 			unsigned long i;
 
@@ -35,70 +55,105 @@ static void test_one(char *s1, char *s2)
 				printf("\n");
 				abort();
 			}
+
+			if (vmx_count != 0) {
+				printf("vmx enter/exit not paired.(offset:%ld size:%ld s1:%p s2:%p vc:%d\n",
+					offset, size, s1, s2, vmx_count);
+				printf("\n");
+				abort();
+			}
 		}
 	}
 }
 
-static int testcase(void)
+static int testcase(bool islarge)
 {
 	char *s1;
 	char *s2;
 	unsigned long i;
 
-	s1 = memalign(128, SIZE);
+	unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
+	unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+	s1 = memalign(128, alloc_size);
 	if (!s1) {
 		perror("memalign");
 		exit(1);
 	}
 
-	s2 = memalign(128, SIZE);
+	s2 = memalign(128, alloc_size);
 	if (!s2) {
 		perror("memalign");
 		exit(1);
 	}
 
-	srandom(1);
+	srandom(time(0));
 
-	for (i = 0; i < ITERATIONS; i++) {
+	for (i = 0; i < iterations; i++) {
 		unsigned long j;
 		unsigned long change;
+		char *rand_s1 = s1;
+		char *rand_s2 = s2;
 
-		for (j = 0; j < SIZE; j++)
+		for (j = 0; j < alloc_size; j++)
 			s1[j] = random();
 
-		memcpy(s2, s1, SIZE);
+		rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+		rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+		memcpy(rand_s2, rand_s1, comp_size);
 
 		/* change one byte */
-		change = random() % SIZE;
-		s2[change] = random() & 0xff;
-
-		test_one(s1, s2);
+		change = random() % comp_size;
+		rand_s2[change] = random() & 0xff;
+
+		if (islarge)
+			test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+					LARGE_SIZE_START, comp_size);
+		else
+			test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
 	}
 
-	srandom(1);
+	srandom(time(0));
 
-	for (i = 0; i < ITERATIONS; i++) {
+	for (i = 0; i < iterations; i++) {
 		unsigned long j;
 		unsigned long change;
+		char *rand_s1 = s1;
+		char *rand_s2 = s2;
 
-		for (j = 0; j < SIZE; j++)
+		for (j = 0; j < alloc_size; j++)
 			s1[j] = random();
 
-		memcpy(s2, s1, SIZE);
+		rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+		rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+		memcpy(rand_s2, rand_s1, comp_size);
 
 		/* change multiple bytes, 1/8 of total */
-		for (j = 0; j < SIZE / 8; j++) {
-			change = random() % SIZE;
+		for (j = 0; j < comp_size / 8; j++) {
+			change = random() % comp_size;
 			s2[change] = random() & 0xff;
 		}
 
-		test_one(s1, s2);
+		if (islarge)
+			test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+					LARGE_SIZE_START, comp_size);
+		else
+			test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
 	}
 
 	return 0;
 }
 
+static int testcases(void)
+{
+	testcase(0);
+	testcase(1);
+	return 0;
+}
+
 int main(void)
 {
-	return test_harness(testcase, "memcmp");
+	test_harness_set_timeout(300);
+	return test_harness(testcases, "memcmp");
 }
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_32.S b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
new file mode 120000
index 000000000000..056f2b3af789
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_32.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/string.c b/tools/testing/selftests/powerpc/stringloops/string.c
new file mode 100644
index 000000000000..45e7775415c7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/string.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from linux/lib/string.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <stddef.h>
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t test_strlen(const char *s)
+{
+	const char *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen.c b/tools/testing/selftests/powerpc/stringloops/strlen.c
new file mode 100644
index 000000000000..9055ebc484d0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 1000
+#define ITERATIONS_BENCH 100000
+
+int test_strlen(const void *s);
+
+/* test all offsets and lengths */
+static void test_one(char *s)
+{
+	unsigned long offset;
+
+	for (offset = 0; offset < SIZE; offset++) {
+		int x, y;
+		unsigned long i;
+
+		y = strlen(s + offset);
+		x = test_strlen(s + offset);
+
+		if (x != y) {
+			printf("strlen() returned %d, should have returned %d (%p offset %ld)\n", x, y, s, offset);
+
+			for (i = offset; i < SIZE; i++)
+				printf("%02x ", s[i]);
+			printf("\n");
+		}
+	}
+}
+
+static void bench_test(char *s)
+{
+	struct timespec ts_start, ts_end;
+	int i;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+	for (i = 0; i < ITERATIONS_BENCH; i++)
+		test_strlen(s);
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+	printf("len %3.3d : time = %.6f\n", test_strlen(s), ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+}
+
+static int testcase(void)
+{
+	char *s;
+	unsigned long i;
+
+	s = memalign(128, SIZE);
+	if (!s) {
+		perror("memalign");
+		exit(1);
+	}
+
+	srandom(1);
+
+	memset(s, 0, SIZE);
+	for (i = 0; i < SIZE; i++) {
+		char c;
+
+		do {
+			c = random() & 0x7f;
+		} while (!c);
+		s[i] = c;
+		test_one(s);
+	}
+
+	for (i = 0; i < ITERATIONS; i++) {
+		unsigned long j;
+
+		for (j = 0; j < SIZE; j++) {
+			char c;
+
+			do {
+				c = random() & 0x7f;
+			} while (!c);
+			s[j] = c;
+		}
+		for (j = 0; j < sizeof(long); j++) {
+			s[SIZE - 1 - j] = 0;
+			test_one(s);
+		}
+	}
+
+	for (i = 0; i < SIZE; i++) {
+		char c;
+
+		do {
+			c = random() & 0x7f;
+		} while (!c);
+		s[i] = c;
+	}
+
+	bench_test(s);
+
+	s[16] = 0;
+	bench_test(s);
+
+	s[8] = 0;
+	bench_test(s);
+
+	s[4] = 0;
+	bench_test(s);
+
+	s[3] = 0;
+	bench_test(s);
+
+	s[2] = 0;
+	bench_test(s);
+
+	s[1] = 0;
+	bench_test(s);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(testcase, "strlen");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen_32.S b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
new file mode 120000
index 000000000000..72b13731b24c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/strlen_32.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index 30b8ff8fb82e..bdc081afedb0 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -5,8 +5,10 @@ ASFLAGS += -O2 -Wall -g -nostdlib -m64
 
 EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
 $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
 
 $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index da22ca7c38c1..161b8846336f 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -2,6 +2,7 @@ TEST_GEN_PROGS := ipc_unmuxed
 
 CFLAGS += -I../../../../../usr/include
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index bb90d4b79524..c3ee8393dae8 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -14,3 +14,4 @@ tm-signal-context-chk-vsx
 tm-vmx-unavail
 tm-unavailable
 tm-trap
+tm-sigreturn
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index c0e45d2dde25..9fc2cf6fbc92 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack
 	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
 	$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
index 85d63449243b..9a6017a1d769 100644
--- a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -55,6 +55,7 @@ int tm_sigreturn(void)
 	uint64_t ret = 0;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(!is_ppc64le());
 
 	memset(&sa, 0, sizeof(sa));
 	sa.sa_handler = handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
index 2d2fcc2b7a60..f31fe5a28ddb 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tar.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -26,6 +26,7 @@ int test_tar(void)
 	int i;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(!is_ppc64le());
 
 	for (i = 0; i < num_loops; i++)
 	{
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 2bda81c7bf23..df1d7d4b1c89 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -98,7 +98,7 @@ void texasr(void *in)
 
 int test_tmspr()
 {
-	pthread_t 	thread;
+	pthread_t	*thread;
 	int	   	thread_num;
 	unsigned long	i;
 
@@ -107,21 +107,28 @@ int test_tmspr()
 	/* To cause some context switching */
 	thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
 
+	thread = malloc(thread_num * sizeof(pthread_t));
+	if (thread == NULL)
+		return EXIT_FAILURE;
+
 	/* Test TFIAR and TFHAR */
-	for (i = 0 ; i < thread_num ; i += 2){
-		if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i))
+	for (i = 0; i < thread_num; i += 2) {
+		if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+				   (void *)i))
 			return EXIT_FAILURE;
 	}
-	if (pthread_join(thread, NULL) != 0)
-		return EXIT_FAILURE;
-
 	/* Test TEXASR */
-	for (i = 0 ; i < thread_num ; i++){
-		if (pthread_create(&thread, NULL, (void*)texasr, (void *)i))
+	for (i = 1; i < thread_num; i += 2) {
+		if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
 			return EXIT_FAILURE;
 	}
-	if (pthread_join(thread, NULL) != 0)
-		return EXIT_FAILURE;
+
+	for (i = 0; i < thread_num; i++) {
+		if (pthread_join(thread[i], NULL) != 0)
+			return EXIT_FAILURE;
+	}
+
+	free(thread);
 
 	if (passed)
 		return 0;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 156c8e750259..09894f4ff62e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -236,7 +236,8 @@ void *tm_una_ping(void *input)
 	}
 
 	/* Check if we were not expecting a failure and a it occurred. */
-	if (!expecting_failure() && is_failure(cr_)) {
+	if (!expecting_failure() && is_failure(cr_) &&
+	    !failure_is_reschedule()) {
 		printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
 			failure_code());
 		return (void *) -1;
@@ -244,9 +245,11 @@ void *tm_una_ping(void *input)
 
 	/*
 	 * Check if TM failed due to the cause we were expecting. 0xda is a
-	 * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+	 * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+	 * it was caused by a reschedule.
 	 */
-	if (is_failure(cr_) && !failure_is_unavailable()) {
+	if (is_failure(cr_) && !failure_is_unavailable() &&
+	    !failure_is_reschedule()) {
 		printf("\n\tUnexpected failure cause 0x%02lx\n\t",
 			failure_code());
 		return (void *) -1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
index 0274de7b11f3..fe52811584ae 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -46,6 +46,7 @@ int test_vmxcopy()
 	uint64_t aborted = 0;
 
 	SKIP_IF(!have_htm());
+	SKIP_IF(!is_ppc64le());
 
 	fd = mkstemp(tmpfile);
 	assert(fd >= 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index df4204247d45..5518b1d4ef8b 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -52,6 +52,15 @@ static inline bool failure_is_unavailable(void)
 	return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
 }
 
+static inline bool failure_is_reschedule(void)
+{
+	if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+	    (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+		return true;
+
+	return false;
+}
+
 static inline bool failure_is_nesting(void)
 {
 	return (__builtin_get_texasru() & 0x400000);
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index d46916867a6f..ed62f4153d3e 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,10 +10,17 @@
 #include <fcntl.h>
 #include <link.h>
 #include <sched.h>
+#include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <sys/utsname.h>
 #include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/limits.h>
 
 #include "utils.h"
 
@@ -104,3 +111,165 @@ int pick_online_cpu(void)
 	printf("No cpus in affinity mask?!\n");
 	return -1;
 }
+
+bool is_ppc64le(void)
+{
+	struct utsname uts;
+	int rc;
+
+	errno = 0;
+	rc = uname(&uts);
+	if (rc) {
+		perror("uname");
+		return false;
+	}
+
+	return strcmp(uts.machine, "ppc64le") == 0;
+}
+
+int read_debugfs_file(char *debugfs_file, int *result)
+{
+	int rc = -1, fd;
+	char path[PATH_MAX];
+	char value[16];
+
+	strcpy(path, "/sys/kernel/debug/");
+	strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return rc;
+
+	if ((rc = read(fd, value, sizeof(value))) < 0)
+		return rc;
+
+	value[15] = 0;
+	*result = atoi(value);
+	close(fd);
+
+	return 0;
+}
+
+int write_debugfs_file(char *debugfs_file, int result)
+{
+	int rc = -1, fd;
+	char path[PATH_MAX];
+	char value[16];
+
+	strcpy(path, "/sys/kernel/debug/");
+	strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+	if ((fd = open(path, O_WRONLY)) < 0)
+		return rc;
+
+	snprintf(value, 16, "%d", result);
+
+	if ((rc = write(fd, value, strlen(value))) < 0)
+		return rc;
+
+	close(fd);
+
+	return 0;
+}
+
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+		int cpu, int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, hw_event, pid, cpu,
+		      group_fd, flags);
+}
+
+static void perf_event_attr_init(struct perf_event_attr *event_attr,
+					unsigned int type,
+					unsigned long config)
+{
+	memset(event_attr, 0, sizeof(*event_attr));
+
+	event_attr->type = type;
+	event_attr->size = sizeof(struct perf_event_attr);
+	event_attr->config = config;
+	event_attr->read_format = PERF_FORMAT_GROUP;
+	event_attr->disabled = 1;
+	event_attr->exclude_kernel = 1;
+	event_attr->exclude_hv = 1;
+	event_attr->exclude_guest = 1;
+}
+
+int perf_event_open_counter(unsigned int type,
+			    unsigned long config, int group_fd)
+{
+	int fd;
+	struct perf_event_attr event_attr;
+
+	perf_event_attr_init(&event_attr, type, config);
+
+	fd = perf_event_open(&event_attr, 0, -1, group_fd, 0);
+
+	if (fd < 0)
+		perror("perf_event_open() failed");
+
+	return fd;
+}
+
+int perf_event_enable(int fd)
+{
+	if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+		perror("error while enabling perf events");
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_event_disable(int fd)
+{
+	if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
+		perror("error disabling perf events");
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_event_reset(int fd)
+{
+	if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+		perror("error resetting perf events");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+	static int warned = 0;
+	ucontext_t *ctx = (ucontext_t *)unused;
+	unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+	/* mtspr 3,RS to check for move to DSCR below */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+		if (!warned++)
+			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+		*pc += 4;
+	} else {
+		printf("SIGILL at %p\n", pc);
+		abort();
+	}
+}
+
+void set_dscr(unsigned long val)
+{
+	static int init = 0;
+	struct sigaction sa;
+
+	if (!init) {
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_sigaction = sigill_handler;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGILL, &sa, NULL))
+			perror("sigill_handler");
+		init = 1;
+	}
+
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
index f8ced26748f8..fb82068c9fda 100644
--- a/tools/testing/selftests/powerpc/vphn/Makefile
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -2,6 +2,7 @@ TEST_GEN_PROGS := test-vphn
 
 CFLAGS += -m64
 
+top_srcdir = ../../../../..
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 6c16f77c722c..82121a81681f 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -1,3 +1,6 @@
+/fd-001-lookup
+/fd-002-posix-eq
+/fd-003-kthread
 /proc-loadavg-001
 /proc-self-map-files-001
 /proc-self-map-files-002
@@ -6,3 +9,5 @@
 /proc-uptime-001
 /proc-uptime-002
 /read
+/self
+/thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index dbb87e56264c..1c12c34cf85d 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -1,6 +1,10 @@
-CFLAGS += -Wall -O2
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -D_GNU_SOURCE
 
 TEST_GEN_PROGS :=
+TEST_GEN_PROGS += fd-001-lookup
+TEST_GEN_PROGS += fd-002-posix-eq
+TEST_GEN_PROGS += fd-003-kthread
 TEST_GEN_PROGS += proc-loadavg-001
 TEST_GEN_PROGS += proc-self-map-files-001
 TEST_GEN_PROGS += proc-self-map-files-002
@@ -9,5 +13,7 @@ TEST_GEN_PROGS += proc-self-wchan
 TEST_GEN_PROGS += proc-uptime-001
 TEST_GEN_PROGS += proc-uptime-002
 TEST_GEN_PROGS += read
+TEST_GEN_PROGS += self
+TEST_GEN_PROGS += thread-self
 
 include ../lib.mk
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
new file mode 100644
index 000000000000..60d7948e7124
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test /proc/*/fd lookup.
+
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc.h"
+
+/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */
+static void test_lookup_pass(const char *pathname)
+{
+	struct stat st;
+	ssize_t rv;
+
+	memset(&st, 0, sizeof(struct stat));
+	rv = lstat(pathname, &st);
+	assert(rv == 0);
+	assert(S_ISLNK(st.st_mode));
+}
+
+static void test_lookup_fail(const char *pathname)
+{
+	struct stat st;
+	ssize_t rv;
+
+	rv = lstat(pathname, &st);
+	assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(unsigned int fd)
+{
+	char buf[64];
+	unsigned int c;
+	unsigned int u;
+	int i;
+
+	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
+	test_lookup_pass(buf);
+
+	/* leading junk */
+	for (c = 1; c <= 255; c++) {
+		if (c == '/')
+			continue;
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd);
+		test_lookup_fail(buf);
+	}
+
+	/* trailing junk */
+	for (c = 1; c <= 255; c++) {
+		if (c == '/')
+			continue;
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c);
+		test_lookup_fail(buf);
+	}
+
+	for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+		test_lookup_fail(buf);
+	}
+	for (i = -1024; i < 0; i++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+		test_lookup_fail(buf);
+	}
+	for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+		test_lookup_fail(buf);
+	}
+	for (u = UINT_MAX - 1024; u != 0; u++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+		test_lookup_fail(buf);
+	}
+
+
+}
+
+int main(void)
+{
+	struct dirent *de;
+	unsigned int fd, target_fd;
+
+	if (unshare(CLONE_FILES) == -1)
+		return 1;
+
+	/* Wipe fdtable. */
+	do {
+		DIR *d;
+
+		d = opendir("/proc/self/fd");
+		if (!d)
+			return 1;
+
+		de = xreaddir(d);
+		assert(de->d_type == DT_DIR);
+		assert(streq(de->d_name, "."));
+
+		de = xreaddir(d);
+		assert(de->d_type == DT_DIR);
+		assert(streq(de->d_name, ".."));
+next:
+		de = xreaddir(d);
+		if (de) {
+			unsigned long long fd_ull;
+			unsigned int fd;
+			char *end;
+
+			assert(de->d_type == DT_LNK);
+
+			fd_ull = xstrtoull(de->d_name, &end);
+			assert(*end == '\0');
+			assert(fd_ull == (unsigned int)fd_ull);
+
+			fd = fd_ull;
+			if (fd == dirfd(d))
+				goto next;
+			close(fd);
+		}
+
+		closedir(d);
+	} while (de);
+
+	/* Now fdtable is clean. */
+
+	fd = open("/", O_PATH|O_DIRECTORY);
+	assert(fd == 0);
+	test_lookup(fd);
+	close(fd);
+
+	/* Clean again! */
+
+	fd = open("/", O_PATH|O_DIRECTORY);
+	assert(fd == 0);
+	/* Default RLIMIT_NOFILE-1 */
+	target_fd = 1023;
+	while (target_fd > 0) {
+		if (dup2(fd, target_fd) == target_fd)
+			break;
+		target_fd /= 2;
+	}
+	assert(target_fd > 0);
+	close(fd);
+	test_lookup(target_fd);
+	close(target_fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c
new file mode 100644
index 000000000000..417322ca9c53
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-002-posix-eq.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that open(/proc/*/fd/*) opens the same file.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+	int fd0, fd1, fd2;
+	struct stat st0, st1, st2;
+	char buf[64];
+	int rv;
+
+	fd0 = open("/", O_DIRECTORY|O_RDONLY);
+	assert(fd0 >= 0);
+
+	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0);
+	fd1 = open(buf, O_RDONLY);
+	assert(fd1 >= 0);
+
+	snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0);
+	fd2 = open(buf, O_RDONLY);
+	assert(fd2 >= 0);
+
+	rv = fstat(fd0, &st0);
+	assert(rv == 0);
+	rv = fstat(fd1, &st1);
+	assert(rv == 0);
+	rv = fstat(fd2, &st2);
+	assert(rv == 0);
+
+	assert(st0.st_dev == st1.st_dev);
+	assert(st0.st_ino == st1.st_ino);
+
+	assert(st0.st_dev == st2.st_dev);
+	assert(st0.st_ino == st2.st_ino);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
new file mode 100644
index 000000000000..dc591f97b63d
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/$KERNEL_THREAD/fd/ is empty.
+
+#undef NDEBUG
+#include <sys/syscall.h>
+#include <assert.h>
+#include <dirent.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+#define PF_KHTREAD 0x00200000
+
+/*
+ * Test for kernel threadness atomically with openat().
+ *
+ * Return /proc/$PID/fd descriptor if process is kernel thread.
+ * Return -1 if a process is userspace process.
+ */
+static int kernel_thread_fd(unsigned int pid)
+{
+	unsigned int flags = 0;
+	char buf[4096];
+	int dir_fd, fd;
+	ssize_t rv;
+
+	snprintf(buf, sizeof(buf), "/proc/%u", pid);
+	dir_fd = open(buf, O_RDONLY|O_DIRECTORY);
+	if (dir_fd == -1)
+		return -1;
+
+	/*
+	 * Believe it or not, struct task_struct::flags is directly exposed
+	 * to userspace!
+	 */
+	fd = openat(dir_fd, "stat", O_RDONLY);
+	if (fd == -1) {
+		close(dir_fd);
+		return -1;
+	}
+	rv = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (0 < rv && rv <= sizeof(buf)) {
+		unsigned long long flags_ull;
+		char *p, *end;
+		int i;
+
+		assert(buf[rv - 1] == '\n');
+		buf[rv - 1] = '\0';
+
+		/* Search backwards: ->comm can contain whitespace and ')'. */
+		for (i = 0; i < 43; i++) {
+			p = strrchr(buf, ' ');
+			assert(p);
+			*p = '\0';
+		}
+
+		p = strrchr(buf, ' ');
+		assert(p);
+
+		flags_ull = xstrtoull(p + 1, &end);
+		assert(*end == '\0');
+		assert(flags_ull == (unsigned int)flags_ull);
+
+		flags = flags_ull;
+	}
+
+	fd = -1;
+	if (flags & PF_KHTREAD) {
+		fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY);
+	}
+	close(dir_fd);
+	return fd;
+}
+
+static void test_readdir(int fd)
+{
+	DIR *d;
+	struct dirent *de;
+
+	d = fdopendir(fd);
+	assert(d);
+
+	de = xreaddir(d);
+	assert(streq(de->d_name, "."));
+	assert(de->d_type == DT_DIR);
+
+	de = xreaddir(d);
+	assert(streq(de->d_name, ".."));
+	assert(de->d_type == DT_DIR);
+
+	de = xreaddir(d);
+	assert(!de);
+}
+
+static inline int sys_statx(int dirfd, const char *pathname, int flags,
+			    unsigned int mask, void *stx)
+{
+	return syscall(SYS_statx, dirfd, pathname, flags, mask, stx);
+}
+
+static void test_lookup_fail(int fd, const char *pathname)
+{
+	char stx[256] __attribute__((aligned(8)));
+	int rv;
+
+	rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx);
+	assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(int fd)
+{
+	char buf[64];
+	unsigned int u;
+	int i;
+
+	for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+		snprintf(buf, sizeof(buf), "%d", i);
+		test_lookup_fail(fd, buf);
+	}
+	for (i = -1024; i < 1024; i++) {
+		snprintf(buf, sizeof(buf), "%d", i);
+		test_lookup_fail(fd, buf);
+	}
+	for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) {
+		snprintf(buf, sizeof(buf), "%u", u);
+		test_lookup_fail(fd, buf);
+	}
+	for (u = UINT_MAX - 1024; u != 0; u++) {
+		snprintf(buf, sizeof(buf), "%u", u);
+		test_lookup_fail(fd, buf);
+	}
+}
+
+int main(void)
+{
+	unsigned int pid;
+	int fd;
+
+	/*
+	 * In theory this will loop indefinitely if kernel threads are exiled
+	 * from /proc.
+	 *
+	 * Start with kthreadd.
+	 */
+	pid = 2;
+	while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) {
+		pid++;
+	}
+	/* EACCES if run as non-root. */
+	if (pid >= 1024)
+		return 1;
+
+	test_readdir(fd);
+	test_lookup(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
index 6f1f4a6e1ecb..85744425b08d 100644
--- a/tools/testing/selftests/proc/proc-self-map-files-002.c
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -13,7 +13,7 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-/* Test readlink /proc/self/map_files/... with address 0. */
+/* Test readlink /proc/self/map_files/... with minimum address. */
 #include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -47,6 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b)
 int main(void)
 {
 	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+#ifdef __arm__
+	unsigned long va = 2 * PAGE_SIZE;
+#else
+	unsigned long va = 0;
+#endif
 	void *p;
 	int fd;
 	unsigned long a, b;
@@ -55,7 +60,7 @@ int main(void)
 	if (fd == -1)
 		return 1;
 
-	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+	p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
 	if (p == MAP_FAILED) {
 		if (errno == EPERM)
 			return 2;
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
index 0e464b50e9d9..dc6a42b1d6b0 100644
--- a/tools/testing/selftests/proc/proc-uptime.h
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -20,21 +20,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-static unsigned long long xstrtoull(const char *p, char **end)
-{
-	if (*p == '0') {
-		*end = (char *)p + 1;
-		return 0;
-	} else if ('1' <= *p && *p <= '9') {
-		unsigned long long val;
-
-		errno = 0;
-		val = strtoull(p, end, 10);
-		assert(errno == 0);
-		return val;
-	} else
-		assert(0);
-}
+#include "proc.h"
 
 static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
 {
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
new file mode 100644
index 000000000000..b7d57ea40237
--- /dev/null
+++ b/tools/testing/selftests/proc/proc.h
@@ -0,0 +1,51 @@
+#pragma once
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t sys_getpid(void)
+{
+	return syscall(SYS_getpid);
+}
+
+static inline pid_t sys_gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+
+static inline bool streq(const char *s1, const char *s2)
+{
+	return strcmp(s1, s2) == 0;
+}
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+	if (*p == '0') {
+		*end = (char *)p + 1;
+		return 0;
+	} else if ('1' <= *p && *p <= '9') {
+		unsigned long long val;
+
+		errno = 0;
+		val = strtoull(p, end, 10);
+		assert(errno == 0);
+		return val;
+	} else
+		assert(0);
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+	struct dirent *de;
+
+	errno = 0;
+	de = readdir(d);
+	assert(de || errno == 0);
+	return de;
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
index 1e73c2232097..563e752e6eba 100644
--- a/tools/testing/selftests/proc/read.c
+++ b/tools/testing/selftests/proc/read.c
@@ -31,22 +31,7 @@
 #include <fcntl.h>
 #include <unistd.h>
 
-static inline bool streq(const char *s1, const char *s2)
-{
-	return strcmp(s1, s2) == 0;
-}
-
-static struct dirent *xreaddir(DIR *d)
-{
-	struct dirent *de;
-
-	errno = 0;
-	de = readdir(d);
-	if (!de && errno != 0) {
-		exit(1);
-	}
-	return de;
-}
+#include "proc.h"
 
 static void f_reg(DIR *d, const char *filename)
 {
diff --git a/tools/testing/selftests/proc/self.c b/tools/testing/selftests/proc/self.c
new file mode 100644
index 000000000000..21c15a1ffefb
--- /dev/null
+++ b/tools/testing/selftests/proc/self.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/self gives correct TGID.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+int main(void)
+{
+	char buf1[64], buf2[64];
+	pid_t pid;
+	ssize_t rv;
+
+	pid = sys_getpid();
+	snprintf(buf1, sizeof(buf1), "%u", pid);
+
+	rv = readlink("/proc/self", buf2, sizeof(buf2));
+	assert(rv == strlen(buf1));
+	buf2[rv] = '\0';
+	assert(streq(buf1, buf2));
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/thread-self.c b/tools/testing/selftests/proc/thread-self.c
new file mode 100644
index 000000000000..4b23b39b7ae0
--- /dev/null
+++ b/tools/testing/selftests/proc/thread-self.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/thread-self gives correct TGID/PID.
+#undef NDEBUG
+#include <assert.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "proc.h"
+
+int f(void *arg)
+{
+	char buf1[64], buf2[64];
+	pid_t pid, tid;
+	ssize_t rv;
+
+	pid = sys_getpid();
+	tid = sys_gettid();
+	snprintf(buf1, sizeof(buf1), "%u/task/%u", pid, tid);
+
+	rv = readlink("/proc/thread-self", buf2, sizeof(buf2));
+	assert(rv == strlen(buf1));
+	buf2[rv] = '\0';
+	assert(streq(buf1, buf2));
+
+	if (arg)
+		exit(0);
+	return 0;
+}
+
+int main(void)
+{
+	const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	pid_t pid;
+	void *stack;
+
+	/* main thread */
+	f((void *)0);
+
+	stack = mmap(NULL, 2 * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	assert(stack != MAP_FAILED);
+	/* side thread */
+	pid = clone(f, stack + PAGE_SIZE, CLONE_THREAD|CLONE_SIGHAND|CLONE_VM, (void *)1);
+	assert(pid > 0);
+	pause();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
index 6ccb154cb4aa..22f8df1ad7d4 100755
--- a/tools/testing/selftests/pstore/pstore_post_reboot_tests
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -7,13 +7,16 @@
 #
 # Released under the terms of the GPL v2.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./common_tests
 
 if [ -e $REBOOT_FLAG  ]; then
     rm $REBOOT_FLAG
 else
     prlog "pstore_crash_test has not been executed yet. we skip further tests."
-    exit 0
+    exit $ksft_skip
 fi
 
 prlog -n "Mounting pstore filesystem ... "
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index c15f270e121d..65541c21a544 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Usage: configinit.sh config-spec-file [ build output dir ]
+# Usage: configinit.sh config-spec-file build-output-dir results-dir
 #
 # Create a .config file from the spec file.  Run from the kernel source tree.
 # Exits with 0 if all went well, with 1 if all went well but the config
@@ -40,20 +40,18 @@ mkdir $T
 
 c=$1
 buildloc=$2
+resdir=$3
 builddir=
-if test -n $buildloc
+if echo $buildloc | grep -q '^O='
 then
-	if echo $buildloc | grep -q '^O='
+	builddir=`echo $buildloc | sed -e 's/^O=//'`
+	if test ! -d $builddir
 	then
-		builddir=`echo $buildloc | sed -e 's/^O=//'`
-		if test ! -d $builddir
-		then
-			mkdir $builddir
-		fi
-	else
-		echo Bad build directory: \"$buildloc\"
-		exit 2
+		mkdir $builddir
 	fi
+else
+	echo Bad build directory: \"$buildloc\"
+	exit 2
 fi
 
 sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
@@ -61,12 +59,12 @@ sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
 grep '^grep' < $T/u.sh > $T/upd.sh
 echo "cat - $c" >> $T/upd.sh
 make mrproper
-make $buildloc distclean > $builddir/Make.distclean 2>&1
-make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1
+make $buildloc distclean > $resdir/Make.distclean 2>&1
+make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
 mv $builddir/.config $builddir/.config.sav
 sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
 cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err
+yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
 
 # verify new config matches specification.
 configcheck.sh $builddir/.config $c
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 34d126734cde..9115fcdb5617 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -2,7 +2,7 @@
 #
 # Build a kvm-ready Linux kernel from the tree in the current directory.
 #
-# Usage: kvm-build.sh config-template build-dir
+# Usage: kvm-build.sh config-template build-dir resdir
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -29,6 +29,7 @@ then
 	exit 1
 fi
 builddir=${2}
+resdir=${3}
 
 T=${TMPDIR-/tmp}/test-linux.sh.$$
 trap 'rm -rf $T' 0
@@ -41,19 +42,19 @@ CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_CONSOLE=y
 ___EOF___
 
-configinit.sh $T/config O=$builddir
+configinit.sh $T/config O=$builddir $resdir
 retval=$?
 if test $retval -gt 1
 then
 	exit 2
 fi
 ncpus=`cpus2use.sh`
-make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $builddir/Make.out 2>&1
+make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
 retval=$?
-if test $retval -ne 0 || grep "rcu[^/]*": < $builddir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $builddir/Make.out
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
 then
 	echo Kernel build error
-	egrep "Stop|Error|error:|warning:" < $builddir/Make.out
+	egrep "Stop|Error|error:|warning:" < $resdir/Make.out
 	echo Run aborted.
 	exit 3
 fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000000..98f650c9bf54
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+	echo Usage: $0 directory
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+	if egrep -q "error:|warning:" < $i
+	then
+		egrep "error:|warning:" < $i > $i.diags
+		files="$files $i.diags $i"
+	fi
+done
+if test -n "$files"
+then
+	$editor $files
+else
+	echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+	echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+	if test -r $i.diags
+	then
+		files="$files $i.diags $i"
+	fi
+done
+if test -n "$files"
+then
+	$editor $files
+else
+	echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6d0cba..0fa8a61ccb7b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@ fi
 
 configfile=`echo $i | sed -e 's/^.*\///'`
 ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+	    tr -d '\012\015'`"
 if test -z "$ngps"
 then
-	echo "$configfile -------"
+	echo "$configfile ------- " $stopstate
 else
-	title="$configfile ------- $ngps grace periods"
+	title="$configfile ------- $ngps GPs"
 	dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
 	if test -z "$dur"
 	then
@@ -46,9 +50,9 @@ else
 	else
 		ngpsps=`awk -v ngps=$ngps -v dur=$dur '
 			BEGIN { print ngps / dur }' < /dev/null`
-		title="$title ($ngpsps per second)"
+		title="$title ($ngpsps/s)"
 	fi
-	echo $title
+	echo $title $stopstate
 	nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
 	if test -z "$nclosecalls"
 	then
@@ -66,4 +70,5 @@ else
 	else
 		print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
 	fi
+	echo $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i > $i/console.log.rcu.diags
 fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f369dd..c9bab57a77eb 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -39,6 +39,7 @@ do
 			head -1 $resdir/log
 		fi
 		TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+		rm -f $i/console.log.*.diags
 		kvm-recheck-${TORTURE_SUITE}.sh $i
 		if test -f "$i/console.log"
 		then
@@ -48,10 +49,6 @@ do
 				cat $i/Make.oldconfig.err
 			fi
 			parse-build.sh $i/Make.out $configfile
-			if test "$TORTURE_SUITE" != rcuperf
-			then
-				parse-torture.sh $i/console.log $configfile
-			fi
 			parse-console.sh $i/console.log $configfile
 			if test -r $i/Warnings
 			then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0d7c17..58ca758a5786 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -98,14 +98,15 @@ then
 	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
 	# Arch-independent indicator
 	touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $builddir
+elif kvm-build.sh $T/Kc2 $builddir $resdir
 then
 	# Had to build a kernel for this test.
 	QEMU="`identify_qemu $builddir/vmlinux`"
 	BOOT_IMAGE="`identify_boot_image $QEMU`"
-	cp $builddir/Make*.out $resdir
 	cp $builddir/vmlinux $resdir
 	cp $builddir/.config $resdir
+	cp $builddir/Module.symvers $resdir > /dev/null || :
+	cp $builddir/System.map $resdir > /dev/null || :
 	if test -n "$BOOT_IMAGE"
 	then
 		cp $builddir/$BOOT_IMAGE $resdir
@@ -119,7 +120,6 @@ then
 	parse-build.sh $resdir/Make.out $title
 else
 	# Build failed.
-	cp $builddir/Make*.out $resdir
 	cp $builddir/.config $resdir || :
 	echo Build failed, not running KVM, see $resdir.
 	if test -f $builddir.wait
@@ -267,5 +267,4 @@ then
 	echo Unknown PID, cannot kill qemu command
 fi
 
-parse-torture.sh $resdir/console.log $title
 parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 56610dbbdf73..5a7a62d76a50 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -347,7 +347,7 @@ function dump(first, pastlast, batchnum)
 	print "needqemurun="
 	jn=1
 	for (j = first; j < pastlast; j++) {
-		builddir=KVM "/b" jn
+		builddir=KVM "/b1"
 		cpusr[jn] = cpus[j];
 		if (cfrep[cf[j]] == "") {
 			cfr[jn] = cf[j];
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d50ae0e..84933f6aed77 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,153 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 
+T=${TMPDIR-/tmp}/parse-console.sh.$$
 file="$1"
 title="$2"
 
+trap 'rm -f $T.seq $T.diags' 0
+
 . functions.sh
 
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+	:
+else
+	echo $title unreadable console output file: $file
+	exit 1
+fi
 if grep -Pq '\x00' < $file
 then
 	print_warning Console output contains nul bytes, old qemu still running?
 fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
-if test -s $1.diags
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
 then
-	print_warning Assertion failure in $file $title
-	# cat $1.diags
+	# check for abject failure
+
+	if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+	then
+		nerrs=`grep --binary-files=text '!!!' $file |
+		tail -1 |
+		awk '
+		{
+			for (i=NF-8;i<=NF;i++)
+				sum+=$i;
+		}
+		END { print sum }'`
+		print_bug $title FAILURE, $nerrs instances
+		exit
+	fi
+
+	grep --binary-files=text 'torture:.*ver:' $file |
+	egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+	sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+	awk '
+	BEGIN	{
+		ver = 0;
+		badseq = 0;
+		}
+
+		{
+		if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+			badseqno1 = ver;
+			badseqno2 = $5;
+			badseqnr = NR;
+			badseq = 1;
+		}
+		ver = $5
+		}
+
+	END	{
+		if (badseq) {
+			if (badseqno1 == badseqno2 && badseqno2 == ver)
+				print "GP HANG at " ver " torture stat " badseqnr;
+			else
+				print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+		}
+		}' > $T.seq
+
+	if grep -q SUCCESS $file
+	then
+		if test -s $T.seq
+		then
+			print_warning $title `cat $T.seq`
+			echo "   " $file
+			exit 2
+		fi
+	else
+		if grep -q "_HOTPLUG:" $file
+		then
+			print_warning HOTPLUG FAILURES $title `cat $T.seq`
+			echo "   " $file
+			exit 3
+		fi
+		echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+		if test -s $T.seq
+		then
+			print_warning $title `cat $T.seq`
+		fi
+		exit 2
+	fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+	print_warning "Assertion failure in $file $title"
+	# cat $T.diags
 	summary=""
-	n_badness=`grep -c Badness $1`
+	n_badness=`grep -c Badness $file`
 	if test "$n_badness" -ne 0
 	then
 		summary="$summary  Badness: $n_badness"
 	fi
-	n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+	n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
 	if test "$n_warn" -ne 0
 	then
 		summary="$summary  Warnings: $n_warn"
 	fi
-	n_bugs=`egrep -c 'BUG|Oops:' $1`
+	n_bugs=`egrep -c 'BUG|Oops:' $file`
 	if test "$n_bugs" -ne 0
 	then
 		summary="$summary  Bugs: $n_bugs"
 	fi
-	n_calltrace=`grep -c 'Call Trace:' $1`
+	n_calltrace=`grep -c 'Call Trace:' $file`
 	if test "$n_calltrace" -ne 0
 	then
 		summary="$summary  Call Traces: $n_calltrace"
 	fi
-	n_lockdep=`grep -c =========== $1`
+	n_lockdep=`grep -c =========== $file`
 	if test "$n_badness" -ne 0
 	then
 		summary="$summary  lockdep: $n_badness"
 	fi
-	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
+	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
 	if test "$n_stalls" -ne 0
 	then
 		summary="$summary  Stalls: $n_stalls"
 	fi
-	n_starves=`grep -c 'rcu_.*kthread starved for' $1`
+	n_starves=`grep -c 'rcu_.*kthread starved for' $file`
 	if test "$n_starves" -ne 0
 	then
 		summary="$summary  Starves: $n_starves"
 	fi
 	print_warning Summary: $summary
-else
-	rm $1.diags
+	cat $T.diags >> $file.diags
+fi
+for i in $file.*.diags
+do
+	if test -f "$i"
+	then
+		cat $i >> $file.diags
+	fi
+done
+if ! test -s $file.diags
+then
+	rm -f $file.diags
 fi
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50cfeb4..000000000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-#
-# Check the console output from a torture run for goodness.
-# The "file" is a pathname on the local system, and "title" is
-# a text string for error-message purposes.
-#
-# The file must contain torture output, but can be interspersed
-# with other dmesg text, as in console-log output.
-#
-# Usage: parse-torture.sh file title
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (C) IBM Corporation, 2011
-#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-
-T=${TMPDIR-/tmp}/parse-torture.sh.$$
-file="$1"
-title="$2"
-
-trap 'rm -f $T.seq' 0
-
-. functions.sh
-
-# check for presence of torture output file.
-
-if test -f "$file" -a -r "$file"
-then
-	:
-else
-	echo $title unreadable torture output file: $file
-	exit 1
-fi
-
-# check for abject failure
-
-if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
-then
-	nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
-	print_bug $title FAILURE, $nerrs instances
-	echo "   " $url
-	exit
-fi
-
-grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
-awk '
-BEGIN	{
-	ver = 0;
-	badseq = 0;
-	}
-
-	{
-	if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
-		badseqno1 = ver;
-		badseqno2 = $5;
-		badseqnr = NR;
-		badseq = 1;
-	}
-	ver = $5
-	}
-
-END	{
-	if (badseq) {
-		if (badseqno1 == badseqno2 && badseqno2 == ver)
-			print "GP HANG at " ver " torture stat " badseqnr;
-		else
-			print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
-	}
-	}' > $T.seq
-
-if grep -q SUCCESS $file
-then
-	if test -s $T.seq
-	then
-		print_warning $title $title `cat $T.seq`
-		echo "   " $file
-		exit 2
-	fi
-else
-	if grep -q "_HOTPLUG:" $file
-	then
-		print_warning HOTPLUG FAILURES $title `cat $T.seq`
-		echo "   " $file
-		exit 3
-	fi
-	echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
-	if test -s $T.seq
-	then
-		print_warning $title `cat $T.seq`
-	fi
-	exit 2
-fi
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index 6a0b9f69faad..c3c1fb5a9e1f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -3,9 +3,7 @@ TREE02
 TREE03
 TREE04
 TREE05
-TREE06
 TREE07
-TREE08
 TREE09
 SRCU-N
 SRCU-P
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 84a7d51b7481..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcud
+rcupdate.rcu_self_test=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
index 84a7d51b7481..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcud
+rcupdate.rcu_self_test=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
index 6c1a292a65fb..b39f1553a478 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -1,3 +1 @@
 rcupdate.rcu_self_test=1
-rcupdate.rcu_self_test_bh=1
-rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index 9f3a4d28e508..ea47da95374b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1,4 +1,4 @@
-rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
+maxcpus=8 nr_cpus=43
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 5d2cc0bd50a0..5c3213cc3ad7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -1,5 +1,5 @@
-rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
-rcutree.gp_preinit_delay=3
+rcutorture.onoff_interval=200 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=12
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
 rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index e6071bb96c7d..5adc6756792a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
+rcutree.rcu_fanout_leaf=4 nohz_full=1-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index c7fd050dfcd9..c419cac233ee 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -1,5 +1,4 @@
-rcutorture.torture_type=sched
-rcupdate.rcu_self_test_sched=1
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
+rcupdate.rcu_self_test=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index ad18b52a2cad..055f4aa79077 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -1,6 +1,4 @@
 rcupdate.rcu_self_test=1
-rcupdate.rcu_self_test_bh=1
-rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
deleted file mode 100644
index 883149b5f2d1..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
+++ /dev/null
@@ -1 +0,0 @@
-rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 1bd8efc4141e..22478fd3a865 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,5 +1,3 @@
-rcutorture.torture_type=sched
 rcupdate.rcu_self_test=1
-rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
 rcu_nocbs=0-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index 24ec91041957..7bab8246392b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -39,7 +39,7 @@ rcutorture_param_onoff () {
 	if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
 	then
 		echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2
-		echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30
+		echo rcutorture.onoff_interval=1000 rcutorture.onoff_holdoff=30
 	fi
 }
 
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
new file mode 100644
index 000000000000..cc610da7e369
--- /dev/null
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -0,0 +1,6 @@
+basic_percpu_ops_test
+basic_test
+basic_rseq_op_test
+param_test
+param_test_benchmark
+param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
new file mode 100644
index 000000000000..c30c52e1d0d2
--- /dev/null
+++ b/tools/testing/selftests/rseq/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
+LDLIBS += -lpthread
+
+# Own dependencies because we only want to build against 1st prerequisite, but
+# still track changes to header files and depend on shared object.
+OVERRIDE_TARGETS = 1
+
+TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
+		param_test_benchmark param_test_compare_twice
+
+TEST_GEN_PROGS_EXTENDED = librseq.so
+
+TEST_PROGS = run_param_test.sh
+
+include ../lib.mk
+
+$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
+	$(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
+
+$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
+	$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+					rseq.h rseq-*.h
+	$(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+					rseq.h rseq-*.h
+	$(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 000000000000..eb3f6db36d36
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))
+
+struct percpu_lock_entry {
+	intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+	struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+	intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+	struct percpu_lock lock;
+	struct test_data_entry c[CPU_SETSIZE];
+	int reps;
+};
+
+struct percpu_list_node {
+	intptr_t data;
+	struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+	struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+	struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock.  Returns the cpu lock was acquired on. */
+int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+	int cpu;
+
+	for (;;) {
+		int ret;
+
+		cpu = rseq_cpu_start();
+		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+					 0, 1, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	/*
+	 * Acquire semantic when taking lock after control dependency.
+	 * Matches rseq_smp_store_release().
+	 */
+	rseq_smp_acquire__after_ctrl_dep();
+	return cpu;
+}
+
+void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+	assert(lock->c[cpu].v == 1);
+	/*
+	 * Release lock, with release semantic. Matches
+	 * rseq_smp_acquire__after_ctrl_dep().
+	 */
+	rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+	struct spinlock_test_data *data = arg;
+	int i, cpu;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	for (i = 0; i < data->reps; i++) {
+		cpu = rseq_this_cpu_lock(&data->lock);
+		data->c[cpu].count++;
+		rseq_percpu_unlock(&data->lock, cpu);
+	}
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock.  Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+	const int num_threads = 200;
+	int i;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct spinlock_test_data data;
+
+	memset(&data, 0, sizeof(data));
+	data.reps = 5000;
+
+	for (i = 0; i < num_threads; i++)
+		pthread_create(&test_threads[i], NULL,
+			       test_percpu_spinlock_thread, &data);
+
+	for (i = 0; i < num_threads; i++)
+		pthread_join(test_threads[i], NULL);
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)data.reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+			struct percpu_list_node *node,
+			int *_cpu)
+{
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr, newval, expect;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load list->c[cpu].head with single-copy atomicity. */
+		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+		newval = (intptr_t)node;
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		node->next = (struct percpu_list_node *)expect;
+		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list; the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+					   int *_cpu)
+{
+	for (;;) {
+		struct percpu_list_node *head;
+		intptr_t *targetptr, expectnot, *load;
+		off_t offset;
+		int ret, cpu;
+
+		cpu = rseq_cpu_start();
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		expectnot = (intptr_t)NULL;
+		offset = offsetof(struct percpu_list_node, next);
+		load = (intptr_t *)&head;
+		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+						 offset, load, cpu);
+		if (rseq_likely(!ret)) {
+			if (_cpu)
+				*_cpu = cpu;
+			return head;
+		}
+		if (ret > 0)
+			return NULL;
+		/* Retry if rseq aborts. */
+	}
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+	struct percpu_list_node *node;
+
+	node = list->c[cpu].head;
+	if (!node)
+		return NULL;
+	list->c[cpu].head = node->next;
+	return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+	int i;
+	struct percpu_list *list = (struct percpu_list *)arg;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	for (i = 0; i < 100000; i++) {
+		struct percpu_list_node *node;
+
+		node = this_cpu_list_pop(list, NULL);
+		sched_yield();  /* encourage shuffling */
+		if (node)
+			this_cpu_list_push(list, node, NULL);
+	}
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads.  */
+void test_percpu_list(void)
+{
+	int i, j;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_list list;
+	pthread_t test_threads[200];
+	cpu_set_t allowed_cpus;
+
+	memset(&list, 0, sizeof(list));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		for (j = 1; j <= 100; j++) {
+			struct percpu_list_node *node;
+
+			expected_sum += j;
+
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			node->next = list.c[i].head;
+			list.c[i].head = node;
+		}
+	}
+
+	for (i = 0; i < 200; i++)
+		pthread_create(&test_threads[i], NULL,
+		       test_percpu_list_thread, &list);
+
+	for (i = 0; i < 200; i++)
+		pthread_join(test_threads[i], NULL);
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = __percpu_list_pop(&list, i))) {
+			sum += node->data;
+			free(node);
+		}
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+int main(int argc, char **argv)
+{
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	printf("spinlock\n");
+	test_percpu_spinlock();
+	printf("percpu_list\n");
+	test_percpu_list();
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	return 0;
+
+error:
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
new file mode 100644
index 000000000000..d8efbfb89193
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Basic test coverage for critical regions and rseq_current_cpu().
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "rseq.h"
+
+void test_cpu_pointer(void)
+{
+	cpu_set_t affinity, test_affinity;
+	int i;
+
+	sched_getaffinity(0, sizeof(affinity), &affinity);
+	CPU_ZERO(&test_affinity);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (CPU_ISSET(i, &affinity)) {
+			CPU_SET(i, &test_affinity);
+			sched_setaffinity(0, sizeof(test_affinity),
+					&test_affinity);
+			assert(sched_getcpu() == i);
+			assert(rseq_current_cpu() == i);
+			assert(rseq_current_cpu_raw() == i);
+			assert(rseq_cpu_start() == i);
+			CPU_CLR(i, &test_affinity);
+		}
+	}
+	sched_setaffinity(0, sizeof(affinity), &affinity);
+}
+
+int main(int argc, char **argv)
+{
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto init_thread_error;
+	}
+	printf("testing current cpu\n");
+	test_cpu_pointer();
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto init_thread_error;
+	}
+	return 0;
+
+init_thread_error:
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
new file mode 100644
index 000000000000..eec2663261f2
--- /dev/null
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -0,0 +1,1329 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <stddef.h>
+
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+#define NR_INJECT	9
+static int loop_cnt[NR_INJECT + 1];
+
+static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
+static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
+static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
+static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
+static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
+static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
+
+static int opt_modulo, verbose;
+
+static int opt_yield, opt_signal, opt_sleep,
+		opt_disable_rseq, opt_threads = 200,
+		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+
+#ifndef RSEQ_SKIP_FASTPATH
+static long long opt_reps = 5000;
+#else
+static long long opt_reps = 100;
+#endif
+
+static __thread __attribute__((tls_model("initial-exec")))
+unsigned int signals_delivered;
+
+#ifndef BENCHMARK
+
+static __thread __attribute__((tls_model("initial-exec"), unused))
+unsigned int yield_mod_cnt, nr_abort;
+
+#define printf_verbose(fmt, ...)			\
+	do {						\
+		if (verbose)				\
+			printf(fmt, ## __VA_ARGS__);	\
+	} while (0)
+
+#ifdef __i386__
+
+#define INJECT_ASM_REG	"eax"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
+	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+	"jz 333f\n\t" \
+	"222:\n\t" \
+	"dec %%" INJECT_ASM_REG "\n\t" \
+	"jnz 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__x86_64__)
+
+#define INJECT_ASM_REG_P	"rax"
+#define INJECT_ASM_REG		"eax"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG_P \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
+	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
+	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+	"jz 333f\n\t" \
+	"222:\n\t" \
+	"dec %%" INJECT_ASM_REG "\n\t" \
+	"jnz 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__s390__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"r12"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
+	"je 333f\n\t" \
+	"222:\n\t" \
+	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
+	"jnz 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__ARMEL__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"r4"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"cmp " INJECT_ASM_REG ", #0\n\t" \
+	"beq 333f\n\t" \
+	"222:\n\t" \
+	"subs " INJECT_ASM_REG ", #1\n\t" \
+	"bne 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__AARCH64EL__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
+	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
+	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
+	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
+	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
+	, [loop_cnt_6] "Qo" (loop_cnt[6])
+
+#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32
+
+#define RSEQ_INJECT_ASM(n) \
+	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
+	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
+	"222:\n"							\
+	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
+	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
+	"333:\n"
+
+#elif __PPC__
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"r18"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
+	"beq 333f\n\t" \
+	"222:\n\t" \
+	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
+	"bne 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"$5"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"beqz " INJECT_ASM_REG ", 333f\n\t" \
+	"222:\n\t" \
+	"addiu " INJECT_ASM_REG ", -1\n\t" \
+	"bnez " INJECT_ASM_REG ", 222b\n\t" \
+	"333:\n\t"
+
+#else
+#error unsupported target
+#endif
+
+#define RSEQ_INJECT_FAILED \
+	nr_abort++;
+
+#define RSEQ_INJECT_C(n) \
+{ \
+	int loc_i, loc_nr_loops = loop_cnt[n]; \
+	\
+	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
+		rseq_barrier(); \
+	} \
+	if (loc_nr_loops == -1 && opt_modulo) { \
+		if (yield_mod_cnt == opt_modulo - 1) { \
+			if (opt_sleep > 0) \
+				poll(NULL, 0, opt_sleep); \
+			if (opt_yield) \
+				sched_yield(); \
+			if (opt_signal) \
+				raise(SIGUSR1); \
+			yield_mod_cnt = 0; \
+		} else { \
+			yield_mod_cnt++; \
+		} \
+	} \
+}
+
+#else
+
+#define printf_verbose(fmt, ...)
+
+#endif /* BENCHMARK */
+
+#include "rseq.h"
+
+struct percpu_lock_entry {
+	intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+	struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+	intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+	struct percpu_lock lock;
+	struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct spinlock_thread_test_data {
+	struct spinlock_test_data *data;
+	long long reps;
+	int reg;
+};
+
+struct inc_test_data {
+	struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct inc_thread_test_data {
+	struct inc_test_data *data;
+	long long reps;
+	int reg;
+};
+
+struct percpu_list_node {
+	intptr_t data;
+	struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+	struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+	struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+#define BUFFER_ITEM_PER_CPU	100
+
+struct percpu_buffer_node {
+	intptr_t data;
+};
+
+struct percpu_buffer_entry {
+	intptr_t offset;
+	intptr_t buflen;
+	struct percpu_buffer_node **array;
+} __attribute__((aligned(128)));
+
+struct percpu_buffer {
+	struct percpu_buffer_entry c[CPU_SETSIZE];
+};
+
+#define MEMCPY_BUFFER_ITEM_PER_CPU	100
+
+struct percpu_memcpy_buffer_node {
+	intptr_t data1;
+	uint64_t data2;
+};
+
+struct percpu_memcpy_buffer_entry {
+	intptr_t offset;
+	intptr_t buflen;
+	struct percpu_memcpy_buffer_node *array;
+} __attribute__((aligned(128)));
+
+struct percpu_memcpy_buffer {
+	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock. Grabs lock on current cpu. */
+static int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+	int cpu;
+
+	for (;;) {
+		int ret;
+
+		cpu = rseq_cpu_start();
+		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+					 0, 1, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	/*
+	 * Acquire semantic when taking lock after control dependency.
+	 * Matches rseq_smp_store_release().
+	 */
+	rseq_smp_acquire__after_ctrl_dep();
+	return cpu;
+}
+
+static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+	assert(lock->c[cpu].v == 1);
+	/*
+	 * Release lock, with release semantic. Matches
+	 * rseq_smp_acquire__after_ctrl_dep().
+	 */
+	rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+	struct spinlock_thread_test_data *thread_data = arg;
+	struct spinlock_test_data *data = thread_data->data;
+	long long i, reps;
+
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_register_current_thread())
+		abort();
+	reps = thread_data->reps;
+	for (i = 0; i < reps; i++) {
+		int cpu = rseq_cpu_start();
+
+		cpu = rseq_this_cpu_lock(&data->lock);
+		data->c[cpu].count++;
+		rseq_percpu_unlock(&data->lock, cpu);
+#ifndef BENCHMARK
+		if (i != 0 && !(i % (reps / 10)))
+			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
+#endif
+	}
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_unregister_current_thread())
+		abort();
+	return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock.  Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+	const int num_threads = opt_threads;
+	int i, ret;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct spinlock_test_data data;
+	struct spinlock_thread_test_data thread_data[num_threads];
+
+	memset(&data, 0, sizeof(data));
+	for (i = 0; i < num_threads; i++) {
+		thread_data[i].reps = opt_reps;
+		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+			thread_data[i].reg = 1;
+		else
+			thread_data[i].reg = 0;
+		thread_data[i].data = &data;
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_spinlock_thread,
+				     &thread_data[i]);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void *test_percpu_inc_thread(void *arg)
+{
+	struct inc_thread_test_data *thread_data = arg;
+	struct inc_test_data *data = thread_data->data;
+	long long i, reps;
+
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_register_current_thread())
+		abort();
+	reps = thread_data->reps;
+	for (i = 0; i < reps; i++) {
+		int ret;
+
+		do {
+			int cpu;
+
+			cpu = rseq_cpu_start();
+			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+		} while (rseq_unlikely(ret));
+#ifndef BENCHMARK
+		if (i != 0 && !(i % (reps / 10)))
+			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
+#endif
+	}
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_unregister_current_thread())
+		abort();
+	return NULL;
+}
+
+void test_percpu_inc(void)
+{
+	const int num_threads = opt_threads;
+	int i, ret;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct inc_test_data data;
+	struct inc_thread_test_data thread_data[num_threads];
+
+	memset(&data, 0, sizeof(data));
+	for (i = 0; i < num_threads; i++) {
+		thread_data[i].reps = opt_reps;
+		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+			thread_data[i].reg = 1;
+		else
+			thread_data[i].reg = 0;
+		thread_data[i].data = &data;
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_inc_thread,
+				     &thread_data[i]);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+			struct percpu_list_node *node,
+			int *_cpu)
+{
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr, newval, expect;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load list->c[cpu].head with single-copy atomicity. */
+		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+		newval = (intptr_t)node;
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		node->next = (struct percpu_list_node *)expect;
+		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list; the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+					   int *_cpu)
+{
+	struct percpu_list_node *node = NULL;
+	int cpu;
+
+	for (;;) {
+		struct percpu_list_node *head;
+		intptr_t *targetptr, expectnot, *load;
+		off_t offset;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		expectnot = (intptr_t)NULL;
+		offset = offsetof(struct percpu_list_node, next);
+		load = (intptr_t *)&head;
+		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+						   offset, load, cpu);
+		if (rseq_likely(!ret)) {
+			node = head;
+			break;
+		}
+		if (ret > 0)
+			break;
+		/* Retry if rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return node;
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+	struct percpu_list_node *node;
+
+	node = list->c[cpu].head;
+	if (!node)
+		return NULL;
+	list->c[cpu].head = node->next;
+	return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+	long long i, reps;
+	struct percpu_list *list = (struct percpu_list *)arg;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		abort();
+
+	reps = opt_reps;
+	for (i = 0; i < reps; i++) {
+		struct percpu_list_node *node;
+
+		node = this_cpu_list_pop(list, NULL);
+		if (opt_yield)
+			sched_yield();  /* encourage shuffling */
+		if (node)
+			this_cpu_list_push(list, node, NULL);
+	}
+
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads.  */
+void test_percpu_list(void)
+{
+	const int num_threads = opt_threads;
+	int i, j, ret;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_list list;
+	pthread_t test_threads[num_threads];
+	cpu_set_t allowed_cpus;
+
+	memset(&list, 0, sizeof(list));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		for (j = 1; j <= 100; j++) {
+			struct percpu_list_node *node;
+
+			expected_sum += j;
+
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			node->next = list.c[i].head;
+			list.c[i].head = node;
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_list_thread, &list);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = __percpu_list_pop(&list, i))) {
+			sum += node->data;
+			free(node);
+		}
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+bool this_cpu_buffer_push(struct percpu_buffer *buffer,
+			  struct percpu_buffer_node *node,
+			  int *_cpu)
+{
+	bool result = false;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr_spec, newval_spec;
+		intptr_t *targetptr_final, newval_final;
+		intptr_t offset;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == buffer->c[cpu].buflen)
+			break;
+		newval_spec = (intptr_t)node;
+		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
+		newval_final = offset + 1;
+		targetptr_final = &buffer->c[cpu].offset;
+		if (opt_mb)
+			ret = rseq_cmpeqv_trystorev_storev_release(
+				targetptr_final, offset, targetptr_spec,
+				newval_spec, newval_final, cpu);
+		else
+			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
+				offset, targetptr_spec, newval_spec,
+				newval_final, cpu);
+		if (rseq_likely(!ret)) {
+			result = true;
+			break;
+		}
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return result;
+}
+
+struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
+					       int *_cpu)
+{
+	struct percpu_buffer_node *head;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr, newval;
+		intptr_t offset;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load offset with single-copy atomicity. */
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == 0) {
+			head = NULL;
+			break;
+		}
+		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
+		newval = offset - 1;
+		targetptr = (intptr_t *)&buffer->c[cpu].offset;
+		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
+			(intptr_t *)&buffer->c[cpu].array[offset - 1],
+			(intptr_t)head, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return head;
+}
+
+/*
+ * __percpu_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
+					       int cpu)
+{
+	struct percpu_buffer_node *head;
+	intptr_t offset;
+
+	offset = buffer->c[cpu].offset;
+	if (offset == 0)
+		return NULL;
+	head = buffer->c[cpu].array[offset - 1];
+	buffer->c[cpu].offset = offset - 1;
+	return head;
+}
+
+void *test_percpu_buffer_thread(void *arg)
+{
+	long long i, reps;
+	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		abort();
+
+	reps = opt_reps;
+	for (i = 0; i < reps; i++) {
+		struct percpu_buffer_node *node;
+
+		node = this_cpu_buffer_pop(buffer, NULL);
+		if (opt_yield)
+			sched_yield();  /* encourage shuffling */
+		if (node) {
+			if (!this_cpu_buffer_push(buffer, node, NULL)) {
+				/* Should increase buffer size. */
+				abort();
+			}
+		}
+	}
+
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads.  */
+void test_percpu_buffer(void)
+{
+	const int num_threads = opt_threads;
+	int i, j, ret;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_buffer buffer;
+	pthread_t test_threads[num_threads];
+	cpu_set_t allowed_cpus;
+
+	memset(&buffer, 0, sizeof(buffer));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		/* Worse-case is every item in same CPU. */
+		buffer.c[i].array =
+			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+			       BUFFER_ITEM_PER_CPU);
+		assert(buffer.c[i].array);
+		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
+		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
+			struct percpu_buffer_node *node;
+
+			expected_sum += j;
+
+			/*
+			 * We could theoretically put the word-sized
+			 * "data" directly in the buffer. However, we
+			 * want to model objects that would not fit
+			 * within a single word, so allocate an object
+			 * for each node.
+			 */
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			buffer.c[i].array[j - 1] = node;
+			buffer.c[i].offset++;
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_buffer_thread, &buffer);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_buffer_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = __percpu_buffer_pop(&buffer, i))) {
+			sum += node->data;
+			free(node);
+		}
+		free(buffer.c[i].array);
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
+				 struct percpu_memcpy_buffer_node item,
+				 int *_cpu)
+{
+	bool result = false;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr_final, newval_final, offset;
+		char *destptr, *srcptr;
+		size_t copylen;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load offset with single-copy atomicity. */
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == buffer->c[cpu].buflen)
+			break;
+		destptr = (char *)&buffer->c[cpu].array[offset];
+		srcptr = (char *)&item;
+		/* copylen must be <= 4kB. */
+		copylen = sizeof(item);
+		newval_final = offset + 1;
+		targetptr_final = &buffer->c[cpu].offset;
+		if (opt_mb)
+			ret = rseq_cmpeqv_trymemcpy_storev_release(
+				targetptr_final, offset,
+				destptr, srcptr, copylen,
+				newval_final, cpu);
+		else
+			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+				offset, destptr, srcptr, copylen,
+				newval_final, cpu);
+		if (rseq_likely(!ret)) {
+			result = true;
+			break;
+		}
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return result;
+}
+
+bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+				struct percpu_memcpy_buffer_node *item,
+				int *_cpu)
+{
+	bool result = false;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr_final, newval_final, offset;
+		char *destptr, *srcptr;
+		size_t copylen;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load offset with single-copy atomicity. */
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == 0)
+			break;
+		destptr = (char *)item;
+		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
+		/* copylen must be <= 4kB. */
+		copylen = sizeof(*item);
+		newval_final = offset - 1;
+		targetptr_final = &buffer->c[cpu].offset;
+		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+			offset, destptr, srcptr, copylen,
+			newval_final, cpu);
+		if (rseq_likely(!ret)) {
+			result = true;
+			break;
+		}
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return result;
+}
+
+/*
+ * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+				struct percpu_memcpy_buffer_node *item,
+				int cpu)
+{
+	intptr_t offset;
+
+	offset = buffer->c[cpu].offset;
+	if (offset == 0)
+		return false;
+	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
+	buffer->c[cpu].offset = offset - 1;
+	return true;
+}
+
+void *test_percpu_memcpy_buffer_thread(void *arg)
+{
+	long long i, reps;
+	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		abort();
+
+	reps = opt_reps;
+	for (i = 0; i < reps; i++) {
+		struct percpu_memcpy_buffer_node item;
+		bool result;
+
+		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
+		if (opt_yield)
+			sched_yield();  /* encourage shuffling */
+		if (result) {
+			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
+				/* Should increase buffer size. */
+				abort();
+			}
+		}
+	}
+
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads.  */
+void test_percpu_memcpy_buffer(void)
+{
+	const int num_threads = opt_threads;
+	int i, j, ret;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_memcpy_buffer buffer;
+	pthread_t test_threads[num_threads];
+	cpu_set_t allowed_cpus;
+
+	memset(&buffer, 0, sizeof(buffer));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		/* Worse-case is every item in same CPU. */
+		buffer.c[i].array =
+			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+			       MEMCPY_BUFFER_ITEM_PER_CPU);
+		assert(buffer.c[i].array);
+		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
+		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
+			expected_sum += 2 * j + 1;
+
+			/*
+			 * We could theoretically put the word-sized
+			 * "data" directly in the buffer. However, we
+			 * want to model objects that would not fit
+			 * within a single word, so allocate an object
+			 * for each node.
+			 */
+			buffer.c[i].array[j - 1].data1 = j;
+			buffer.c[i].array[j - 1].data2 = j + 1;
+			buffer.c[i].offset++;
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_memcpy_buffer_thread,
+				     &buffer);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_memcpy_buffer_node item;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
+			sum += item.data1;
+			sum += item.data2;
+		}
+		free(buffer.c[i].array);
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+static void test_signal_interrupt_handler(int signo)
+{
+	signals_delivered++;
+}
+
+static int set_signal_handler(void)
+{
+	int ret = 0;
+	struct sigaction sa;
+	sigset_t sigset;
+
+	ret = sigemptyset(&sigset);
+	if (ret < 0) {
+		perror("sigemptyset");
+		return ret;
+	}
+
+	sa.sa_handler = test_signal_interrupt_handler;
+	sa.sa_mask = sigset;
+	sa.sa_flags = 0;
+	ret = sigaction(SIGUSR1, &sa, NULL);
+	if (ret < 0) {
+		perror("sigaction");
+		return ret;
+	}
+
+	printf_verbose("Signal handler set for SIGUSR1\n");
+
+	return ret;
+}
+
+static void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s <OPTIONS>\n",
+		argv[0]);
+	printf("OPTIONS:\n");
+	printf("	[-1 loops] Number of loops for delay injection 1\n");
+	printf("	[-2 loops] Number of loops for delay injection 2\n");
+	printf("	[-3 loops] Number of loops for delay injection 3\n");
+	printf("	[-4 loops] Number of loops for delay injection 4\n");
+	printf("	[-5 loops] Number of loops for delay injection 5\n");
+	printf("	[-6 loops] Number of loops for delay injection 6\n");
+	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
+	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
+	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
+	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
+	printf("	[-y] Yield\n");
+	printf("	[-k] Kill thread with signal\n");
+	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
+	printf("	[-t N] Number of threads (default 200)\n");
+	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
+	printf("	[-d] Disable rseq system call (no initialization)\n");
+	printf("	[-D M] Disable rseq for each M threads\n");
+	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
+	printf("	[-v] Verbose output.\n");
+	printf("	[-h] Show this help.\n");
+	printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+	int i;
+
+	for (i = 1; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
+			i++;
+			break;
+		case 'm':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_modulo = atol(argv[i + 1]);
+			if (opt_modulo < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 's':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_sleep = atol(argv[i + 1]);
+			if (opt_sleep < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'y':
+			opt_yield = 1;
+			break;
+		case 'k':
+			opt_signal = 1;
+			break;
+		case 'd':
+			opt_disable_rseq = 1;
+			break;
+		case 'D':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_disable_mod = atol(argv[i + 1]);
+			if (opt_disable_mod < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 't':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_threads = atol(argv[i + 1]);
+			if (opt_threads < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'r':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_reps = atoll(argv[i + 1]);
+			if (opt_reps < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'h':
+			show_usage(argc, argv);
+			goto end;
+		case 'T':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_test = *argv[i + 1];
+			switch (opt_test) {
+			case 's':
+			case 'l':
+			case 'i':
+			case 'b':
+			case 'm':
+				break;
+			default:
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'M':
+			opt_mb = 1;
+			break;
+		default:
+			show_usage(argc, argv);
+			goto error;
+		}
+	}
+
+	loop_cnt_1 = loop_cnt[1];
+	loop_cnt_2 = loop_cnt[2];
+	loop_cnt_3 = loop_cnt[3];
+	loop_cnt_4 = loop_cnt[4];
+	loop_cnt_5 = loop_cnt[5];
+	loop_cnt_6 = loop_cnt[6];
+
+	if (set_signal_handler())
+		goto error;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		goto error;
+	switch (opt_test) {
+	case 's':
+		printf_verbose("spinlock\n");
+		test_percpu_spinlock();
+		break;
+	case 'l':
+		printf_verbose("linked list\n");
+		test_percpu_list();
+		break;
+	case 'b':
+		printf_verbose("buffer\n");
+		test_percpu_buffer();
+		break;
+	case 'm':
+		printf_verbose("memcpy buffer\n");
+		test_percpu_memcpy_buffer();
+		break;
+	case 'i':
+		printf_verbose("counter increment\n");
+		test_percpu_inc();
+		break;
+	}
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+end:
+	return 0;
+
+error:
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
new file mode 100644
index 000000000000..3cea19877227
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -0,0 +1,716 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_rmb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_wmb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,	start_ip,		\
+				post_commit_offset, abort_ip)		\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip,			\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"adr r0, " __rseq_str(cs_label) "\n\t"			\
+		"str r0, %[" __rseq_str(rseq_cs) "]\n\t"		\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t"	\
+		"cmp %[" __rseq_str(cpu_id) "], r0\n\t"		\
+		"bne " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
+				abort_label, version, flags,		\
+				start_ip, post_commit_offset, abort_ip)	\
+		".balign 32\n\t"					\
+		__rseq_str(table_label) ":\n\t"				\
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+		".word " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+			      start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
+				abort_label, 0x0, 0x0, start_ip,	\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+#endif
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expectnot], r0\n\t"
+		"beq %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expectnot], r0\n\t"
+		"beq %l[error2]\n\t"
+#endif
+		"str r0, %[load]\n\t"
+		"add r0, %[voffp]\n\t"
+		"ldr r0, [r0]\n\t"
+		/* final store */
+		"str r0, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"Ir" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		"ldr r0, %[v]\n\t"
+		"add r0, %[count]\n\t"
+		/* final store */
+		"str r0, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [count]		"Ir" (count)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+#endif
+		/* try store */
+		"str %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+#endif
+		/* try store */
+		"str %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"dmb\n\t"	/* full mb provides store-release */
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		"ldr r0, %[v2]\n\t"
+		"cmp %[expect2], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+		"ldr r0, %[v2]\n\t"
+		"cmp %[expect2], r0\n\t"
+		"bne %l[error3]\n\t"
+#endif
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		"str %[src], %[rseq_scratch0]\n\t"
+		"str %[dst], %[rseq_scratch1]\n\t"
+		"str %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 7f\n\t"
+#endif
+		/* try memcpy */
+		"cmp %[len], #0\n\t" \
+		"beq 333f\n\t" \
+		"222:\n\t" \
+		"ldrb %%r0, [%[src]]\n\t" \
+		"strb %%r0, [%[dst]]\n\t" \
+		"adds %[src], #1\n\t" \
+		"adds %[dst], #1\n\t" \
+		"subs %[len], #1\n\t" \
+		"bne 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"ldr %[len], %[rseq_scratch2]\n\t"
+		"ldr %[dst], %[rseq_scratch1]\n\t"
+		"ldr %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      "ldr %[len], %[rseq_scratch2]\n\t"
+				      "ldr %[dst], %[rseq_scratch1]\n\t"
+				      "ldr %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		"str %[src], %[rseq_scratch0]\n\t"
+		"str %[dst], %[rseq_scratch1]\n\t"
+		"str %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 7f\n\t"
+#endif
+		/* try memcpy */
+		"cmp %[len], #0\n\t" \
+		"beq 333f\n\t" \
+		"222:\n\t" \
+		"ldrb %%r0, [%[src]]\n\t" \
+		"strb %%r0, [%[dst]]\n\t" \
+		"adds %[src], #1\n\t" \
+		"adds %[dst], #1\n\t" \
+		"subs %[len], #1\n\t" \
+		"bne 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"dmb\n\t"	/* full mb provides store-release */
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"ldr %[len], %[rseq_scratch2]\n\t"
+		"ldr %[dst], %[rseq_scratch1]\n\t"
+		"ldr %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      "ldr %[len], %[rseq_scratch2]\n\t"
+				      "ldr %[dst], %[rseq_scratch1]\n\t"
+				      "ldr %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
new file mode 100644
index 000000000000..954f34671ca6
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -0,0 +1,594 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm64.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
+ */
+
+#define RSEQ_SIG	0xd428bc00	/* BRK #0x45E0 */
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("dmb ish" ::: "memory")
+#define rseq_smp_rmb()	__asm__ __volatile__ ("dmb ishld" ::: "memory")
+#define rseq_smp_wmb()	__asm__ __volatile__ ("dmb ishst" ::: "memory")
+
+#define rseq_smp_load_acquire(p)						\
+__extension__ ({								\
+	__typeof(*p) ____p1;							\
+	switch (sizeof(*p)) {							\
+	case 1:									\
+		asm volatile ("ldarb %w0, %1"					\
+			: "=r" (*(__u8 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	case 2:									\
+		asm volatile ("ldarh %w0, %1"					\
+			: "=r" (*(__u16 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	case 4:									\
+		asm volatile ("ldar %w0, %1"					\
+			: "=r" (*(__u32 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	case 8:									\
+		asm volatile ("ldar %0, %1"					\
+			: "=r" (*(__u64 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	}									\
+	____p1;									\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)						\
+do {										\
+	switch (sizeof(*p)) {							\
+	case 1:									\
+		asm volatile ("stlrb %w1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u8)v)					\
+				: "memory");					\
+		break;								\
+	case 2:									\
+		asm volatile ("stlrh %w1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u16)v)				\
+				: "memory");					\
+		break;								\
+	case 4:									\
+		asm volatile ("stlr %w1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u32)v)				\
+				: "memory");					\
+		break;								\
+	case 8:									\
+		asm volatile ("stlr %1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u64)v)				\
+				: "memory");					\
+		break;								\
+	}									\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define RSEQ_ASM_TMP_REG32	"w15"
+#define RSEQ_ASM_TMP_REG	"x15"
+#define RSEQ_ASM_TMP_REG_2	"x14"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,		\
+				post_commit_offset, abort_ip)			\
+	"	.pushsection	__rseq_table, \"aw\"\n"				\
+	"	.balign	32\n"							\
+	__rseq_str(label) ":\n"							\
+	"	.long	" __rseq_str(version) ", " __rseq_str(flags) "\n"	\
+	"	.quad	" __rseq_str(start_ip) ", "				\
+			  __rseq_str(post_commit_offset) ", "			\
+			  __rseq_str(abort_ip) "\n"				\
+	"	.popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,			\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+	RSEQ_INJECT_ASM(1)							\
+	"	adrp	" RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n"	\
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", :lo12:" __rseq_str(cs_label) "\n"			\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n"	\
+	__rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)				\
+	"	b	222f\n"							\
+	"	.inst 	"	__rseq_str(RSEQ_SIG) "\n"			\
+	__rseq_str(label) ":\n"							\
+	"	b	%l[" __rseq_str(abort_label) "]\n"			\
+	"222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var)						\
+	"	str	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
+	"	stlr	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
+	RSEQ_ASM_OP_STORE(value, var)						\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label)		\
+	RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(expect) "]\n"				\
+	"	cbnz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"	\
+	"	sub	" RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32		\
+			", %w[" __rseq_str(expect) "]\n"			\
+	"	cbnz	" RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(expect) "]\n"				\
+	"	cbz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
+	RSEQ_INJECT_ASM(2)							\
+	RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var)							\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var)						\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset)						\
+	"	ldr	" RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(offset) "]]\n"
+
+#define RSEQ_ASM_OP_R_ADD(count)						\
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)					\
+	"	cbz	%[" __rseq_str(len) "], 333f\n"				\
+	"	mov	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n"	\
+	"222:	sub	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n"	\
+	"	ldrb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]"	\
+			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
+	"	strb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]"	\
+			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
+	"	cbnz	" RSEQ_ASM_TMP_REG_2 ", 222b\n"				\
+	"333:\n"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+		RSEQ_ASM_OP_R_LOAD(v)
+		RSEQ_ASM_OP_R_STORE(load)
+		RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [load]		"Qo" (*load),
+		  [voffp]		"r" (voffp)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		RSEQ_ASM_OP_R_LOAD(v)
+		RSEQ_ASM_OP_R_ADD(count)
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [count]		"r" (count)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [v2]			"Qo" (*v2),
+		  [newv2]		"r" (newv2)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [v2]			"Qo" (*v2),
+		  [newv2]		"r" (newv2)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [expect]		"r" (expect),
+		  [v2]			"Qo" (*v2),
+		  [expect2]		"r" (expect2),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index 000000000000..7f48ecf46994
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb()	rseq_smp_mb()
+#define rseq_smp_wmb()	rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG			".dword"
+# define LONG_LA		"dla"
+# define LONG_L			"ld"
+# define LONG_S			"sd"
+# define LONG_ADDI		"daddiu"
+# define U32_U64_PAD(x)		x
+#elif _MIPS_SZLONG == 32
+# define LONG			".word"
+# define LONG_LA		"la"
+# define LONG_L			"lw"
+# define LONG_S			"sw"
+# define LONG_ADDI		"addiu"
+# ifdef __BIG_ENDIAN
+#  define U32_U64_PAD(x)	"0x0, " x
+# else
+#  define U32_U64_PAD(x)	x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,	start_ip, \
+				post_commit_offset, abort_ip) \
+		".pushsection __rseq_table, \"aw\"\n\t" \
+		".balign 32\n\t" \
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+		RSEQ_INJECT_ASM(1) \
+		LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
+		LONG_S  " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+		RSEQ_INJECT_ASM(2) \
+		"lw  $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+				abort_label, version, flags, \
+				start_ip, post_commit_offset, abort_ip) \
+		".balign 32\n\t" \
+		__rseq_str(table_label) ":\n\t" \
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+		".word " __rseq_str(RSEQ_SIG) "\n\t" \
+		__rseq_str(label) ":\n\t" \
+		teardown \
+		"b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+			      start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+				abort_label, 0x0, 0x0, start_ip, \
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+		__rseq_str(label) ":\n\t" \
+		teardown \
+		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"beq $4, %[expectnot], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+		LONG_S " $4, %[load]\n\t"
+		LONG_ADDI " $4, %[voffp]\n\t"
+		LONG_L " $4, 0($4)\n\t"
+		/* final store */
+		LONG_S " $4, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"Ir" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		LONG_L " $4, %[v]\n\t"
+		LONG_ADDI " $4, %[count]\n\t"
+		/* final store */
+		LONG_S " $4, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [count]		"Ir" (count)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"sync\n\t"	/* full sync provides store-release */
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		LONG_L " $4, %[v2]\n\t"
+		"bne $4, %[expect2], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+		LONG_L " $4, %[v2]\n\t"
+		"bne $4, %[expect2], %l[error3]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uintptr_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S "  %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 7f\n\t"
+#endif
+		/* try memcpy */
+		"beqz %[len], 333f\n\t" \
+		"222:\n\t" \
+		"lb   $4, 0(%[src])\n\t" \
+		"sb   $4, 0(%[dst])\n\t" \
+		LONG_ADDI " %[src], 1\n\t" \
+		LONG_ADDI " %[dst], 1\n\t" \
+		LONG_ADDI " %[len], -1\n\t" \
+		"bnez %[len], 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      LONG_L " %[len], %[rseq_scratch2]\n\t"
+				      LONG_L " %[dst], %[rseq_scratch1]\n\t"
+				      LONG_L " %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uintptr_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S " %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 7f\n\t"
+#endif
+		/* try memcpy */
+		"beqz %[len], 333f\n\t" \
+		"222:\n\t" \
+		"lb   $4, 0(%[src])\n\t" \
+		"sb   $4, 0(%[dst])\n\t" \
+		LONG_ADDI " %[src], 1\n\t" \
+		LONG_ADDI " %[dst], 1\n\t" \
+		LONG_ADDI " %[len], -1\n\t" \
+		"bnez %[len], 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"sync\n\t"	/* full sync provides store-release */
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      LONG_L " %[len], %[rseq_scratch2]\n\t"
+				      LONG_L " %[dst], %[rseq_scratch1]\n\t"
+				      LONG_L " %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
new file mode 100644
index 000000000000..52630c9f42be
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -0,0 +1,671 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-ppc.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()		__asm__ __volatile__ ("sync"	::: "memory", "cc")
+#define rseq_smp_lwsync()	__asm__ __volatile__ ("lwsync"	::: "memory", "cc")
+#define rseq_smp_rmb()		rseq_smp_lwsync()
+#define rseq_smp_wmb()		rseq_smp_lwsync()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_lwsync();						\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_lwsync()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_lwsync();						\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ */
+
+#ifdef __PPC64__
+
+#define STORE_WORD	"std "
+#define LOAD_WORD	"ld "
+#define LOADX_WORD	"ldx "
+#define CMP_WORD	"cmpd "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,				\
+			start_ip, post_commit_offset, abort_ip)			\
+		".pushsection __rseq_table, \"aw\"\n\t"				\
+		".balign 32\n\t"						\
+		__rseq_str(label) ":\n\t"					\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t"	\
+		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+		RSEQ_INJECT_ASM(1)						\
+		"lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t"		\
+		"ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t"	\
+		"rldicr %%r17, %%r17, 32, 31\n\t"				\
+		"oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t"	\
+		"ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t"		\
+		"std %%r17, %[" __rseq_str(rseq_cs) "]\n\t"			\
+		__rseq_str(label) ":\n\t"
+
+#else /* #ifdef __PPC64__ */
+
+#define STORE_WORD	"stw "
+#define LOAD_WORD	"lwz "
+#define LOADX_WORD	"lwzx "
+#define CMP_WORD	"cmpw "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,				\
+			start_ip, post_commit_offset, abort_ip)			\
+		".pushsection __rseq_table, \"aw\"\n\t"				\
+		".balign 32\n\t"						\
+		__rseq_str(label) ":\n\t"					\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t"	\
+		/* 32-bit only supported on BE */				\
+		".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+		RSEQ_INJECT_ASM(1)						\
+		"lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t"			\
+		"addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t"		\
+		"stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t"			\
+		__rseq_str(label) ":\n\t"
+
+#endif /* #ifdef __PPC64__ */
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)	\
+		__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+					(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
+		RSEQ_INJECT_ASM(2)						\
+		"lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t"		\
+		"cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t"		\
+		"bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)				\
+		".pushsection __rseq_failure, \"ax\"\n\t"			\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"				\
+		__rseq_str(label) ":\n\t"					\
+		"b %l[" __rseq_str(abort_label) "]\n\t"				\
+		".popsection\n\t"
+
+/*
+ * RSEQ_ASM_OPs: asm operations for rseq
+ * 	RSEQ_ASM_OP_R_*: has hard-code registers in it
+ * 	RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
+ */
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
+		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t"		\
+		"bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_CMPNE(var, expectnot, label)				\
+		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t"		\
+		"beq- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_STORE(value, var)						\
+		STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+
+/* Load @var to r17 */
+#define RSEQ_ASM_OP_R_LOAD(var)							\
+		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Store r17 to @var */
+#define RSEQ_ASM_OP_R_STORE(var)						\
+		STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Add @count to r17 */
+#define RSEQ_ASM_OP_R_ADD(count)						\
+		"add %%r17, %[" __rseq_str(count) "], %%r17\n\t"
+
+/* Load (r17 + voffp) to r17 */
+#define RSEQ_ASM_OP_R_LOADX(voffp)						\
+		LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_ASM_OP_R_MEMCPY() \
+		"cmpdi %%r19, 0\n\t" \
+		"beq 333f\n\t" \
+		"addi %%r20, %%r20, -1\n\t" \
+		"addi %%r21, %%r21, -1\n\t" \
+		"222:\n\t" \
+		"lbzu %%r18, 1(%%r20)\n\t" \
+		"stbu %%r18, 1(%%r21)\n\t" \
+		"addi %%r19, %%r19, -1\n\t" \
+		"cmpdi %%r19, 0\n\t" \
+		"bne 222b\n\t" \
+		"333:\n\t" \
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
+		STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		__rseq_str(post_commit_label) ":\n\t"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
+		STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+		__rseq_str(post_commit_label) ":\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v not equal to @expectnot */
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v not equal to @expectnot */
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+		/* load the value of @v */
+		RSEQ_ASM_OP_R_LOAD(v)
+		/* store it in @load */
+		RSEQ_ASM_OP_R_STORE(load)
+		/* dereference voffp(v) */
+		RSEQ_ASM_OP_R_LOADX(voffp)
+		/* final store the value at voffp(v) */
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"b" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		/* load the value of @v */
+		RSEQ_ASM_OP_R_LOAD(v)
+		/* add @count to it */
+		RSEQ_ASM_OP_R_ADD(count)
+		/* final store */
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"r" (count)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try store */
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try store */
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		/* for 'release' */
+		"lwsync\n\t"
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+		/* cmp @v2 equal to @expct2 */
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+		/* cmp @v2 equal to @expct2 */
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* setup for mempcy */
+		"mr %%r19, %[len]\n\t"
+		"mr %%r20, %[src]\n\t"
+		"mr %%r21, %[dst]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try memcpy */
+		RSEQ_ASM_OP_R_MEMCPY()
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* setup for mempcy */
+		"mr %%r19, %[len]\n\t"
+		"mr %%r20, %[src]\n\t"
+		"mr %%r21, %[dst]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try memcpy */
+		RSEQ_ASM_OP_R_MEMCPY()
+		RSEQ_INJECT_ASM(5)
+		/* for 'release' */
+		"lwsync\n\t"
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#undef STORE_WORD
+#undef LOAD_WORD
+#undef LOADX_WORD
+#undef CMP_WORD
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
new file mode 100644
index 000000000000..1069e85258ce
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("bcr 15,0" ::: "memory")
+#define rseq_smp_rmb()	rseq_smp_mb()
+#define rseq_smp_wmb()	rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_barrier();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_barrier();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#ifdef __s390x__
+
+#define LONG_L			"lg"
+#define LONG_S			"stg"
+#define LONG_LT_R		"ltgr"
+#define LONG_CMP		"cg"
+#define LONG_CMP_R		"cgr"
+#define LONG_ADDI		"aghi"
+#define LONG_ADD_R		"agr"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#elif __s390__
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define LONG_L			"l"
+#define LONG_S			"st"
+#define LONG_LT_R		"ltr"
+#define LONG_CMP		"c"
+#define LONG_CMP_R		"cr"
+#define LONG_ADDI		"ahi"
+#define LONG_ADD_R		"ar"
+
+#endif
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"larl %%r0, " __rseq_str(cs_label) "\n\t"		\
+		LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t"		\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"j %l[" __rseq_str(abort_label) "]\n\t"			\
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"j %l[" __rseq_str(cmpfail_label) "]\n\t"		\
+		".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " %%r1, %[v]\n\t"
+		LONG_CMP_R " %%r1, %[expectnot]\n\t"
+		"je %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " %%r1, %[v]\n\t"
+		LONG_CMP_R " %%r1, %[expectnot]\n\t"
+		"je %l[error2]\n\t"
+#endif
+		LONG_S " %%r1, %[load]\n\t"
+		LONG_ADD_R " %%r1, %[voffp]\n\t"
+		LONG_L " %%r1, 0(%%r1)\n\t"
+		/* final store */
+		LONG_S " %%r1, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"r" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0", "r1"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		LONG_L " %%r0, %[v]\n\t"
+		LONG_ADD_R " %%r0, %[count]\n\t"
+		/* final store */
+		LONG_S " %%r0, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"r" (count)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		LONG_CMP " %[expect2], %[v2]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[error2]\n\t"
+		LONG_CMP " %[expect2], %[v2]\n\t"
+		"jnz %l[error3]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint64_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S " %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		LONG_LT_R " %[len], %[len]\n\t"
+		"jz 333f\n\t"
+		"222:\n\t"
+		"ic %%r0,0(%[src])\n\t"
+		"stc %%r0,0(%[dst])\n\t"
+		LONG_ADDI " %[src], 1\n\t"
+		LONG_ADDI " %[dst], 1\n\t"
+		LONG_ADDI " %[len], -1\n\t"
+		"jnz 222b\n\t"
+		"333:\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+					    newv, cpu);
+}
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
new file mode 100644
index 000000000000..72750b5905a9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-skip.h
+ *
+ * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
new file mode 100644
index 000000000000..089410a314e9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -0,0 +1,1132 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-x86.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <stdint.h>
+
+#define RSEQ_SIG	0x53053053
+
+#ifdef __x86_64__
+
+#define rseq_smp_mb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define rseq_smp_rmb()	rseq_barrier()
+#define rseq_smp_wmb()	rseq_barrier()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_barrier();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_barrier();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t"	\
+		"movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t"		\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		/* Disassembler-friendly signature: nopl <sig>(%rip). */\
+		".byte 0x0f, 0x1f, 0x05\n\t"				\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(abort_label) "]\n\t"		\
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(cmpfail_label) "]\n\t"		\
+		".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movq %[v], %%rbx\n\t"
+		"cmpq %%rbx, %[expectnot]\n\t"
+		"je %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"movq %[v], %%rbx\n\t"
+		"cmpq %%rbx, %[expectnot]\n\t"
+		"je %l[error2]\n\t"
+#endif
+		"movq %%rbx, %[load]\n\t"
+		"addq %[voffp], %%rbx\n\t"
+		"movq (%%rbx), %%rbx\n\t"
+		/* final store */
+		"movq %%rbx, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"er" (voffp),
+		  [load]		"m" (*load)
+		: "memory", "cc", "rax", "rbx"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		/* final store */
+		"addq %[count], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"er" (count)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		"movq %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		"cmpq %[v2], %[expect2]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+		"cmpq %[v2], %[expect2]\n\t"
+		"jnz %l[error3]\n\t"
+#endif
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint64_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		"movq %[src], %[rseq_scratch0]\n\t"
+		"movq %[dst], %[rseq_scratch1]\n\t"
+		"movq %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		"test %[len], %[len]\n\t" \
+		"jz 333f\n\t" \
+		"222:\n\t" \
+		"movb (%[src]), %%al\n\t" \
+		"movb %%al, (%[dst])\n\t" \
+		"inc %[src]\n\t" \
+		"inc %[dst]\n\t" \
+		"dec %[len]\n\t" \
+		"jnz 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"movq %[rseq_scratch2], %[len]\n\t"
+		"movq %[rseq_scratch1], %[dst]\n\t"
+		"movq %[rseq_scratch0], %[src]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+					    newv, cpu);
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#elif __i386__
+
+#define rseq_smp_mb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_rmb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_wmb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * Use eax as scratch register and take memory operands as input to
+ * lessen register pressure. Especially needed when compiling in O0.
+ */
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t"	\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		/* Disassembler-friendly signature: nopl <sig>. */	\
+		".byte 0x0f, 0x1f, 0x05\n\t"				\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(abort_label) "]\n\t"		\
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(cmpfail_label) "]\n\t"		\
+		".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* final store */
+		"movl %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[v], %%ebx\n\t"
+		"cmpl %%ebx, %[expectnot]\n\t"
+		"je %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"movl %[v], %%ebx\n\t"
+		"cmpl %%ebx, %[expectnot]\n\t"
+		"je %l[error2]\n\t"
+#endif
+		"movl %%ebx, %[load]\n\t"
+		"addl %[voffp], %%ebx\n\t"
+		"movl (%%ebx), %%ebx\n\t"
+		/* final store */
+		"movl %%ebx, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"ir" (voffp),
+		  [load]		"m" (*load)
+		: "memory", "cc", "eax", "ebx"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		/* final store */
+		"addl %[count], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"ir" (count)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		"movl %[newv2], %%eax\n\t"
+		"movl %%eax, %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"movl %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"m" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %[v], %%eax\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"movl %[expect], %%eax\n\t"
+		"cmpl %[v], %%eax\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		"movl %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"lock; addl $0,-128(%%esp)\n\t"
+		/* final store */
+		"movl %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"m" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		"cmpl %[expect2], %[v2]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+		"cmpl %[expect2], %[v2]\n\t"
+		"jnz %l[error3]\n\t"
+#endif
+		"movl %[newv], %%eax\n\t"
+		/* final store */
+		"movl %%eax, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"m" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		"movl %[src], %[rseq_scratch0]\n\t"
+		"movl %[dst], %[rseq_scratch1]\n\t"
+		"movl %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		"test %[len], %[len]\n\t" \
+		"jz 333f\n\t" \
+		"222:\n\t" \
+		"movb (%[src]), %%al\n\t" \
+		"movb %%al, (%[dst])\n\t" \
+		"inc %[src]\n\t" \
+		"inc %[dst]\n\t" \
+		"dec %[len]\n\t" \
+		"jnz 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"movl %[newv], %%eax\n\t"
+		/* final store */
+		"movl %%eax, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"movl %[rseq_scratch2], %[len]\n\t"
+		"movl %[rseq_scratch1], %[dst]\n\t"
+		"movl %[rseq_scratch0], %[src]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"m" (expect),
+		  [newv]		"m" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		"movl %[src], %[rseq_scratch0]\n\t"
+		"movl %[dst], %[rseq_scratch1]\n\t"
+		"movl %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		"test %[len], %[len]\n\t" \
+		"jz 333f\n\t" \
+		"222:\n\t" \
+		"movb (%[src]), %%al\n\t" \
+		"movb %%al, (%[dst])\n\t" \
+		"inc %[src]\n\t" \
+		"inc %[dst]\n\t" \
+		"dec %[len]\n\t" \
+		"jnz 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"lock; addl $0,-128(%%esp)\n\t"
+		"movl %[newv], %%eax\n\t"
+		/* final store */
+		"movl %%eax, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"movl %[rseq_scratch2], %[len]\n\t"
+		"movl %[rseq_scratch1], %[dst]\n\t"
+		"movl %[rseq_scratch0], %[src]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"m" (expect),
+		  [newv]		"m" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
new file mode 100644
index 000000000000..4847e97ed049
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * rseq.c
+ *
+ * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <assert.h>
+#include <signal.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))
+
+__attribute__((tls_model("initial-exec"))) __thread
+volatile struct rseq __rseq_abi = {
+	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+static __attribute__((tls_model("initial-exec"))) __thread
+volatile int refcount;
+
+static void signal_off_save(sigset_t *oldset)
+{
+	sigset_t set;
+	int ret;
+
+	sigfillset(&set);
+	ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
+	if (ret)
+		abort();
+}
+
+static void signal_restore(sigset_t oldset)
+{
+	int ret;
+
+	ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+	if (ret)
+		abort();
+}
+
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+		    int flags, uint32_t sig)
+{
+	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+int rseq_register_current_thread(void)
+{
+	int rc, ret = 0;
+	sigset_t oldset;
+
+	signal_off_save(&oldset);
+	if (refcount++)
+		goto end;
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
+	if (!rc) {
+		assert(rseq_current_cpu_raw() >= 0);
+		goto end;
+	}
+	if (errno != EBUSY)
+		__rseq_abi.cpu_id = -2;
+	ret = -1;
+	refcount--;
+end:
+	signal_restore(oldset);
+	return ret;
+}
+
+int rseq_unregister_current_thread(void)
+{
+	int rc, ret = 0;
+	sigset_t oldset;
+
+	signal_off_save(&oldset);
+	if (--refcount)
+		goto end;
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
+		      RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+	if (!rc)
+		goto end;
+	ret = -1;
+end:
+	signal_restore(oldset);
+	return ret;
+}
+
+int32_t rseq_fallback_current_cpu(void)
+{
+	int32_t cpu;
+
+	cpu = sched_getcpu();
+	if (cpu < 0) {
+		perror("sched_getcpu()");
+		abort();
+	}
+	return cpu;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
new file mode 100644
index 000000000000..c72eb70f9b52
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_H
+#define RSEQ_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <linux/rseq.h>
+
+/*
+ * Empty code injection macros, override when testing.
+ * It is important to consider that the ASM injection macros need to be
+ * fully reentrant (e.g. do not modify the stack).
+ */
+#ifndef RSEQ_INJECT_ASM
+#define RSEQ_INJECT_ASM(n)
+#endif
+
+#ifndef RSEQ_INJECT_C
+#define RSEQ_INJECT_C(n)
+#endif
+
+#ifndef RSEQ_INJECT_INPUT
+#define RSEQ_INJECT_INPUT
+#endif
+
+#ifndef RSEQ_INJECT_CLOBBER
+#define RSEQ_INJECT_CLOBBER
+#endif
+
+#ifndef RSEQ_INJECT_FAILED
+#define RSEQ_INJECT_FAILED
+#endif
+
+extern __thread volatile struct rseq __rseq_abi;
+
+#define rseq_likely(x)		__builtin_expect(!!(x), 1)
+#define rseq_unlikely(x)	__builtin_expect(!!(x), 0)
+#define rseq_barrier()		__asm__ __volatile__("" : : : "memory")
+
+#define RSEQ_ACCESS_ONCE(x)	(*(__volatile__  __typeof__(x) *)&(x))
+#define RSEQ_WRITE_ONCE(x, v)	__extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); })
+#define RSEQ_READ_ONCE(x)	RSEQ_ACCESS_ONCE(x)
+
+#define __rseq_str_1(x)	#x
+#define __rseq_str(x)		__rseq_str_1(x)
+
+#define rseq_log(fmt, args...)						       \
+	fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \
+		## args, __func__)
+
+#define rseq_bug(fmt, args...)		\
+	do {				\
+		rseq_log(fmt, ##args);	\
+		abort();		\
+	} while (0)
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <rseq-x86.h>
+#elif defined(__ARMEL__)
+#include <rseq-arm.h>
+#elif defined (__AARCH64EL__)
+#include <rseq-arm64.h>
+#elif defined(__PPC__)
+#include <rseq-ppc.h>
+#elif defined(__mips__)
+#include <rseq-mips.h>
+#elif defined(__s390__)
+#include <rseq-s390.h>
+#else
+#error unsupported target
+#endif
+
+/*
+ * Register rseq for the current thread. This needs to be called once
+ * by any thread which uses restartable sequences, before they start
+ * using restartable sequences, to ensure restartable sequences
+ * succeed. A restartable sequence executed from a non-registered
+ * thread will always fail.
+ */
+int rseq_register_current_thread(void);
+
+/*
+ * Unregister rseq for current thread.
+ */
+int rseq_unregister_current_thread(void);
+
+/*
+ * Restartable sequence fallback for reading the current CPU number.
+ */
+int32_t rseq_fallback_current_cpu(void);
+
+/*
+ * Values returned can be either the current CPU number, -1 (rseq is
+ * uninitialized), or -2 (rseq initialization has failed).
+ */
+static inline int32_t rseq_current_cpu_raw(void)
+{
+	return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+}
+
+/*
+ * Returns a possible CPU number, which is typically the current CPU.
+ * The returned CPU number can be used to prepare for an rseq critical
+ * section, which will confirm whether the cpu number is indeed the
+ * current one, and whether rseq is initialized.
+ *
+ * The CPU number returned by rseq_cpu_start should always be validated
+ * by passing it to a rseq asm sequence, or by comparing it to the
+ * return value of rseq_current_cpu_raw() if the rseq asm sequence
+ * does not need to be invoked.
+ */
+static inline uint32_t rseq_cpu_start(void)
+{
+	return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+}
+
+static inline uint32_t rseq_current_cpu(void)
+{
+	int32_t cpu;
+
+	cpu = rseq_current_cpu_raw();
+	if (rseq_unlikely(cpu < 0))
+		cpu = rseq_fallback_current_cpu();
+	return cpu;
+}
+
+static inline void rseq_clear_rseq_cs(void)
+{
+#ifdef __LP64__
+	__rseq_abi.rseq_cs.ptr = 0;
+#else
+	__rseq_abi.rseq_cs.ptr.ptr32 = 0;
+#endif
+}
+
+/*
+ * rseq_prepare_unload() should be invoked by each thread executing a rseq
+ * critical section at least once between their last critical section and
+ * library unload of the library defining the rseq critical section
+ * (struct rseq_cs). This also applies to use of rseq in code generated by
+ * JIT: rseq_prepare_unload() should be invoked at least once by each
+ * thread executing a rseq critical section before reclaim of the memory
+ * holding the struct rseq_cs.
+ */
+static inline void rseq_prepare_unload(void)
+{
+	rseq_clear_rseq_cs();
+}
+
+#endif  /* RSEQ_H_ */
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
new file mode 100755
index 000000000000..3acd6d75ff9f
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+ or MIT
+
+EXTRA_ARGS=${@}
+
+OLDIFS="$IFS"
+IFS=$'\n'
+TEST_LIST=(
+	"-T s"
+	"-T l"
+	"-T b"
+	"-T b -M"
+	"-T m"
+	"-T m -M"
+	"-T i"
+)
+
+TEST_NAME=(
+	"spinlock"
+	"list"
+	"buffer"
+	"buffer with barrier"
+	"memcpy"
+	"memcpy with barrier"
+	"increment"
+)
+IFS="$OLDIFS"
+
+REPS=1000
+SLOW_REPS=100
+
+function do_tests()
+{
+	local i=0
+	while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
+		echo "Running test ${TEST_NAME[$i]}"
+		./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+		echo "Running compare-twice test ${TEST_NAME[$i]}"
+		./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+		let "i++"
+	done
+}
+
+echo "Default parameters"
+do_tests
+
+echo "Loop injection: 10000 loops"
+
+OLDIFS="$IFS"
+IFS=$'\n'
+INJECT_LIST=(
+	"1"
+	"2"
+	"3"
+	"4"
+	"5"
+	"6"
+	"7"
+	"8"
+	"9"
+)
+IFS="$OLDIFS"
+
+NR_LOOPS=10000
+
+i=0
+while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+	echo "Injecting at <${INJECT_LIST[$i]}>"
+	do_tests -${INJECT_LIST[i]} ${NR_LOOPS}
+	let "i++"
+done
+NR_LOOPS=
+
+function inject_blocking()
+{
+	OLDIFS="$IFS"
+	IFS=$'\n'
+	INJECT_LIST=(
+		"7"
+		"8"
+		"9"
+	)
+	IFS="$OLDIFS"
+
+	NR_LOOPS=-1
+
+	i=0
+	while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+		echo "Injecting at <${INJECT_LIST[$i]}>"
+		do_tests -${INJECT_LIST[i]} -1 ${@}
+		let "i++"
+	done
+	NR_LOOPS=
+}
+
+echo "Yield injection (25%)"
+inject_blocking -m 4 -y
+
+echo "Yield injection (50%)"
+inject_blocking -m 2 -y
+
+echo "Yield injection (100%)"
+inject_blocking -m 1 -y
+
+echo "Kill injection (25%)"
+inject_blocking -m 4 -k
+
+echo "Kill injection (50%)"
+inject_blocking -m 2 -k
+
+echo "Kill injection (100%)"
+inject_blocking -m 1 -k
+
+echo "Sleep injection (1ms, 25%)"
+inject_blocking -m 4 -s 1
+
+echo "Sleep injection (1ms, 50%)"
+inject_blocking -m 2 -s 1
+
+echo "Sleep injection (1ms, 100%)"
+inject_blocking -m 1 -s 1
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 000000000000..d0ad44f6294a
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
+rtctest
+setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 000000000000..de9c8566672a
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = rtctest
+
+TEST_GEN_PROGS_EXTENDED = setdate
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 000000000000..e20b017e7073
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Driver Test Program
+ *
+ * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_UIE 3
+#define ALARM_DELTA 3
+
+static char *rtc_file = "/dev/rtc0";
+
+FIXTURE(rtc) {
+	int fd;
+};
+
+FIXTURE_SETUP(rtc) {
+	self->fd = open(rtc_file, O_RDONLY);
+	ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(rtc) {
+	close(self->fd);
+}
+
+TEST_F(rtc, date_read) {
+	int rc;
+	struct rtc_time rtc_tm;
+
+	/* Read the RTC time/date */
+	rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
+	       rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+	       rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+}
+
+TEST_F(rtc, uie_read) {
+	int i, rc, irq = 0;
+	unsigned long data;
+
+	/* Turn on update interrupts */
+	rc = ioctl(self->fd, RTC_UIE_ON, 0);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip update IRQs not supported.");
+		return;
+	}
+
+	for (i = 0; i < NUM_UIE; i++) {
+		/* This read will block */
+		rc = read(self->fd, &data, sizeof(data));
+		ASSERT_NE(-1, rc);
+		irq++;
+	}
+
+	EXPECT_EQ(NUM_UIE, irq);
+
+	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, uie_select) {
+	int i, rc, irq = 0;
+	unsigned long data;
+
+	/* Turn on update interrupts */
+	rc = ioctl(self->fd, RTC_UIE_ON, 0);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip update IRQs not supported.");
+		return;
+	}
+
+	for (i = 0; i < NUM_UIE; i++) {
+		struct timeval tv = { .tv_sec = 2 };
+		fd_set readfds;
+
+		FD_ZERO(&readfds);
+		FD_SET(self->fd, &readfds);
+		/* The select will wait until an RTC interrupt happens. */
+		rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+		ASSERT_NE(-1, rc);
+		ASSERT_NE(0, rc);
+
+		/* This read won't block */
+		rc = read(self->fd, &data, sizeof(unsigned long));
+		ASSERT_NE(-1, rc);
+		irq++;
+	}
+
+	EXPECT_EQ(NUM_UIE, irq);
+
+	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, alarm_alm_set) {
+	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+	unsigned long data;
+	struct rtc_time tm;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&tm) + ALARM_DELTA;
+	gmtime_r(&secs, (struct tm *)&tm);
+
+	rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+	       tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	/* Enable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_ON, 0);
+	ASSERT_NE(-1, rc);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	EXPECT_NE(0, rc);
+
+	/* Disable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+
+	if (rc == 0)
+		return;
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+	TH_LOG("data: %lx", data);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set) {
+	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+	struct rtc_wkalrm alarm = { 0 };
+	struct rtc_time tm;
+	unsigned long data;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
+	gmtime_r(&secs, (struct tm *)&alarm.time);
+
+	alarm.enabled = 1;
+
+	rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+	       alarm.time.tm_mday, alarm.time.tm_mon + 1,
+	       alarm.time.tm_year + 1900, alarm.time.tm_hour,
+	       alarm.time.tm_min, alarm.time.tm_sec);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	EXPECT_NE(0, rc);
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+static void __attribute__((constructor))
+__constructor_order_last(void)
+{
+	if (!__constructor_order)
+		__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+	switch (argc) {
+	case 2:
+		rtc_file = argv[1];
+		/* FALLTHROUGH */
+	case 1:
+		break;
+	default:
+		fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
+		return 1;
+	}
+
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/rtc/setdate.c
index 2cb78489eca4..2cb78489eca4 100644
--- a/tools/testing/selftests/timers/rtctest_setdate.c
+++ b/tools/testing/selftests/rtc/setdate.c
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e1473234968d..c9a2abf8be1b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2731,9 +2731,14 @@ TEST(syscall_restart)
 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 	ASSERT_EQ(true, WIFSTOPPED(status));
 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
-	/* Verify signal delivery came from parent now. */
 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
-	EXPECT_EQ(getpid(), info.si_pid);
+	/*
+	 * There is no siginfo on SIGSTOP any more, so we can't verify
+	 * signal delivery came from parent now (getpid() == info.si_pid).
+	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
+	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
+	 */
+	EXPECT_EQ(SIGSTOP, info.si_signo);
 
 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
new file mode 100644
index 000000000000..a19531dba4dc
--- /dev/null
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
+
+ifneq ($(ARCH),sparc64)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+SUBDIRS := drivers
+
+TEST_PROGS := run.sh
+
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		#SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+		TEST=$$DIR"_test.sh"; \
+		if [ -e $$DIR/$$TEST ]; then \
+			rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+		fi \
+	done
+
+override define INSTALL_RULE
+	mkdir -p $(INSTALL_PATH)
+	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+	@for SUBDIR in $(SUBDIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+	done;
+endef
+
+override define CLEAN
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+	done
+endef
+endif
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
new file mode 100644
index 000000000000..90e835ed74e6
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -0,0 +1 @@
+adi-test
diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile
new file mode 100644
index 000000000000..deb0df415565
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDEDIR := -I.
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := adi-test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): adi-test.c
+
+TEST_PROGS := drivers_test.sh
+
+include ../../lib.mk
+
+$(OUTPUT)/adi-test: adi-test.c
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
new file mode 100644
index 000000000000..95d93c6a88a5
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * selftest for sparc64's privileged ADI driver
+ *
+ * Author: Tom Hromatka <tom.hromatka@oracle.com>
+ */
+#include <linux/kernel.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../../kselftest.h"
+
+#define DEBUG_LEVEL_1_BIT	(0x0001)
+#define DEBUG_LEVEL_2_BIT	(0x0002)
+#define DEBUG_LEVEL_3_BIT	(0x0004)
+#define DEBUG_LEVEL_4_BIT	(0x0008)
+#define DEBUG_TIMING_BIT	(0x1000)
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* bit mask of enabled bits to print */
+#define DEBUG 0x0001
+
+#define DEBUG_PRINT_L1(...)	debug_print(DEBUG_LEVEL_1_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L2(...)	debug_print(DEBUG_LEVEL_2_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L3(...)	debug_print(DEBUG_LEVEL_3_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L4(...)	debug_print(DEBUG_LEVEL_4_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_T(...)	debug_print(DEBUG_TIMING_BIT, __VA_ARGS__)
+
+static void debug_print(int level, const char *s, ...)
+{
+	va_list args;
+
+	va_start(args, s);
+
+	if (DEBUG & level)
+		vfprintf(stdout, s, args);
+	va_end(args);
+}
+
+#ifndef min
+#define min(x, y) ((x) < (y) ? x : y)
+#endif
+
+#define RETURN_FROM_TEST(_ret) \
+	do { \
+		DEBUG_PRINT_L1( \
+			"\tTest %s returned %d\n", __func__, _ret); \
+		return _ret; \
+	} while (0)
+
+#define ADI_BLKSZ	64
+#define ADI_MAX_VERSION	15
+
+#define TEST_STEP_FAILURE(_ret) \
+	do { \
+		fprintf(stderr, "\tTest step failure: %d at %s:%d\n", \
+			_ret, __func__, __LINE__); \
+		goto out; \
+	} while (0)
+
+#define RDTICK(_x) \
+	asm volatile(" rd %%tick, %0\n" : "=r" (_x))
+
+static int random_version(void)
+{
+	long tick;
+
+	RDTICK(tick);
+
+	return tick % (ADI_MAX_VERSION + 1);
+}
+
+#define MAX_RANGES_SUPPORTED	5
+static const char system_ram_str[] = "System RAM\n";
+static int range_count;
+static unsigned long long int start_addr[MAX_RANGES_SUPPORTED];
+static unsigned long long int   end_addr[MAX_RANGES_SUPPORTED];
+
+struct stats {
+	char		name[16];
+	unsigned long	total;
+	unsigned long	count;
+	unsigned long	bytes;
+};
+
+static struct stats read_stats = {
+	.name = "read", .total = 0, .count = 0, .bytes = 0};
+static struct stats pread_stats = {
+	.name = "pread", .total = 0, .count = 0, .bytes = 0};
+static struct stats write_stats = {
+	.name = "write", .total = 0, .count = 0, .bytes = 0};
+static struct stats pwrite_stats = {
+	.name = "pwrite", .total = 0, .count = 0, .bytes = 0};
+static struct stats seek_stats = {
+	.name = "seek", .total = 0, .count = 0, .bytes = 0};
+
+static void update_stats(struct stats * const ustats,
+			 unsigned long measurement, unsigned long bytes)
+{
+	ustats->total += measurement;
+	ustats->bytes += bytes;
+	ustats->count++;
+}
+
+static void print_ustats(const struct stats * const ustats)
+{
+	DEBUG_PRINT_L1("%s\t%7d\t%7.0f\t%7.0f\n",
+		       ustats->name, ustats->count,
+		       (float)ustats->total / (float)ustats->count,
+		       (float)ustats->bytes / (float)ustats->count);
+}
+
+static void print_stats(void)
+{
+	DEBUG_PRINT_L1("\nSyscall\tCall\tAvgTime\tAvgSize\n"
+		       "\tCount\t(ticks)\t(bytes)\n"
+		       "-------------------------------\n");
+
+	print_ustats(&read_stats);
+	print_ustats(&pread_stats);
+	print_ustats(&write_stats);
+	print_ustats(&pwrite_stats);
+	print_ustats(&seek_stats);
+}
+
+static int build_memory_map(void)
+{
+	char line[256];
+	FILE *fp;
+	int i;
+
+	range_count = 0;
+
+	fp = fopen("/proc/iomem", "r");
+	if (!fp) {
+		fprintf(stderr, "/proc/iomem: error %d: %s\n",
+			errno, strerror(errno));
+		return -errno;
+	}
+
+	while (fgets(line, sizeof(line), fp) != 0) {
+		if (strstr(line, system_ram_str)) {
+			char *dash, *end_ptr;
+
+			/* Given a line like this:
+			 * d0400000-10ffaffff : System RAM
+			 * replace the "-" with a space
+			 */
+			dash = strstr(line, "-");
+			dash[0] = 0x20;
+
+			start_addr[range_count] = strtoull(line, &end_ptr, 16);
+			end_addr[range_count] = strtoull(end_ptr, NULL, 16);
+			range_count++;
+		}
+	}
+
+	fclose(fp);
+
+	DEBUG_PRINT_L1("RAM Ranges\n");
+	for (i = 0; i < range_count; i++)
+		DEBUG_PRINT_L1("\trange %d: 0x%llx\t- 0x%llx\n",
+			       i, start_addr[i], end_addr[i]);
+
+	if (range_count == 0) {
+		fprintf(stderr, "No valid address ranges found.  Error.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int read_adi(int fd, unsigned char *buf, int buf_sz)
+{
+	int ret, bytes_read = 0;
+	long start, end, elapsed_time = 0;
+
+	do {
+		RDTICK(start);
+		ret = read(fd, buf + bytes_read, buf_sz - bytes_read);
+		RDTICK(end);
+		if (ret < 0)
+			return -errno;
+
+		elapsed_time += end - start;
+		update_stats(&read_stats, elapsed_time, buf_sz);
+		bytes_read += ret;
+
+	} while (bytes_read < buf_sz);
+
+	DEBUG_PRINT_T("\tread elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tRead  %d bytes\n", bytes_read);
+
+	return bytes_read;
+}
+
+static int pread_adi(int fd, unsigned char *buf,
+		     int buf_sz, unsigned long offset)
+{
+	int ret, i, bytes_read = 0;
+	unsigned long cur_offset;
+	long start, end, elapsed_time = 0;
+
+	cur_offset = offset;
+	do {
+		RDTICK(start);
+		ret = pread(fd, buf + bytes_read, buf_sz - bytes_read,
+			    cur_offset);
+		RDTICK(end);
+		if (ret < 0)
+			return -errno;
+
+		elapsed_time += end - start;
+		update_stats(&pread_stats, elapsed_time, buf_sz);
+		bytes_read += ret;
+		cur_offset += ret;
+
+	} while (bytes_read < buf_sz);
+
+	DEBUG_PRINT_T("\tpread elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tRead  %d bytes starting at offset 0x%lx\n",
+		       bytes_read, offset);
+	for (i = 0; i < bytes_read; i++)
+		DEBUG_PRINT_L4("\t\t0x%lx\t%d\n", offset + i, buf[i]);
+
+	return bytes_read;
+}
+
+static int write_adi(int fd, const unsigned char * const buf, int buf_sz)
+{
+	int ret, bytes_written = 0;
+	long start, end, elapsed_time = 0;
+
+	do {
+		RDTICK(start);
+		ret = write(fd, buf + bytes_written, buf_sz - bytes_written);
+		RDTICK(end);
+		if (ret < 0)
+			return -errno;
+
+		elapsed_time += (end - start);
+		update_stats(&write_stats, elapsed_time, buf_sz);
+		bytes_written += ret;
+	} while (bytes_written < buf_sz);
+
+	DEBUG_PRINT_T("\twrite elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tWrote %d of %d bytes\n", bytes_written, buf_sz);
+
+	return bytes_written;
+}
+
+static int pwrite_adi(int fd, const unsigned char * const buf,
+		      int buf_sz, unsigned long offset)
+{
+	int ret, bytes_written = 0;
+	unsigned long cur_offset;
+	long start, end, elapsed_time = 0;
+
+	cur_offset = offset;
+
+	do {
+		RDTICK(start);
+		ret = pwrite(fd, buf + bytes_written,
+			     buf_sz - bytes_written, cur_offset);
+		RDTICK(end);
+		if (ret < 0) {
+			fprintf(stderr, "pwrite(): error %d: %s\n",
+				errno, strerror(errno));
+			return -errno;
+		}
+
+		elapsed_time += (end - start);
+		update_stats(&pwrite_stats, elapsed_time, buf_sz);
+		bytes_written += ret;
+		cur_offset += ret;
+
+	} while (bytes_written < buf_sz);
+
+	DEBUG_PRINT_T("\tpwrite elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tWrote %d of %d bytes starting at address 0x%lx\n",
+		       bytes_written, buf_sz, offset);
+
+	return bytes_written;
+}
+
+static off_t seek_adi(int fd, off_t offset, int whence)
+{
+	long start, end;
+	off_t ret;
+
+	RDTICK(start);
+	ret = lseek(fd, offset, whence);
+	RDTICK(end);
+	DEBUG_PRINT_L2("\tlseek ret = 0x%llx\n", ret);
+	if (ret < 0)
+		goto out;
+
+	DEBUG_PRINT_T("\tlseek elapsed timed = %ld\n", end - start);
+	update_stats(&seek_stats, end - start, 0);
+
+out:
+	(void)lseek(fd, 0, SEEK_END);
+	return ret;
+}
+
+static int test0_prpw_aligned_1byte(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0x1000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[1], expected_version;
+	loff_t offset;
+	int ret;
+
+	version[0] = random_version();
+	expected_version = version[0];
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	if (expected_version != version[0]) {
+		DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+			       expected_version, version[0]);
+		TEST_STEP_FAILURE(-expected_version);
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST1_VERSION_SZ	4096
+static int test1_prpw_aligned_4096bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0x6000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[TEST1_VERSION_SZ],
+		expected_version[TEST1_VERSION_SZ];
+	loff_t offset;
+	int ret, i;
+
+	for (i = 0; i < TEST1_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST1_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version, version[0]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST2_VERSION_SZ	10327
+static int test2_prpw_aligned_10327bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(start_addr[0] + 0x6000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[TEST2_VERSION_SZ],
+		expected_version[TEST2_VERSION_SZ];
+	loff_t offset;
+	int ret, i;
+
+	for (i = 0; i < TEST2_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST2_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version, version[0]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST3_VERSION_SZ	12541
+static int test3_prpw_unaligned_12541bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		((start_addr[0] + 0xC000) & ~(ADI_BLKSZ - 1)) + 17;
+	unsigned char version[TEST3_VERSION_SZ],
+		expected_version[TEST3_VERSION_SZ];
+	loff_t offset;
+	int ret, i;
+
+	for (i = 0; i < TEST3_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST3_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version, version[0]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+static int test4_lseek(int fd)
+{
+#define	OFFSET_ADD	(0x100)
+#define OFFSET_SUBTRACT	(0xFFFFFFF000000000)
+
+	off_t offset_out, offset_in;
+	int ret;
+
+
+	offset_in = 0x123456789abcdef0;
+	offset_out = seek_adi(fd, offset_in, SEEK_SET);
+	if (offset_out != offset_in) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	/* seek to the current offset.  this should return EINVAL */
+	offset_out = seek_adi(fd, offset_in, SEEK_SET);
+	if (offset_out < 0 && errno == EINVAL)
+		DEBUG_PRINT_L2(
+			"\tSEEK_SET failed as designed. Not an error\n");
+	else {
+		ret = -2;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	offset_out = seek_adi(fd, 0, SEEK_CUR);
+	if (offset_out != offset_in) {
+		ret = -3;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	offset_out = seek_adi(fd, OFFSET_ADD, SEEK_CUR);
+	if (offset_out != (offset_in + OFFSET_ADD)) {
+		ret = -4;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	offset_out = seek_adi(fd, OFFSET_SUBTRACT, SEEK_CUR);
+	if (offset_out != (offset_in + OFFSET_ADD + OFFSET_SUBTRACT)) {
+		ret = -5;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+static int test5_rw_aligned_1byte(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0xF000) & ~(ADI_BLKSZ - 1);
+	unsigned char version, expected_version;
+	loff_t offset;
+	off_t oret;
+	int ret;
+
+	offset = paddr / ADI_BLKSZ;
+	version = expected_version = random_version();
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = write_adi(fd, &version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = read_adi(fd, &version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	if (expected_version != version) {
+		DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+			       expected_version, version);
+		TEST_STEP_FAILURE(-expected_version);
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST6_VERSION_SZ        9434
+static int test6_rw_aligned_9434bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0x5F000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[TEST6_VERSION_SZ],
+		      expected_version[TEST6_VERSION_SZ];
+	loff_t offset;
+	off_t oret;
+	int ret, i;
+
+	offset = paddr / ADI_BLKSZ;
+	for (i = 0; i < TEST6_VERSION_SZ; i++)
+		version[i] = expected_version[i] = random_version();
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = write_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	memset(version, 0, TEST6_VERSION_SZ);
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = read_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST6_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST7_VERSION_SZ        14963
+static int test7_rw_aligned_14963bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+	  ((start_addr[range_count - 1] + 0xF000) & ~(ADI_BLKSZ - 1)) + 39;
+	unsigned char version[TEST7_VERSION_SZ],
+		      expected_version[TEST7_VERSION_SZ];
+	loff_t offset;
+	off_t oret;
+	int ret, i;
+
+	offset = paddr / ADI_BLKSZ;
+	for (i = 0; i < TEST7_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = write_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	memset(version, 0, TEST7_VERSION_SZ);
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = read_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST7_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+
+		paddr += ADI_BLKSZ;
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+static int (*tests[])(int fd) = {
+	test0_prpw_aligned_1byte,
+	test1_prpw_aligned_4096bytes,
+	test2_prpw_aligned_10327bytes,
+	test3_prpw_unaligned_12541bytes,
+	test4_lseek,
+	test5_rw_aligned_1byte,
+	test6_rw_aligned_9434bytes,
+	test7_rw_aligned_14963bytes,
+};
+#define TEST_COUNT	ARRAY_SIZE(tests)
+
+int main(int argc, char *argv[])
+{
+	int fd, ret, test;
+
+	ret = build_memory_map();
+	if (ret < 0)
+		return ret;
+
+	fd = open("/dev/adi", O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "open: error %d: %s\n",
+			errno, strerror(errno));
+		return -errno;
+	}
+
+	for (test = 0; test < TEST_COUNT; test++) {
+		DEBUG_PRINT_L1("Running test #%d\n", test);
+
+		ret = (*tests[test])(fd);
+		if (ret != 0)
+			ksft_test_result_fail("Test #%d failed: error %d\n",
+					      test, ret);
+		else
+			ksft_test_result_pass("Test #%d passed\n", test);
+	}
+
+	print_stats();
+	close(fd);
+
+	if (ksft_get_fail_cnt() > 0)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
+
+	/* it's impossible to get here, but the compiler throws a warning
+	 * about control reaching the end of non-void function.  bah.
+	 */
+	return 0;
+}
diff --git a/tools/testing/selftests/sparc64/drivers/drivers_test.sh b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
new file mode 100755
index 000000000000..6d08273b7532
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+SRC_TREE=../../../../
+
+test_run()
+{
+	if [ -f ${SRC_TREE}/drivers/char/adi.ko ]; then
+		insmod ${SRC_TREE}/drivers/char/adi.ko 2> /dev/null
+		if [ $? -ne 0 ]; then
+			rc=1
+		fi
+	else
+		# Use modprobe dry run to check for missing adi module
+		if ! /sbin/modprobe -q -n adi; then
+			echo "adi: [SKIP]"
+		elif /sbin/modprobe -q adi; then
+			echo "adi: ok"
+		else
+			echo "adi: [FAIL]"
+			rc=1
+		fi
+	fi
+	./adi-test
+	rmmod adi 2> /dev/null
+}
+
+rc=0
+test_run
+exit $rc
diff --git a/tools/testing/selftests/sparc64/run.sh b/tools/testing/selftests/sparc64/run.sh
new file mode 100755
index 000000000000..38ad61f9328e
--- /dev/null
+++ b/tools/testing/selftests/sparc64/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd drivers; ./drivers_test.sh)
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index 24cff498b31a..fc9f8cde7d42 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -2,6 +2,19 @@
 # SPDX-License-Identifier: GPL-2.0
 # Runs static keys kernel module tests
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+	echo "static_key: module test_static_key_base is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+	echo "static_key: module test_static_keys is not found [SKIP]"
+	exit $ksft_skip
+fi
+
 if /sbin/modprobe -q test_static_key_base; then
 	if /sbin/modprobe -q test_static_keys; then
 		echo "static_key: ok"
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000000..1ab7e8130db2
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index ec232c3cfcaa..584eb8ea780a 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -14,6 +14,9 @@
 
 # This performs a series tests against the proc sysctl interface.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 TEST_NAME="sysctl"
 TEST_DRIVER="test_${TEST_NAME}"
 TEST_DIR=$(dirname $0)
@@ -41,7 +44,7 @@ test_modprobe()
                echo "$0: $DIR not present" >&2
                echo "You must have the following enabled in your kernel:" >&2
                cat $TEST_DIR/config >&2
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -98,28 +101,30 @@ test_reqs()
 	uid=$(id -u)
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! which perl 2> /dev/null > /dev/null; then
 		echo "$0: You need perl installed"
-		exit 1
+		exit $ksft_skip
 	fi
 	if ! which getconf 2> /dev/null > /dev/null; then
 		echo "$0: You need getconf installed"
-		exit 1
+		exit $ksft_skip
 	fi
 	if ! which diff 2> /dev/null > /dev/null; then
 		echo "$0: You need diff installed"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
 function load_req_mod()
 {
-	trap "test_modprobe" EXIT
-
 	if [ ! -d $DIR ]; then
+		if ! modprobe -q -n $TEST_DRIVER; then
+			echo "$0: module $TEST_DRIVER not found [SKIP]"
+			exit $ksft_skip
+		fi
 		modprobe $TEST_DRIVER
 		if [ $? -ne 0 ]; then
 			exit
@@ -765,6 +770,7 @@ function parse_args()
 test_reqs
 allow_user_defaults
 check_production_sysctl_writes_strict
+test_modprobe
 load_req_mod
 
 trap "test_finish" EXIT
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 3a0336782d2d..f9281e8aa313 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -17,6 +17,10 @@ REQUIREMENTS
 *  The kernel must have veth support available, as a veth pair is created
    prior to running the tests.
 
+*  The kernel must have the appropriate infrastructure enabled to run all tdc
+   unit tests. See the config file in this directory for minimum required
+   features. As new tests will be added, config options list will be updated.
+
 *  All tc-related features being tested must be built in or available as
    modules.  To check what is required in current setup run:
    ./tdc.py -c
@@ -109,8 +113,8 @@ COMMAND LINE ARGUMENTS
 Run tdc.py -h to see the full list of available arguments.
 
 usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
-              [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
-              [-d DEVICE] [-n NS] [-V]
+              [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] [-N]
+              [-d DEVICE] [-P] [-n] [-V]
 
 Linux TC unit tests
 
@@ -118,8 +122,10 @@ optional arguments:
   -h, --help            show this help message and exit
   -p PATH, --path PATH  The full path to the tc executable to use
   -v, --verbose         Show the commands that are being run
+  -N, --notap           Suppress tap results for command under test
   -d DEVICE, --device DEVICE
                         Execute the test case in flower category
+  -P, --pause           Pause execution just before post-suite stage
 
 selection:
   select which test cases: files plus directories; filtered by categories
@@ -146,10 +152,10 @@ action:
   -i, --id              Generate ID numbers for new test cases
 
 netns:
-  options for nsPlugin(run commands in net namespace)
+  options for nsPlugin (run commands in net namespace)
 
-  -n NS, --namespace NS
-                        Run commands in namespace NS
+  -n, --namespace
+                        Run commands in namespace as specified in tdc_config.py
 
 valgrind:
   options for valgrindPlugin (run command under test under Valgrind)
@@ -226,6 +232,8 @@ directory:
       and the other is a test whether the command leaked memory or not.
       (This one is a preliminary version, it may not work quite right yet,
       but the overall template is there and it should only need tweaks.)
+  - buildebpfPlugin.py:
+      builds all programs in $EBPFDIR.
 
 
 ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000000..dc92eb271d9a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC   ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+  CPU ?= probe
+else
+  CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+	      $(CLANG_SYS_INCLUDES) \
+	      -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+	$(CLANG) $(CLANG_FLAGS) \
+		 -O2 -target bpf -emit-llvm -c $< -o - |      \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000000..c32b99b80e19
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+	return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+	s->data = 0x0;
+	return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
new file mode 100644
index 000000000000..203302065458
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/config
@@ -0,0 +1,48 @@
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_IPT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_SKBMOD=m
+CONFIG_NET_ACT_IFE=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+CONFIG_NET_IFE_SKBTCINDEX=m
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_SCH_FIFO=y
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
new file mode 100644
index 000000000000..9f0ba10c44b4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -0,0 +1,66 @@
+'''
+build ebpf program
+'''
+
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'buildebpf/SubPlugin'
+        self.tap = ''
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        super().pre_suite(testcount, testidlist)
+
+        if self.args.buildebpf:
+            self._ebpf_makeall()
+
+    def post_suite(self, index):
+        super().post_suite(index)
+
+        self._ebpf_makeclean()
+
+    def add_args(self, parser):
+        super().add_args(parser)
+
+        self.argparser_group = self.argparser.add_argument_group(
+            'buildebpf',
+            'options for buildebpfPlugin')
+        self.argparser_group.add_argument(
+            '-B', '--buildebpf', action='store_true',
+            help='build eBPF programs')
+
+        return self.argparser
+
+    def _ebpf_makeall(self):
+        if self.args.buildebpf:
+            self._make('all')
+
+    def _ebpf_makeclean(self):
+        if self.args.buildebpf:
+            self._make('clean')
+
+    def _make(self, target):
+        command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
+        proc = subprocess.Popen(command,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=ENVIR)
+        (rawout, serr) = proc.communicate()
+
+        if proc.returncode != 0 and len(serr) > 0:
+            foutput = serr.decode("utf-8")
+        else:
+            foutput = rawout.decode("utf-8")
+
+        proc.stdout.close()
+        proc.stderr.close()
+        return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 6f289a49e5ec..5970cee6d05f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -55,7 +55,6 @@
             "bpf"
         ],
         "setup": [
-            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
             [
                 "$TC action flush action bpf",
                 0,
@@ -63,14 +62,13 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
-            "$TC action flush action bpf",
-            "rm -f _b.o"
+            "$TC action flush action bpf"
         ]
     },
     {
@@ -81,7 +79,6 @@
             "bpf"
         ],
         "setup": [
-            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
             [
                 "$TC action flush action bpf",
                 0,
@@ -89,10 +86,10 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
         "expExitCode": "255",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
         "matchCount": "0",
         "teardown": [
             [
@@ -100,8 +97,7 @@
                 0,
                 1,
                 255
-            ],
-            "rm -f _c.o"
+            ]
         ]
     },
     {
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
index 70952bd98ff9..13147a1f5731 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -17,7 +17,7 @@
         "cmdUnderTest": "$TC actions add action connmark",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action connmark",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 pipe",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pipe",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -41,7 +41,7 @@
         "cmdUnderTest": "$TC actions add action connmark pass index 1",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action connmark index 1",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 pass.*index 1 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 1 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -65,7 +65,7 @@
         "cmdUnderTest": "$TC actions add action connmark drop index 100",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action connmark index 100",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 drop.*index 100 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 0 drop.*index 100 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -89,7 +89,7 @@
         "cmdUnderTest": "$TC actions add action connmark pipe index 455",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action connmark index 455",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 pipe.*index 455 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pipe.*index 455 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -113,7 +113,7 @@
         "cmdUnderTest": "$TC actions add action connmark reclassify index 7",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action connmark",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 reclassify.*index 7 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 0 reclassify.*index 7 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -137,7 +137,7 @@
         "cmdUnderTest": "$TC actions add action connmark continue index 17",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action connmark",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 continue.*index 17 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 0 continue.*index 17 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -161,7 +161,7 @@
         "cmdUnderTest": "$TC actions add action connmark jump 10 index 17",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action connmark",
-        "matchPattern": "action order [0-9]+:  connmark zone 0 jump 10.*index 17 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 0 jump 10.*index 17 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -185,7 +185,7 @@
         "cmdUnderTest": "$TC actions add action connmark zone 100 pipe index 1",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action connmark index 1",
-        "matchPattern": "action order [0-9]+:  connmark zone 100 pipe.*index 1 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 100 pipe.*index 1 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -209,7 +209,7 @@
         "cmdUnderTest": "$TC actions add action connmark zone 65536 reclassify index 21",
         "expExitCode": "255",
         "verifyCmd": "$TC actions get action connmark index 1",
-        "matchPattern": "action order [0-9]+:  connmark zone 65536 reclassify.*index 21 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 65536 reclassify.*index 21 ref",
         "matchCount": "0",
         "teardown": [
             "$TC actions flush action connmark"
@@ -233,7 +233,7 @@
         "cmdUnderTest": "$TC actions add action connmark zone 655 unsupp_arg pass index 2",
         "expExitCode": "255",
         "verifyCmd": "$TC actions get action connmark index 2",
-        "matchPattern": "action order [0-9]+:  connmark zone 655 unsupp_arg pass.*index 2 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 655 unsupp_arg pass.*index 2 ref",
         "matchCount": "0",
         "teardown": [
             "$TC actions flush action connmark"
@@ -258,7 +258,7 @@
         "cmdUnderTest": "$TC actions replace action connmark zone 555 reclassify index 555",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action connmark index 555",
-        "matchPattern": "action order [0-9]+:  connmark zone 555 reclassify.*index 555 ref",
+        "matchPattern": "action order [0-9]+: connmark zone 555 reclassify.*index 555 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
@@ -282,7 +282,7 @@
         "cmdUnderTest": "$TC actions add action connmark zone 555 pipe index 5 cookie aabbccddeeff112233445566778800a1",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action connmark index 5",
-        "matchPattern": "action order [0-9]+:  connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1",
+        "matchPattern": "action order [0-9]+: connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action connmark"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index 93cf8fea8ae7..a022792d392a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -336,6 +336,30 @@
         ]
     },
     {
+        "id": "b10b",
+        "name": "Add all 7 csum actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp ip4h sctp igmp udplite udp tcp index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(iph, icmp, igmp, tcp, udp, udplite, sctp\\).*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
         "id": "ce92",
         "name": "Add csum udp action with cookie",
         "category": [
@@ -398,13 +422,83 @@
                 255
             ]
         ],
-        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
-        "expExitCode": "255",
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
         "verifyCmd": "$TC actions ls action csum",
         "matchPattern": "^[ \t]+index [0-9]* ref",
         "matchCount": "32",
         "teardown": [
             "$TC actions flush action csum"
         ]
+    },
+    {
+        "id": "b4e9",
+        "name": "Delete batch of 32 csum actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "0015",
+        "name": "Add batch of 32 csum tcp actions with large cookies",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "^[ \t]+index [0-9]* ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "989e",
+        "name": "Delete batch of 32 csum actions with large cookies",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index 68c91023cdb9..89189a03ce3d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -536,5 +536,29 @@
         "matchPattern": "^[ \t]+index [0-9]+ ref",
         "matchCount": "0",
         "teardown": []
+    },
+    {
+        "id": "8e47",
+        "name": "Add gact action with random determ goto chain control action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pass random determ goto chain 1 2 index 90",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pass random type determ goto chain 1 val 2.*index 90 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index 9f34f0753969..637ea0219617 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1,7 +1,7 @@
 [
     {
-        "id": "a568",
-        "name": "Add action with ife type",
+        "id": "7682",
+        "name": "Create valid ife encode action with mark and pass control",
         "category": [
             "actions",
             "ife"
@@ -12,21 +12,400 @@
                 0,
                 1,
                 255
-            ],
-            "$TC actions add action ife encode type 0xDEAD index 1"
+            ]
         ],
-        "cmdUnderTest": "$TC actions get action ife index 1",
+        "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "ef47",
+        "name": "Create valid ife encode action with mark and pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "df43",
+        "name": "Create valid ife encode action with mark and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "e4cf",
+        "name": "Create valid ife encode action with mark and drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "ccba",
+        "name": "Create valid ife encode action with mark and reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "a1cf",
+        "name": "Create valid ife encode action with mark and jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "cb3d",
+        "name": "Create valid ife encode action with mark value at 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 90",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "1efb",
+        "name": "Create ife encode action with mark value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 90",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "95ed",
+        "name": "Create valid ife encode action with prio and pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "aa17",
+        "name": "Create valid ife encode action with prio and pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "74c7",
+        "name": "Create valid ife encode action with prio and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7a97",
+        "name": "Create valid ife encode action with prio and drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "f66b",
+        "name": "Create valid ife encode action with prio and reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "3056",
+        "name": "Create valid ife encode action with prio and jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7dd3",
+        "name": "Create valid ife encode action with prio value at 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 99",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "2ca1",
+        "name": "Create ife encode action with prio value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 99",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "05bb",
+        "name": "Create valid ife encode action with tcindex and pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action ife index 1",
-        "matchPattern": "type 0xDEAD",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action ife"
         ]
     },
     {
-        "id": "b983",
-        "name": "Add action without ife type",
+        "id": "ce65",
+        "name": "Create valid ife encode action with tcindex and pipe control",
         "category": [
             "actions",
             "ife"
@@ -37,16 +416,649 @@
                 0,
                 1,
                 255
-            ],
-            "$TC actions add action ife encode index 1"
+            ]
         ],
-        "cmdUnderTest": "$TC actions get action ife index 1",
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action ife index 1",
-        "matchPattern": "type 0xED3E",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action ife"
         ]
+    },
+    {
+        "id": "09cd",
+        "name": "Create valid ife encode action with tcindex and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "8eb5",
+        "name": "Create valid ife encode action with tcindex and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "451a",
+        "name": "Create valid ife encode action with tcindex and drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 77",
+        "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d76c",
+        "name": "Create valid ife encode action with tcindex and reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 77",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "e731",
+        "name": "Create valid ife encode action with tcindex and jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 77",
+        "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "b7b8",
+        "name": "Create valid ife encode action with tcindex value at 16-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d0d8",
+        "name": "Create ife encode action with tcindex value exceeding 16-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2a9c",
+        "name": "Create valid ife encode action with mac src parameter",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "cf5c",
+        "name": "Create valid ife encode action with mac dst parameter",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "2353",
+        "name": "Create valid ife encode action with mac src and mac dst parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 11",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "552c",
+        "name": "Create valid ife encode action with mark and type parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "0421",
+        "name": "Create valid ife encode action with prio and type parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 21",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "4017",
+        "name": "Create valid ife encode action with tcindex and type parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 21",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "fac3",
+        "name": "Create valid ife encode action with index at 32-bit maximnum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 4294967295",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7c25",
+        "name": "Create valid ife decode action with pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "dccb",
+        "name": "Create valid ife decode action with pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7bb9",
+        "name": "Create valid ife decode action with continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d9ad",
+        "name": "Create valid ife decode action with drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode drop index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "219f",
+        "name": "Create valid ife decode action with reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "8f44",
+        "name": "Create valid ife decode action with jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "56cf",
+        "name": "Create ife encode action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4294967295999",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ee94",
+        "name": "Create ife encode action with invalid control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "b330",
+        "name": "Create ife encode action with cookie",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1",
+        "matchCount": "1",
+        "teardown": [
+           "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "bbc0",
+        "name": "Create ife encode action with invalid argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d54a",
+        "name": "Create ife encode action with invalid type argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "7ee0",
+        "name": "Create ife encode action with invalid mac src argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "0a7d",
+        "name": "Create ife encode action with invalid mac dst argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+        "matchCount": "0",
+        "teardown": []
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 443c9b3c8664..db49fd0f8445 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -44,7 +44,8 @@
         "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action mirred"
+            "$TC actions flush action mirred",
+            "$TC actions flush action gact"
         ]
     },
     {
@@ -340,7 +341,7 @@
     },
     {
         "id": "8b69",
-        "name": "Add mirred mirror action with maximum index",
+        "name": "Add mirred mirror action with index at 32-bit maximum",
         "category": [
             "actions",
             "mirred"
@@ -363,6 +364,28 @@
         ]
     },
     {
+        "id": "3f66",
+        "name": "Add mirred mirror action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action mirred index 429496729555",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
         "id": "a70e",
         "name": "Delete mirred mirror action",
         "category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
new file mode 100644
index 000000000000..0080dc2fd41c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -0,0 +1,593 @@
+[
+    {
+        "id": "7565",
+        "name": "Add nat action on ingress with default control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 200.200.200.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.1.1/32 200.200.200.1 pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "fd79",
+        "name": "Add nat action on ingress with pipe control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 2.2.2.1 pipe index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 77",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 2.2.2.1 pipe.*index 77 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "eab9",
+        "name": "Add nat action on ingress with continue control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 continue index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 1000",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.10.10/32 192.168.20.20 continue.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "c53a",
+        "name": "Add nat action on ingress with reclassify control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 reclassify index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 1000",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.10.10/32 192.168.20.20 reclassify.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "76c9",
+        "name": "Add nat action on ingress with jump control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 12.18.10.10 12.18.20.20 jump 10 index 22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 22",
+        "matchPattern": "action order [0-9]+:  nat ingress 12.18.10.10/32 12.18.20.20 jump 10.*index 22 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "24c6",
+        "name": "Add nat action on ingress with drop control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 722",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 722",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 722 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "2120",
+        "name": "Add nat action on ingress with maximum index value",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 4294967295",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "3e9d",
+        "name": "Add nat action on ingress with invalid index value",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295555",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action nat index 4294967295555",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295555 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "f6c9",
+        "name": "Add nat action on ingress with invalid IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.1888.2.2 index 7",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action nat index 7",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 1.1888.2.2 pass.*index 7 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "be25",
+        "name": "Add nat action on ingress with invalid argument",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.18.2.2 another_arg index 12",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 1.18.2.2 pass.*another_arg.*index 12 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "a7bd",
+        "name": "Add nat action on ingress with DEFAULT IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "ee1e",
+        "name": "Add nat action on ingress with ANY IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "1de8",
+        "name": "Add nat action on ingress with ALL IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "8dba",
+        "name": "Add nat action on egress with default control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "19a7",
+        "name": "Add nat action on egress with pipe control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "f1d9",
+        "name": "Add nat action on egress with continue control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "6d4a",
+        "name": "Add nat action on egress with reclassify control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "b313",
+        "name": "Add nat action on egress with jump control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 jump 777",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 jump 777",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "d9fc",
+        "name": "Add nat action on egress with drop control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "a895",
+        "name": "Add nat action on egress with DEFAULT IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "2572",
+        "name": "Add nat action on egress with ANY IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "37f3",
+        "name": "Add nat action on egress with ALL IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "6054",
+        "name": "Add nat action on egress with cookie",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "79d6",
+        "name": "Add nat action on ingress with cookie",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 10.10.10.1 reclassify index 1 cookie 112233445566778899aabbccddeeff11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 1",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.1.1/32 10.10.10.1 reclassify.*index 1 ref.*cookie 112233445566778899aabbccddeeff11",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 38d85a1d7492..4086a50a670e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -313,6 +313,54 @@
         ]
     },
     {
+        "id": "6aaf",
+        "name": "Add police actions with conform-exceed control pass/pipe [with numeric values]",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 0/3 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "29b1",
+        "name": "Add police actions with conform-exceed control <invalid>/drop",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 10/drop index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
         "id": "c26f",
         "name": "Add police action with invalid peakrate value",
         "category": [
@@ -401,11 +449,11 @@
         ],
         "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
         "expExitCode": "0",
-        "verifyCmd": "$TC actions get action mirred index 4294967295",
+        "verifyCmd": "$TC actions get action police index 4294967295",
         "matchPattern": "action order [0-9]*:  police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
         "matchCount": "1",
         "teardown": [
-            "$TC actions flush action mirred"
+            "$TC actions flush action police"
         ]
     },
     {
@@ -667,5 +715,29 @@
         "teardown": [
             "$TC actions flush action police"
         ]
+    },
+    {
+        "id": "b48b",
+        "name": "Add police action with exceed goto chain control action",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 1mbit burst 1k conform-exceed pass / goto chain 42",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 1Mbit burst 1Kb mtu 2Kb action pass/goto chain 42",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000000..3aca33c00039
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,588 @@
+[
+    {
+        "id": "9784",
+        "name": "Add valid sample action with mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "5c91",
+        "name": "Add valid sample action with mandatory arguments and continue control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "334b",
+        "name": "Add valid sample action with mandatory arguments and drop control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "da69",
+        "name": "Add valid sample action with mandatory arguments and reclassify control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "13ce",
+        "name": "Add valid sample action with mandatory arguments and pipe control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "1886",
+        "name": "Add valid sample action with mandatory arguments and jump control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 200",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "b6d4",
+        "name": "Add sample action with mandatory arguments and invalid control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a874",
+        "name": "Add invalid sample action without mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ac01",
+        "name": "Add invalid sample action without mandatory argument rate",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "4203",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "14a7",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "8f2e",
+        "name": "Add valid sample action with trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "45f8",
+        "name": "Add sample action with maximum rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ad0c",
+        "name": "Add sample action with maximum trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "83a9",
+        "name": "Add sample action with maximum group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ed27",
+        "name": "Add sample action with invalid rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2eae",
+        "name": "Add sample action with invalid group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6ff3",
+        "name": "Add sample action with invalid trunc size",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 11",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2b2a",
+        "name": "Add sample action with invalid index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 5294967299",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "dee2",
+        "name": "Add sample action with maximum allowed index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 4294967295",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "560e",
+        "name": "Add sample action with cookie",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 45",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "704a",
+        "name": "Replace existing sample action with new rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "60eb",
+        "name": "Replace existing sample action with new group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "2cce",
+        "name": "Replace existing sample action with new trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "59d1",
+        "name": "Replace existing sample action with new control argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index 37ecc2716fee..5aaf593b914a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -17,7 +17,7 @@
         "cmdUnderTest": "$TC actions add action skbedit mark 1",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "action order [0-9]*:  skbedit mark 1",
+        "matchPattern": "action order [0-9]*: skbedit  mark 1",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -65,7 +65,7 @@
         "cmdUnderTest": "$TC actions add action skbedit prio 99",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "action order [0-9]*:  skbedit priority :99",
+        "matchPattern": "action order [0-9]*: skbedit  priority :99",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -113,7 +113,7 @@
         "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 909",
+        "matchPattern": "action order [0-9]*: skbedit queue_mapping 909",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -161,7 +161,7 @@
         "cmdUnderTest": "$TC actions add action skbedit ptype host",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "action order [0-9]*:  skbedit ptype host",
+        "matchPattern": "action order [0-9]*: skbedit  ptype host",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -185,7 +185,7 @@
         "cmdUnderTest": "$TC actions add action skbedit ptype otherhost",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "action order [0-9]*:  skbedit ptype otherhost",
+        "matchPattern": "action order [0-9]*: skbedit  ptype otherhost",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -233,7 +233,7 @@
         "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 11",
-        "matchPattern": "action order [0-9]*:  skbedit ptype host pipe.*index 11 ref",
+        "matchPattern": "action order [0-9]*: skbedit  ptype host pipe.*index 11 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -257,7 +257,7 @@
         "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 90",
-        "matchPattern": "action order [0-9]*:  skbedit mark 56789 reclassify.*index 90 ref",
+        "matchPattern": "action order [0-9]*: skbedit  mark 56789 reclassify.*index 90 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -281,7 +281,7 @@
         "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 271",
-        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 3 pass.*index 271 ref",
+        "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 pass.*index 271 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -305,7 +305,7 @@
         "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 271",
-        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 3 drop.*index 271 ref",
+        "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 drop.*index 271 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -329,7 +329,7 @@
         "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 2",
-        "matchPattern": "action order [0-9]*:  skbedit priority :8 jump 9.*index 2 ref",
+        "matchPattern": "action order [0-9]*: skbedit  priority :8 jump 9.*index 2 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -353,7 +353,7 @@
         "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 32",
-        "matchPattern": "action order [0-9]*:  skbedit priority :16 continue.*index 32 ref",
+        "matchPattern": "action order [0-9]*: skbedit  priority :16 continue.*index 32 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -377,7 +377,7 @@
         "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbedit index 32",
-        "matchPattern": "action order [0-9]*:  skbedit priority :16 continue.*index 32 ref.*cookie deadbeef",
+        "matchPattern": "action order [0-9]*: skbedit  priority :16 continue.*index 32 ref.*cookie deadbeef",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
@@ -405,7 +405,7 @@
         "cmdUnderTest": "$TC actions list action skbedit",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "action order [0-9]*:  skbedit",
+        "matchPattern": "action order [0-9]*: skbedit",
         "matchCount": "4",
         "teardown": [
             "$TC actions flush action skbedit"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
new file mode 100644
index 000000000000..10b2d894e436
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -0,0 +1,917 @@
+[
+    {
+        "id": "2b11",
+        "name": "Add tunnel_key set action with mandatory parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "dc6b",
+        "name": "Add tunnel_key set action with missing mandatory src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set dst_ip 20.20.20.2 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*dst_ip 20.20.20.2.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "7f25",
+        "name": "Add tunnel_key set action with missing mandatory dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "ba4e",
+        "name": "Add tunnel_key set action with missing mandatory id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "a5e0",
+        "name": "Add tunnel_key set action with invalid src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 300.168.100.1 dst_ip 192.168.200.1 id 7 index 1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 300.168.100.1.*dst_ip 192.168.200.1.*key_id 7.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "eaa8",
+        "name": "Add tunnel_key set action with invalid dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.800.1 id 10 index 11",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 11",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 192.168.100.1.*dst_ip 192.168.800.1.*key_id 10.*index 11 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "3b09",
+        "name": "Add tunnel_key set action with invalid id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 112233445566778899 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 112233445566778899.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "9625",
+        "name": "Add tunnel_key set action with invalid dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 dst_port 998877 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 11.*dst_port 998877.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "05af",
+        "name": "Add tunnel_key set action with optional dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.200.1 id 789 dst_port 4000 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 10",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.100.1.*dst_ip 192.168.200.1.*key_id 789.*dst_port 4000.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "da80",
+        "name": "Add tunnel_key set action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*id 11.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "d407",
+        "name": "Add tunnel_key set action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295678",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295678",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*index 4294967295678 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "5cba",
+        "name": "Add tunnel_key set action with id value at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 4294967295.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e84a",
+        "name": "Add tunnel_key set action with id value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42949672955 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42949672955.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+	]
+    },
+    {
+        "id": "9c19",
+        "name": "Add tunnel_key set action with dst_port value at 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "3bd9",
+        "name": "Add tunnel_key set action with dst_port value exceeding 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535789 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535789.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+	]
+    },
+    {
+        "id": "68e2",
+        "name": "Add tunnel_key unset action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6192",
+        "name": "Add tunnel_key unset continue action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset continue.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "061d",
+        "name": "Add tunnel_key set continue action with cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "8acb",
+        "name": "Add tunnel_key set continue action with invalid cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+	]
+    },
+    {
+        "id": "a07e",
+        "name": "Add tunnel_key action with no set/unset command specified",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "b227",
+        "name": "Add tunnel_key action with csum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 csum index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 99",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*csum pipe.*index 99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "58a7",
+        "name": "Add tunnel_key action with nocsum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7823 nocsum index 234",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 234",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7823.*nocsum pipe.*index 234",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "2575",
+        "name": "Add tunnel_key action with not-supported parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 foobar 999 index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*foobar 999.*index 4",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "7a88",
+        "name": "Add tunnel_key action with cookie parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*dst_port 0.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4f20",
+        "name": "Add tunnel_key action with a single geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e33d",
+        "name": "Add tunnel_key action with multiple geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "0778",
+        "name": "Add tunnel_key action with invalid class geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4ae8",
+        "name": "Add tunnel_key action with invalid type geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4039",
+        "name": "Add tunnel_key action with short data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "26a6",
+        "name": "Add tunnel_key action with non-multiple of 4 data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "f44d",
+        "name": "Add tunnel_key action with incomplete geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "7afc",
+        "name": "Replace tunnel_key set action with all parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 csum id 1 index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 nocsum id 11 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*nocsum pipe.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "364d",
+        "name": "Replace tunnel_key set action with all parameters and cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "937c",
+        "name": "Fetch all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 jump 10 index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions list action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*nocsum pipe.*index 1.*set.*src_ip 11.10.10.1.*dst_ip 21.20.20.2.*key_id 2.*dst_port 3129.*csum jump 10.*index 2.*set.*src_ip 12.10.10.1.*dst_ip 22.20.20.2.*key_id 3.*dst_port 3130.*csum pass.*index 3.*set.*src_ip 13.10.10.1.*dst_ip 23.20.20.2.*key_id 4.*dst_port 3131.*nocsum continue.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6783",
+        "name": "Flush all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 reclassify index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions flush action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+:.*",
+        "matchCount": "0",
+        "teardown": [
+	    "$TC actions flush action tunnel_key"
+	]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 4510ddfa6e54..69ea09eefffc 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -1,7 +1,7 @@
 [
     {
         "id": "6f5a",
-        "name": "Add vlan pop action",
+        "name": "Add vlan pop action with pipe opcode",
         "category": [
             "actions",
             "vlan"
@@ -14,18 +14,18 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC actions add action vlan pop index 8",
+        "cmdUnderTest": "$TC actions add action vlan pop pipe index 8",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action vlan",
-        "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action vlan"
         ]
     },
     {
-        "id": "ee6f",
-        "name": "Add vlan pop action with large index",
+        "id": "df35",
+        "name": "Add vlan pop action with pass opcode",
         "category": [
             "actions",
             "vlan"
@@ -38,10 +38,82 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "cmdUnderTest": "$TC actions add action vlan pop pass index 8",
         "expExitCode": "0",
-        "verifyCmd": "$TC actions list action vlan",
-        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "b0d4",
+        "name": "Add vlan pop action with drop opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop drop index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "95ee",
+        "name": "Add vlan pop action with reclassify opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "0283",
+        "name": "Add vlan pop action with continue opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop continue index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action vlan"
@@ -96,6 +168,74 @@
         ]
     },
     {
+        "id": "a178",
+        "name": "Add vlan pop action with invalid opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop foo index 8",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ee6f",
+        "name": "Add vlan pop action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "0dfa",
+        "name": "Add vlan pop action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action vlan index 429496729599",
+        "matchPattern": "action order [0-9]+: vlan.*pop.reclassify.*index 429496729599",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
         "id": "2b91",
         "name": "Add vlan invalid action",
         "category": [
@@ -115,13 +255,11 @@
         "verifyCmd": "$TC actions list action vlan",
         "matchPattern": "action order [0-9]+: vlan.*bad_mode",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action vlan"
-        ]
+        "teardown": []
     },
     {
         "id": "57fc",
-        "name": "Add vlan action with invalid protocol type",
+        "name": "Add vlan push action with invalid protocol type",
         "category": [
             "actions",
             "vlan"
@@ -139,9 +277,7 @@
         "verifyCmd": "$TC actions list action vlan",
         "matchPattern": "action order [0-9]+: vlan.*push",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action vlan"
-        ]
+        "teardown": []
     },
     {
         "id": "3989",
@@ -216,6 +352,30 @@
         ]
     },
     {
+        "id": "1f4b",
+        "name": "Add vlan push action with maximum 12-bit vlan ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
         "id": "1f7b",
         "name": "Add vlan push action with invalid vlan ID",
         "category": [
@@ -240,6 +400,30 @@
         ]
     },
     {
+        "id": "fe40",
+        "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
         "id": "5d02",
         "name": "Add vlan push action with invalid IEEE 802.1p priority",
         "category": [
@@ -259,9 +443,7 @@
         "verifyCmd": "$TC actions list action vlan",
         "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action vlan"
-        ]
+        "teardown": []
     },
     {
         "id": "6812",
@@ -312,6 +494,106 @@
         ]
     },
     {
+        "id": "3deb",
+        "name": "Replace existing vlan push action with new ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 500 pipe index 12"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 12",
+        "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "9e76",
+        "name": "Replace existing vlan push action with new protocol",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "ede4",
+        "name": "Replace existing vlan push action with new priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "d413",
+        "name": "Replace existing vlan pop action with new cookie",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan pop continue index 1 cookie 22334455"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
         "id": "83a4",
         "name": "Delete vlan pop action",
         "category": [
@@ -385,7 +667,7 @@
     },
     {
         "id": "1d78",
-        "name": "Add vlan action with cookie",
+        "name": "Add vlan push action with cookie",
         "category": [
             "actions",
             "vlan"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
new file mode 100644
index 000000000000..3b97cfd7e0f8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -0,0 +1,1049 @@
+[
+    {
+        "id": "901f",
+        "name": "Add fw filter with prio at 32-bit maxixum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65535 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65535 protocol all fw",
+        "matchPattern": "pref 65535 fw.*handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "51e2",
+        "name": "Add fw filter with prio exceeding 32-bit maxixum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65536 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65536 protocol all fw",
+        "matchPattern": "pref 65536 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d987",
+        "name": "Add fw filter with action ok",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "affe",
+        "name": "Add fw filter with action continue",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "28bc",
+        "name": "Add fw filter with action pipe",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8da2",
+        "name": "Add fw filter with action drop",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol all prio 1 fw",
+        "matchPattern": "handle 0x1.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9436",
+        "name": "Add fw filter with action reclassify",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "95bb",
+        "name": "Add fw filter with action jump 10",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action jump 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action jump 10",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3d74",
+        "name": "Add fw filter with action goto chain 5",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action goto chain 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action goto chain 5",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "eb8f",
+        "name": "Add fw filter with invalid action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pump",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pump",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6a79",
+        "name": "Add fw filter with missing mandatory action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "filter protocol all pref [0-9]+ fw.*handle 0x1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8298",
+        "name": "Add fw filter with cookie",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "a88c",
+        "name": "Add fw filter with invalid cookie",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action continue cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action continue.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "10f6",
+        "name": "Add fw filter with handle in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa1b2ff.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9d51",
+        "name": "Add fw filter with handle at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967295 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xffffffff.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d939",
+        "name": "Add fw filter with handle exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967296 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "658c",
+        "name": "Add fw filter with mask in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/0xa1b2f prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa/0xa1b2f",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "86be",
+        "name": "Add fw filter with mask at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa[^/]",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e635",
+        "name": "Add fw filter with mask exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6cab",
+        "name": "Add fw filter with handle/mask in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2cdff/0x1a2bffdc prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2cdff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa1b2cdff/0x1a2bffdc",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8700",
+        "name": "Add fw filter with handle/mask at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295/4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xffffffff[^/]",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7d62",
+        "name": "Add fw filter with handle/mask exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296/4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7b69",
+        "name": "Add fw filter with missing mandatory handle",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 1 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol all.*fw.*handle.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d68b",
+        "name": "Add fw filter with invalid parent",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent aa11b1b2: handle 1 prio 1 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter dev $DEV1 parent aa11b1b2: handle 1 prio 1 protocol all fw",
+        "matchPattern": "filter protocol all pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "66e0",
+        "name": "Add fw filter with missing mandatory parent id",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 handle 1 prio 1 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "pref [0-9]+ fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0ff3",
+        "name": "Add fw filter with classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 3 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid :3.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9849",
+        "name": "Add fw filter with classid at root",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid ffff:ffff action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "pref 1 fw.*handle 0x1 classid root.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b7ff",
+        "name": "Add fw filter with classid - keeps last 8 (hex) digits",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 98765fedcb action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid 765f:edcb.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2b18",
+        "name": "Add fw filter with invalid classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 6789defg action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid 6789:defg.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "fade",
+        "name": "Add fw filter with flowid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10 prio 1 fw flowid 1:10 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xa classid 1:10.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "33af",
+        "name": "Add fw filter with flowid then classid (same arg, takes second)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw flowid 10 classid 4 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :4.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8a8c",
+        "name": "Add fw filter with classid then flowid (same arg, takes second)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw classid 4 flowid 10 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :10.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b50d",
+        "name": "Add fw filter with handle val/mask and flowid 10:1000",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 10/0xff fw flowid 10:1000 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0xa/0xff classid 10:1000.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7207",
+        "name": "Add fw filter with protocol ip",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 handle 3 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 1 protocol ip fw",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 fw.*handle 0x3.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "306d",
+        "name": "Add fw filter with protocol ipv6",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ipv6 prio 2 handle 4 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol ipv6 fw",
+        "matchPattern": "filter parent ffff: protocol ipv6 pref 2 fw.*handle 0x4.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9a78",
+        "name": "Add fw filter with protocol arp",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol arp prio 5 handle 7 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 7 prio 5 protocol arp fw",
+        "matchPattern": "filter parent ffff: protocol arp pref 5 fw.*handle 0x7.*gact action drop.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1821",
+        "name": "Add fw filter with protocol 802_3",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol 802_3 handle 1 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol 802_3 fw",
+        "matchPattern": "filter parent ffff: protocol 802_3 pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2260",
+        "name": "Add fw filter with invalid protocol",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol igmp handle 1 prio 1 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol igmp fw",
+        "matchPattern": "filter parent ffff: protocol igmp pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "09d7",
+        "name": "Add fw filters protocol 802_3 and ip with conflicting priorities",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 3 handle 8 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 8 prio 3 protocol ip fw",
+        "matchPattern": "filter parent ffff: protocol ip pref 3 fw.*handle 0x8",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6973",
+        "name": "Add fw filters with same index, same action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 8 handle 4 fw action continue index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 8 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 8 fw.*handle 0x4.*gact action continue.*index 5 ref 2 bind 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "fc06",
+        "name": "Add fw filters with action police",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 1kbit burst 10k index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x5 rate 1Kbit burst 10Kb mtu 2Kb action reclassify overhead 0b.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "aac7",
+        "name": "Add fw filters with action police linklayer atm",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 2mbit burst 200k linklayer atm index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5339",
+        "name": "Del entire fw filter",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref.*handle.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0e99",
+        "name": "Del single fw filter x1",
+        "__comment__": "First of two tests to check that one filter is there and the other isn't",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref 7.*handle 0x5.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f54c",
+        "name": "Del single fw filter x2",
+        "__comment__": "Second of two tests to check that one filter is there and the other isn't",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref 9.*handle 0x3.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "ba94",
+        "name": "Del fw filter by prio",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: prio 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 4 fw.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4acb",
+        "name": "Del fw filter by chain",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 5 chain 13 fw action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: chain 13",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "fw chain 13 handle.*gact action pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3424",
+        "name": "Del fw filter by action (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw action drop",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 2 prio 4 protocol all fw",
+        "matchPattern": "handle 0x2.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "da89",
+        "name": "Del fw filter by handle (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 4 protocol all fw",
+        "matchPattern": "handle 0x3.*gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4d95",
+        "name": "Del fw filter by protocol (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol arp fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw",
+        "matchPattern": "filter parent ffff: protocol arp.*handle 0x4.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4736",
+        "name": "Del fw filter by flowid (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw flowid 45",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "handle 0x4.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3dcb",
+        "name": "Replace fw filter action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "eb4d",
+        "name": "Replace fw filter classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe classid 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1 classid :2.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "67ec",
+        "name": "Replace fw filter index",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 16",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pass.*index 16",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 5fa02d86b35f..99a5ffca1088 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -12,8 +12,8 @@
         "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
         "expExitCode": "0",
         "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "match 7f000002/ffffffff at 12",
-        "matchCount": "0",
+        "matchPattern": "match 7f000001/ffffffff at 12",
+        "matchCount": "1",
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 87a04a8a5945..7607ba3e3cbe 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -134,9 +134,9 @@ def exec_cmd(args, pm, stage, command):
     (rawout, serr) = proc.communicate()
 
     if proc.returncode != 0 and len(serr) > 0:
-        foutput = serr.decode("utf-8")
+        foutput = serr.decode("utf-8", errors="ignore")
     else:
-        foutput = rawout.decode("utf-8")
+        foutput = rawout.decode("utf-8", errors="ignore")
 
     proc.stdout.close()
     proc.stderr.close()
@@ -169,6 +169,8 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
                   file=sys.stderr)
             print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
                   file=sys.stderr)
+            print("returncode {}; expected {}".format(proc.returncode,
+                                                      exit_codes))
             print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
             print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
             print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
@@ -195,12 +197,18 @@ def run_one_test(pm, args, index, tidx):
         print('-----> execute stage')
     pm.call_pre_execute()
     (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
-    exit_code = p.returncode
+    if p:
+        exit_code = p.returncode
+    else:
+        exit_code = None
+
     pm.call_post_execute()
 
-    if (exit_code != int(tidx["expExitCode"])):
+    if (exit_code is None or exit_code != int(tidx["expExitCode"])):
         result = False
-        print("exit:", exit_code, int(tidx["expExitCode"]))
+        print("exit: {!r}".format(exit_code))
+        print("exit: {}".format(int(tidx["expExitCode"])))
+        #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"])))
         print(procout)
     else:
         if args.verbose > 0:
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index a023d0d62b25..d651bc1501bd 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -16,7 +16,9 @@ NAMES = {
           'DEV2': '',
           'BATCH_FILE': './batch.txt',
           # Name of the namespace to use
-          'NS': 'tcut'
+          'NS': 'tcut',
+          # Directory containing eBPF test programs
+          'EBPFDIR': './bpf'
         }
 
 
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
index 2c8ac8416299..32a9eadb2d4e 100644
--- a/tools/testing/selftests/timers/.gitignore
+++ b/tools/testing/selftests/timers/.gitignore
@@ -9,7 +9,7 @@ nanosleep
 nsleep-lat
 posix_timers
 raw_skew
-rtctest
+rtcpie
 set-2038
 set-tai
 set-timer-lat
@@ -19,4 +19,3 @@ valid-adjtimex
 adjtick
 set-tz
 freq-step
-rtctest_setdate
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 3496680981f2..c02683cfb6c9 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -5,13 +5,13 @@ LDFLAGS += -lrt -lpthread -lm
 # these are all "safe" tests that don't modify
 # system time or require escalated privileges
 TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
-	     inconsistency-check raw_skew threadtest rtctest
+	     inconsistency-check raw_skew threadtest rtcpie
 
 DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch freq-step leap-a-day \
 		      leapcrash set-tai set-2038 set-tz
 
-TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
 
 
 include ../lib.mk
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index ca6cd146aafe..dcf73c5dab6e 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -134,6 +134,11 @@ int main(int argv, char **argc)
 	printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
 
 	if (llabs(eppm - ppm) > 1000) {
+		if (tx1.offset || tx2.offset ||
+		    tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
+			printf("	[SKIP]\n");
+			return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+		}
 		printf("	[FAILED]\n");
 		return ksft_exit_fail();
 	}
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 000000000000..47b5bad1b393
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Periodic Interrupt test program
+ *
+ * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
+ * events"), PIE are completely handled using hrtimers, without actually using
+ * any underlying hardware RTC.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+int main(int argc, char **argv)
+{
+	int i, fd, retval, irqcount = 0;
+	unsigned long tmp, data, old_pie_rate;
+	const char *rtc = default_rtc;
+	struct timeval start, end, diff;
+
+	switch (argc) {
+	case 2:
+		rtc = argv[1];
+		/* FALLTHROUGH */
+	case 1:
+		break;
+	default:
+		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+
+	if (fd ==  -1) {
+		perror(rtc);
+		exit(errno);
+	}
+
+	/* Read periodic IRQ rate */
+	retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
+	if (retval == -1) {
+		/* not all RTCs support periodic IRQs */
+		if (errno == EINVAL) {
+			fprintf(stderr, "\nNo periodic IRQ support\n");
+			goto done;
+		}
+		perror("RTC_IRQP_READ ioctl");
+		exit(errno);
+	}
+	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);
+
+	fprintf(stderr, "Counting 20 interrupts at:");
+	fflush(stderr);
+
+	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+	for (tmp=2; tmp<=64; tmp*=2) {
+
+		retval = ioctl(fd, RTC_IRQP_SET, tmp);
+		if (retval == -1) {
+			/* not all RTCs can change their periodic IRQ rate */
+			if (errno == EINVAL) {
+				fprintf(stderr,
+					"\n...Periodic IRQ rate is fixed\n");
+				goto done;
+			}
+			perror("RTC_IRQP_SET ioctl");
+			exit(errno);
+		}
+
+		fprintf(stderr, "\n%ldHz:\t", tmp);
+		fflush(stderr);
+
+		/* Enable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_ON, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_ON ioctl");
+			exit(errno);
+		}
+
+		for (i=1; i<21; i++) {
+			gettimeofday(&start, NULL);
+			/* This blocks */
+			retval = read(fd, &data, sizeof(unsigned long));
+			if (retval == -1) {
+				perror("read");
+				exit(errno);
+			}
+			gettimeofday(&end, NULL);
+			timersub(&end, &start, &diff);
+			if (diff.tv_sec > 0 ||
+			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+				       diff.tv_sec, diff.tv_usec,
+				       (1000000L / tmp));
+				fflush(stdout);
+				exit(-1);
+			}
+
+			fprintf(stderr, " %d",i);
+			fflush(stderr);
+			irqcount++;
+		}
+
+		/* Disable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_OFF, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_OFF ioctl");
+			exit(errno);
+		}
+	}
+
+done:
+	ioctl(fd, RTC_IRQP_SET, old_pie_rate);
+
+	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
deleted file mode 100644
index 411eff625e66..000000000000
--- a/tools/testing/selftests/timers/rtctest.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- *      Real Time Clock Driver Test/Example Program
- *
- *      Compile with:
- *		     gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
- *
- *      Copyright (C) 1996, Paul Gortmaker.
- *
- *      Released under the GNU General Public License, version 2,
- *      included herein by reference.
- *
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-/*
- * This expects the new RTC class driver framework, working with
- * clocks that will often not be clones of what the PC-AT had.
- * Use the command line to specify another RTC if you need one.
- */
-static const char default_rtc[] = "/dev/rtc0";
-
-static struct rtc_time cutoff_dates[] = {
-	{
-		.tm_year = 70, /* 1970 -1900 */
-		.tm_mday = 1,
-	},
-	/* signed time_t 19/01/2038 3:14:08 */
-	{
-		.tm_year = 138,
-		.tm_mday = 19,
-	},
-	{
-		.tm_year = 138,
-		.tm_mday = 20,
-	},
-	{
-		.tm_year = 199, /* 2099 -1900 */
-		.tm_mday = 1,
-	},
-	{
-		.tm_year = 200, /* 2100 -1900 */
-		.tm_mday = 1,
-	},
-	/* unsigned time_t 07/02/2106 7:28:15*/
-	{
-		.tm_year = 205,
-		.tm_mon = 1,
-		.tm_mday = 7,
-	},
-	{
-		.tm_year = 206,
-		.tm_mon = 1,
-		.tm_mday = 8,
-	},
-	/* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/
-	{
-		.tm_year = 362,
-		.tm_mon = 3,
-		.tm_mday = 12,
-	},
-	{
-		.tm_year = 362, /* 2262 -1900 */
-		.tm_mon = 3,
-		.tm_mday = 13,
-	},
-};
-
-static int compare_dates(struct rtc_time *a, struct rtc_time *b)
-{
-	if (a->tm_year != b->tm_year ||
-	    a->tm_mon != b->tm_mon ||
-	    a->tm_mday != b->tm_mday ||
-	    a->tm_hour != b->tm_hour ||
-	    a->tm_min != b->tm_min ||
-	    ((b->tm_sec - a->tm_sec) > 1))
-		return 1;
-
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	int i, fd, retval, irqcount = 0, dangerous = 0;
-	unsigned long tmp, data;
-	struct rtc_time rtc_tm;
-	const char *rtc = default_rtc;
-	struct timeval start, end, diff;
-
-	switch (argc) {
-	case 3:
-		if (*argv[2] == 'd')
-			dangerous = 1;
-	case 2:
-		rtc = argv[1];
-		/* FALLTHROUGH */
-	case 1:
-		break;
-	default:
-		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
-		return 1;
-	}
-
-	fd = open(rtc, O_RDONLY);
-
-	if (fd ==  -1) {
-		perror(rtc);
-		exit(errno);
-	}
-
-	fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
-
-	/* Turn on update interrupts (one per second) */
-	retval = ioctl(fd, RTC_UIE_ON, 0);
-	if (retval == -1) {
-		if (errno == EINVAL) {
-			fprintf(stderr,
-				"\n...Update IRQs not supported.\n");
-			goto test_READ;
-		}
-		perror("RTC_UIE_ON ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
-			rtc);
-	fflush(stderr);
-	for (i=1; i<6; i++) {
-		/* This read will block */
-		retval = read(fd, &data, sizeof(unsigned long));
-		if (retval == -1) {
-			perror("read");
-			exit(errno);
-		}
-		fprintf(stderr, " %d",i);
-		fflush(stderr);
-		irqcount++;
-	}
-
-	fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
-	fflush(stderr);
-	for (i=1; i<6; i++) {
-		struct timeval tv = {5, 0};     /* 5 second timeout on select */
-		fd_set readfds;
-
-		FD_ZERO(&readfds);
-		FD_SET(fd, &readfds);
-		/* The select will wait until an RTC interrupt happens. */
-		retval = select(fd+1, &readfds, NULL, NULL, &tv);
-		if (retval == -1) {
-		        perror("select");
-		        exit(errno);
-		}
-		/* This read won't block unlike the select-less case above. */
-		retval = read(fd, &data, sizeof(unsigned long));
-		if (retval == -1) {
-		        perror("read");
-		        exit(errno);
-		}
-		fprintf(stderr, " %d",i);
-		fflush(stderr);
-		irqcount++;
-	}
-
-	/* Turn off update interrupts */
-	retval = ioctl(fd, RTC_UIE_OFF, 0);
-	if (retval == -1) {
-		perror("RTC_UIE_OFF ioctl");
-		exit(errno);
-	}
-
-test_READ:
-	/* Read the RTC time/date */
-	retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
-	if (retval == -1) {
-		perror("RTC_RD_TIME ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
-		rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
-		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-	/* Set the alarm to 5 sec in the future, and check for rollover */
-	rtc_tm.tm_sec += 5;
-	if (rtc_tm.tm_sec >= 60) {
-		rtc_tm.tm_sec %= 60;
-		rtc_tm.tm_min++;
-	}
-	if (rtc_tm.tm_min == 60) {
-		rtc_tm.tm_min = 0;
-		rtc_tm.tm_hour++;
-	}
-	if (rtc_tm.tm_hour == 24)
-		rtc_tm.tm_hour = 0;
-
-	retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
-	if (retval == -1) {
-		if (errno == EINVAL) {
-			fprintf(stderr,
-				"\n...Alarm IRQs not supported.\n");
-			goto test_PIE;
-		}
-
-		perror("RTC_ALM_SET ioctl");
-		exit(errno);
-	}
-
-	/* Read the current alarm settings */
-	retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
-	if (retval == -1) {
-		if (errno == EINVAL) {
-			fprintf(stderr,
-					"\n...EINVAL reading current alarm setting.\n");
-			goto test_PIE;
-		}
-		perror("RTC_ALM_READ ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
-		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-	/* Enable alarm interrupts */
-	retval = ioctl(fd, RTC_AIE_ON, 0);
-	if (retval == -1) {
-		if (errno == EINVAL || errno == EIO) {
-			fprintf(stderr,
-				"\n...Alarm IRQs not supported.\n");
-			goto test_PIE;
-		}
-
-		perror("RTC_AIE_ON ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Waiting 5 seconds for alarm...");
-	fflush(stderr);
-	/* This blocks until the alarm ring causes an interrupt */
-	retval = read(fd, &data, sizeof(unsigned long));
-	if (retval == -1) {
-		perror("read");
-		exit(errno);
-	}
-	irqcount++;
-	fprintf(stderr, " okay. Alarm rang.\n");
-
-	/* Disable alarm interrupts */
-	retval = ioctl(fd, RTC_AIE_OFF, 0);
-	if (retval == -1) {
-		perror("RTC_AIE_OFF ioctl");
-		exit(errno);
-	}
-
-test_PIE:
-	/* Read periodic IRQ rate */
-	retval = ioctl(fd, RTC_IRQP_READ, &tmp);
-	if (retval == -1) {
-		/* not all RTCs support periodic IRQs */
-		if (errno == EINVAL) {
-			fprintf(stderr, "\nNo periodic IRQ support\n");
-			goto test_DATE;
-		}
-		perror("RTC_IRQP_READ ioctl");
-		exit(errno);
-	}
-	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
-
-	fprintf(stderr, "Counting 20 interrupts at:");
-	fflush(stderr);
-
-	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
-	for (tmp=2; tmp<=64; tmp*=2) {
-
-		retval = ioctl(fd, RTC_IRQP_SET, tmp);
-		if (retval == -1) {
-			/* not all RTCs can change their periodic IRQ rate */
-			if (errno == EINVAL) {
-				fprintf(stderr,
-					"\n...Periodic IRQ rate is fixed\n");
-				goto test_DATE;
-			}
-			perror("RTC_IRQP_SET ioctl");
-			exit(errno);
-		}
-
-		fprintf(stderr, "\n%ldHz:\t", tmp);
-		fflush(stderr);
-
-		/* Enable periodic interrupts */
-		retval = ioctl(fd, RTC_PIE_ON, 0);
-		if (retval == -1) {
-			perror("RTC_PIE_ON ioctl");
-			exit(errno);
-		}
-
-		for (i=1; i<21; i++) {
-			gettimeofday(&start, NULL);
-			/* This blocks */
-			retval = read(fd, &data, sizeof(unsigned long));
-			if (retval == -1) {
-				perror("read");
-				exit(errno);
-			}
-			gettimeofday(&end, NULL);
-			timersub(&end, &start, &diff);
-			if (diff.tv_sec > 0 ||
-			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
-				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
-				       diff.tv_sec, diff.tv_usec,
-				       (1000000L / tmp));
-				fflush(stdout);
-				exit(-1);
-			}
-
-			fprintf(stderr, " %d",i);
-			fflush(stderr);
-			irqcount++;
-		}
-
-		/* Disable periodic interrupts */
-		retval = ioctl(fd, RTC_PIE_OFF, 0);
-		if (retval == -1) {
-			perror("RTC_PIE_OFF ioctl");
-			exit(errno);
-		}
-	}
-
-test_DATE:
-	if (!dangerous)
-		goto done;
-
-	fprintf(stderr, "\nTesting problematic dates\n");
-
-	for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) {
-		struct rtc_time current;
-
-		/* Write the new date in RTC */
-		retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]);
-		if (retval == -1) {
-			perror("RTC_SET_TIME ioctl");
-			close(fd);
-			exit(errno);
-		}
-
-		/* Read back */
-		retval = ioctl(fd, RTC_RD_TIME, &current);
-		if (retval == -1) {
-			perror("RTC_RD_TIME ioctl");
-			exit(errno);
-		}
-
-		if(compare_dates(&cutoff_dates[i], &current)) {
-			fprintf(stderr,"Setting date %d failed\n",
-			        cutoff_dates[i].tm_year + 1900);
-			goto done;
-		}
-
-		cutoff_dates[i].tm_sec += 5;
-
-		/* Write the new alarm in RTC */
-		retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]);
-		if (retval == -1) {
-			perror("RTC_ALM_SET ioctl");
-			close(fd);
-			exit(errno);
-		}
-
-		/* Read back */
-		retval = ioctl(fd, RTC_ALM_READ, &current);
-		if (retval == -1) {
-			perror("RTC_ALM_READ ioctl");
-			exit(errno);
-		}
-
-		if(compare_dates(&cutoff_dates[i], &current)) {
-			fprintf(stderr,"Setting alarm %d failed\n",
-			        cutoff_dates[i].tm_year + 1900);
-			goto done;
-		}
-
-		fprintf(stderr, "Setting year %d is OK \n",
-			cutoff_dates[i].tm_year + 1900);
-	}
-done:
-	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
-
-	close(fd);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile
new file mode 100644
index 000000000000..f7baa9aa2932
--- /dev/null
+++ b/tools/testing/selftests/uevent/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := uevent_filtering
+CFLAGS += -Wl,-no-as-needed -Wall
+
+uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
+	$(CC) $(CFLAGS) $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config
new file mode 100644
index 000000000000..1038f4515be8
--- /dev/null
+++ b/tools/testing/selftests/uevent/config
@@ -0,0 +1,2 @@
+CONFIG_USER_NS=y
+CONFIG_NET=y
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
new file mode 100644
index 000000000000..f83391aa42cf
--- /dev/null
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/netlink.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
+#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
+#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
+#define __UEVENT_LISTEN_ALL -1
+
+ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+	ssize_t ret;
+
+again:
+	ret = read(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+	ssize_t ret;
+
+again:
+	ret = write(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+int wait_for_pid(pid_t pid)
+{
+	int status, ret;
+
+again:
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		if (errno == EINTR)
+			goto again;
+
+		return -1;
+	}
+
+	if (ret != pid)
+		goto again;
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+		return -1;
+
+	return 0;
+}
+
+static int uevent_listener(unsigned long post_flags, bool expect_uevent,
+			   int sync_fd)
+{
+	int sk_fd, ret;
+	socklen_t sk_addr_len;
+	int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+	uint64_t sync_add = 1;
+	struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
+	char buf[__UEVENT_BUFFER_SIZE] = { 0 };
+	struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
+	char control[CMSG_SPACE(sizeof(struct ucred))];
+	struct msghdr hdr = {
+		&rcv_addr, sizeof(rcv_addr), &iov, 1,
+		control,   sizeof(control),  0,
+	};
+
+	sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+		       NETLINK_KOBJECT_UEVENT);
+	if (sk_fd < 0) {
+		fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
+		return -1;
+	}
+
+	ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+			 sizeof(rcv_buf_sz));
+	if (ret < 0) {
+		fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
+		goto on_error;
+	}
+
+	sk_addr.nl_family = AF_NETLINK;
+	sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
+
+	sk_addr_len = sizeof(sk_addr);
+	ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
+	if (ret < 0) {
+		fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
+		goto on_error;
+	}
+
+	ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
+	if (ret < 0) {
+		fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
+		goto on_error;
+	}
+
+	if ((size_t)sk_addr_len != sizeof(sk_addr)) {
+		fprintf(stderr, "Invalid socket address size\n");
+		goto on_error;
+	}
+
+	if (post_flags & CLONE_NEWUSER) {
+		ret = unshare(CLONE_NEWUSER);
+		if (ret < 0) {
+			fprintf(stderr,
+				"%s - Failed to unshare user namespace\n",
+				strerror(errno));
+			goto on_error;
+		}
+	}
+
+	if (post_flags & CLONE_NEWNET) {
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			fprintf(stderr,
+				"%s - Failed to unshare network namespace\n",
+				strerror(errno));
+			goto on_error;
+		}
+	}
+
+	ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
+	close(sync_fd);
+	if (ret != sizeof(sync_add)) {
+		fprintf(stderr, "Failed to synchronize with parent process\n");
+		goto on_error;
+	}
+
+	fret = 0;
+	for (;;) {
+		ssize_t r;
+
+		r = recvmsg(sk_fd, &hdr, 0);
+		if (r <= 0) {
+			fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
+			ret = -1;
+			break;
+		}
+
+		/* ignore libudev messages */
+		if (memcmp(buf, "libudev", 8) == 0)
+			continue;
+
+		/* ignore uevents we didn't trigger */
+		if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
+			continue;
+
+		if (!expect_uevent) {
+			fprintf(stderr, "Received unexpected uevent:\n");
+			ret = -1;
+		}
+
+		if (TH_LOG_ENABLED) {
+			/* If logging is enabled dump the received uevent. */
+			(void)write_nointr(STDERR_FILENO, buf, r);
+			(void)write_nointr(STDERR_FILENO, "\n", 1);
+		}
+
+		break;
+	}
+
+on_error:
+	close(sk_fd);
+
+	return fret;
+}
+
+int trigger_uevent(unsigned int times)
+{
+	int fd, ret;
+	unsigned int i;
+
+	fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		if (errno != ENOENT)
+			return -EINVAL;
+
+		return -1;
+	}
+
+	for (i = 0; i < times; i++) {
+		ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to trigger uevent\n");
+			break;
+		}
+	}
+	close(fd);
+
+	return ret;
+}
+
+int set_death_signal(void)
+{
+	int ret;
+	pid_t ppid;
+
+	ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+	/* Check whether we have been orphaned. */
+	ppid = getppid();
+	if (ppid == 1) {
+		pid_t self;
+
+		self = getpid();
+		ret = kill(self, SIGKILL);
+	}
+
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+static int do_test(unsigned long pre_flags, unsigned long post_flags,
+		   bool expect_uevent, int sync_fd)
+{
+	int ret;
+	uint64_t wait_val;
+	pid_t pid;
+	sigset_t mask;
+	sigset_t orig_mask;
+	struct timespec timeout;
+
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGCHLD);
+
+	ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
+	if (ret < 0) {
+		fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
+		return -1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
+		return -1;
+	}
+
+	if (pid == 0) {
+		/* Make sure that we go away when our parent dies. */
+		ret = set_death_signal();
+		if (ret < 0) {
+			fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (pre_flags & CLONE_NEWUSER) {
+			ret = unshare(CLONE_NEWUSER);
+			if (ret < 0) {
+				fprintf(stderr,
+					"%s - Failed to unshare user namespace\n",
+					strerror(errno));
+				_exit(EXIT_FAILURE);
+			}
+		}
+
+		if (pre_flags & CLONE_NEWNET) {
+			ret = unshare(CLONE_NEWNET);
+			if (ret < 0) {
+				fprintf(stderr,
+					"%s - Failed to unshare network namespace\n",
+					strerror(errno));
+				_exit(EXIT_FAILURE);
+			}
+		}
+
+		if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
+			_exit(EXIT_FAILURE);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
+	if (ret != sizeof(wait_val)) {
+		fprintf(stderr, "Failed to synchronize with child process\n");
+		_exit(EXIT_FAILURE);
+	}
+
+	/* Trigger 10 uevents to account for the case where the kernel might
+	 * drop some.
+	 */
+	ret = trigger_uevent(10);
+	if (ret < 0)
+		fprintf(stderr, "Failed triggering uevents\n");
+
+	/* Wait for 2 seconds before considering this failed. This should be
+	 * plenty of time for the kernel to deliver the uevent even under heavy
+	 * load.
+	 */
+	timeout.tv_sec = 2;
+	timeout.tv_nsec = 0;
+
+again:
+	ret = sigtimedwait(&mask, NULL, &timeout);
+	if (ret < 0) {
+		if (errno == EINTR)
+			goto again;
+
+		if (!expect_uevent)
+			ret = kill(pid, SIGTERM); /* success */
+		else
+			ret = kill(pid, SIGUSR1); /* error */
+		if (ret < 0)
+			return -1;
+	}
+
+	ret = wait_for_pid(pid);
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+static void signal_handler(int sig)
+{
+	if (sig == SIGTERM)
+		_exit(EXIT_SUCCESS);
+
+	_exit(EXIT_FAILURE);
+}
+
+TEST(uevent_filtering)
+{
+	int ret, sync_fd;
+	struct sigaction act;
+
+	if (geteuid()) {
+		TH_LOG("Uevent filtering tests require root privileges. Skipping test");
+		_exit(KSFT_SKIP);
+	}
+
+	ret = access(__DEV_FULL, F_OK);
+	EXPECT_EQ(0, ret) {
+		if (errno == ENOENT) {
+			TH_LOG(__DEV_FULL " does not exist. Skipping test");
+			_exit(KSFT_SKIP);
+		}
+
+		_exit(KSFT_FAIL);
+	}
+
+	act.sa_handler = signal_handler;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+
+	ret = sigaction(SIGTERM, &act, NULL);
+	ASSERT_EQ(0, ret);
+
+	sync_fd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_GE(sync_fd, 0);
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace owned by
+	 *   initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, 0, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in non-initial network namespace
+	 *   owned by initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - unshare user namespace
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in non-initial network namespace
+	 *   owned by non-initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives no uevent
+	 */
+	ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - unshare network namespace
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, CLONE_NEWNET, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - unshare user namespace
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - unshare user namespace
+	 * - unshare network namespace
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+do_cleanup:
+	close(sync_fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
index d60506fc77f8..f9b31a57439b 100755
--- a/tools/testing/selftests/user/test_user_copy.sh
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -2,6 +2,13 @@
 # SPDX-License-Identifier: GPL-2.0
 # Runs copy_to/from_user infrastructure using test_user_copy kernel module
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+	echo "user: module test_user_copy is not found [SKIP]"
+	exit $ksft_skip
+fi
 if /sbin/modprobe -q test_user_copy; then
 	/sbin/modprobe -q -r test_user_copy
 	echo "user_copy: ok"
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index f5d7a7851e21..9e03d61f52fd 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,6 +1,14 @@
 # SPDX-License-Identifier: GPL-2.0
 include ../lib.mk
 
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+endif
+
 ifndef CROSS_COMPILE
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
@@ -8,14 +16,11 @@ ifeq ($(CONFIG_X86_32),y)
 LDLIBS += -lgcc_s
 endif
 
-TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86
-
-all: $(TEST_PROGS)
+all: $(TEST_GEN_PROGS)
 $(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
 		-o $@
 
-EXTRA_CLEAN := $(TEST_PROGS)
 endif
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test.c
index 2df26bd0099c..eda53f833d8e 100644
--- a/tools/testing/selftests/vDSO/vdso_test.c
+++ b/tools/testing/selftests/vDSO/vdso_test.c
@@ -15,6 +15,8 @@
 #include <sys/auxv.h>
 #include <sys/time.h>
 
+#include "../kselftest.h"
+
 extern void *vdso_sym(const char *version, const char *name);
 extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
 extern void vdso_init_from_auxv(void *auxv);
@@ -37,7 +39,7 @@ int main(int argc, char **argv)
 	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
 	if (!sysinfo_ehdr) {
 		printf("AT_SYSINFO_EHDR is not present!\n");
-		return 0;
+		return KSFT_SKIP;
 	}
 
 	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
@@ -48,7 +50,7 @@ int main(int argc, char **argv)
 
 	if (!gtod) {
 		printf("Could not find %s\n", name);
-		return 1;
+		return KSFT_SKIP;
 	}
 
 	struct timeval tv;
@@ -59,6 +61,7 @@ int main(int argc, char **argv)
 		       (long long)tv.tv_sec, (long long)tv.tv_usec);
 	} else {
 		printf("%s failed\n", name);
+		return KSFT_FAIL;
 	}
 
 	return 0;
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 342c7bc9dc8c..31b3c98b6d34 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,6 +1,7 @@
 hugepage-mmap
 hugepage-shm
 map_hugetlb
+map_populate
 thuge-gen
 compaction_test
 mlock2-tests
@@ -12,3 +13,4 @@ mlock-random-test
 virtual_address_range
 gup_benchmark
 va_128TBswitch
+map_fixed_noreplace
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index fdefa2295ddc..6e67e726e5a5 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -12,6 +12,8 @@ TEST_GEN_FILES += gup_benchmark
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_fixed_noreplace
+TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
 TEST_GEN_FILES += on-fault-limit
@@ -25,10 +27,6 @@ TEST_PROGS := run_vmtests
 
 include ../lib.mk
 
-$(OUTPUT)/userfaultfd: ../../../../usr/include/linux/kernel.h
 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread
 
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
-
-../../../../usr/include/linux/kernel.h:
-	make -C ../../../.. headers_install
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index 1097f04e4d80..bcec71250873 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -16,6 +16,8 @@
 #include <unistd.h>
 #include <string.h>
 
+#include "../kselftest.h"
+
 #define MAP_SIZE 1048576
 
 struct map_list {
@@ -169,7 +171,7 @@ int main(int argc, char **argv)
 		printf("Either the sysctl compact_unevictable_allowed is not\n"
 		       "set to 1 or couldn't read the proc file.\n"
 		       "Skipping the test\n");
-		return 0;
+		return KSFT_SKIP;
 	}
 
 	lim.rlim_cur = RLIM_INFINITY;
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 36df55132036..880b96fc80d4 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,9 +15,12 @@
 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
 #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
+#define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
+#define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
 
 struct gup_benchmark {
-	__u64 delta_usec;
+	__u64 get_delta_usec;
+	__u64 put_delta_usec;
 	__u64 addr;
 	__u64 size;
 	__u32 nr_pages_per_call;
@@ -28,10 +31,12 @@ int main(int argc, char **argv)
 {
 	struct gup_benchmark gup;
 	unsigned long size = 128 * MB;
-	int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+	char *file = "/dev/zero";
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
 		switch (opt) {
 		case 'm':
 			size = atoi(optarg) * MB;
@@ -48,13 +53,36 @@ int main(int argc, char **argv)
 		case 'T':
 			thp = 0;
 			break;
+		case 'L':
+			cmd = GUP_LONGTERM_BENCHMARK;
+			break;
+		case 'U':
+			cmd = GUP_BENCHMARK;
+			break;
 		case 'w':
 			write = 1;
+			break;
+		case 'f':
+			file = optarg;
+			break;
+		case 'S':
+			flags &= ~MAP_PRIVATE;
+			flags |= MAP_SHARED;
+			break;
+		case 'H':
+			flags |= MAP_HUGETLB;
+			break;
 		default:
 			return -1;
 		}
 	}
 
+	filed = open(file, O_RDWR|O_CREAT);
+	if (filed < 0) {
+		perror("open");
+		exit(filed);
+	}
+
 	gup.nr_pages_per_call = nr_pages;
 	gup.flags = write;
 
@@ -62,8 +90,7 @@ int main(int argc, char **argv)
 	if (fd == -1)
 		perror("open"), exit(1);
 
-	p = mmap(NULL, size, PROT_READ | PROT_WRITE,
-			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
 	if (p == MAP_FAILED)
 		perror("mmap"), exit(1);
 	gup.addr = (unsigned long)p;
@@ -78,10 +105,11 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < repeats; i++) {
 		gup.size = size;
-		if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+		if (ioctl(fd, cmd, &gup))
 			perror("ioctl"), exit(1);
 
-		printf("Time: %lld us", gup.delta_usec);
+		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
+			gup.put_delta_usec);
 		if (gup.size != size)
 			printf(", truncated (size: %lld)", gup.size);
 		printf("\n");
diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c
new file mode 100644
index 000000000000..d91bde511268
--- /dev/null
+++ b/tools/testing/selftests/vm/map_fixed_noreplace.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test that MAP_FIXED_NOREPLACE works.
+ *
+ * Copyright 2018, Jann Horn <jannh@google.com>
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE 0x100000
+#endif
+
+#define BASE_ADDRESS	(256ul * 1024 * 1024)
+
+
+static void dump_maps(void)
+{
+	char cmd[32];
+
+	snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+	system(cmd);
+}
+
+int main(void)
+{
+	unsigned long flags, addr, size, page_size;
+	char *p;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
+
+	// Check we can map all the areas we need below
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = 5 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error: couldn't map the space we need for the test\n");
+		return 1;
+	}
+
+	errno = 0;
+	if (munmap((void *)addr, 5 * page_size) != 0) {
+		dump_maps();
+		printf("Error: munmap failed!?\n");
+		return 1;
+	}
+	printf("unmap() successful\n");
+
+	errno = 0;
+	addr = BASE_ADDRESS + page_size;
+	size = 3 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error: first mmap() failed unexpectedly\n");
+		return 1;
+	}
+
+	/*
+	 * Exact same mapping again:
+	 *   base |  free  | new
+	 *     +1 | mapped | new
+	 *     +2 | mapped | new
+	 *     +3 | mapped | new
+	 *     +4 |  free  | new
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = 5 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:1: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Second mapping contained within first:
+	 *
+	 *   base |  free  |
+	 *     +1 | mapped |
+	 *     +2 | mapped | new
+	 *     +3 | mapped |
+	 *     +4 |  free  |
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS + (2 * page_size);
+	size = page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:2: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Overlap end of existing mapping:
+	 *   base |  free  |
+	 *     +1 | mapped |
+	 *     +2 | mapped |
+	 *     +3 | mapped | new
+	 *     +4 |  free  | new
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS + (3 * page_size);
+	size = 2 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:3: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Overlap start of existing mapping:
+	 *   base |  free  | new
+	 *     +1 | mapped | new
+	 *     +2 | mapped |
+	 *     +3 | mapped |
+	 *     +4 |  free  |
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = 2 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:4: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Adjacent to start of existing mapping:
+	 *   base |  free  | new
+	 *     +1 | mapped |
+	 *     +2 | mapped |
+	 *     +3 | mapped |
+	 *     +4 |  free  |
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error:5: mmap() failed when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Adjacent to end of existing mapping:
+	 *   base |  free  |
+	 *     +1 | mapped |
+	 *     +2 | mapped |
+	 *     +3 | mapped |
+	 *     +4 |  free  |  new
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS + (4 * page_size);
+	size = page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error:6: mmap() failed when it shouldn't have\n");
+		return 1;
+	}
+
+	addr = BASE_ADDRESS;
+	size = 5 * page_size;
+	if (munmap((void *)addr, size) != 0) {
+		dump_maps();
+		printf("Error: munmap failed!?\n");
+		return 1;
+	}
+	printf("unmap() successful\n");
+
+	printf("OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c
new file mode 100644
index 000000000000..6b8aeaa0bf7a
--- /dev/null
+++ b/tools/testing/selftests/vm/map_populate.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Dmitry Safonov, Arista Networks
+ *
+ * MAP_POPULATE | MAP_PRIVATE should COW VMA pages.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifndef MMAP_SZ
+#define MMAP_SZ		4096
+#endif
+
+#define BUG_ON(condition, description)					\
+	do {								\
+		if (condition) {					\
+			fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
+				__LINE__, (description), strerror(errno)); \
+			exit(1);					\
+		}							\
+	} while (0)
+
+static int parent_f(int sock, unsigned long *smap, int child)
+{
+	int status, ret;
+
+	ret = read(sock, &status, sizeof(int));
+	BUG_ON(ret <= 0, "read(sock)");
+
+	*smap = 0x22222BAD;
+	ret = msync(smap, MMAP_SZ, MS_SYNC);
+	BUG_ON(ret, "msync()");
+
+	ret = write(sock, &status, sizeof(int));
+	BUG_ON(ret <= 0, "write(sock)");
+
+	waitpid(child, &status, 0);
+	BUG_ON(!WIFEXITED(status), "child in unexpected state");
+
+	return WEXITSTATUS(status);
+}
+
+static int child_f(int sock, unsigned long *smap, int fd)
+{
+	int ret, buf = 0;
+
+	smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_POPULATE, fd, 0);
+	BUG_ON(smap == MAP_FAILED, "mmap()");
+
+	BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file");
+
+	ret = write(sock, &buf, sizeof(int));
+	BUG_ON(ret <= 0, "write(sock)");
+
+	ret = read(sock, &buf, sizeof(int));
+	BUG_ON(ret <= 0, "read(sock)");
+
+	BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
+	BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int sock[2], child, ret;
+	FILE *ftmp;
+	unsigned long *smap;
+
+	ftmp = tmpfile();
+	BUG_ON(ftmp == 0, "tmpfile()");
+
+	ret = ftruncate(fileno(ftmp), MMAP_SZ);
+	BUG_ON(ret, "ftruncate()");
+
+	smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fileno(ftmp), 0);
+	BUG_ON(smap == MAP_FAILED, "mmap()");
+
+	*smap = 0xdeadbabe;
+	/* Probably unnecessary, but let it be. */
+	ret = msync(smap, MMAP_SZ, MS_SYNC);
+	BUG_ON(ret, "msync()");
+
+	ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock);
+	BUG_ON(ret, "socketpair()");
+
+	child = fork();
+	BUG_ON(child == -1, "fork()");
+
+	if (child) {
+		ret = close(sock[0]);
+		BUG_ON(ret, "close()");
+
+		return parent_f(sock[1], smap, child);
+	}
+
+	ret = close(sock[1]);
+	BUG_ON(ret, "close()");
+
+	return child_f(sock[0], smap, fileno(ftmp));
+}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
index 4997b9222cfa..637b6d0ac0d0 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -9,6 +9,8 @@
 #include <stdbool.h>
 #include "mlock2.h"
 
+#include "../kselftest.h"
+
 struct vm_boundaries {
 	unsigned long start;
 	unsigned long end;
@@ -303,7 +305,7 @@ static int test_mlock_lock()
 	if (mlock2_(map, 2 * page_size, 0)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock2(0)");
 		goto unmap;
@@ -412,7 +414,7 @@ static int test_mlock_onfault()
 	if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock2(MLOCK_ONFAULT)");
 		goto unmap;
@@ -425,7 +427,7 @@ static int test_mlock_onfault()
 	if (munlock(map, 2 * page_size)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("munlock()");
 		goto unmap;
@@ -457,7 +459,7 @@ static int test_lock_onfault_of_present()
 	if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock2(MLOCK_ONFAULT)");
 		goto unmap;
@@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock)
 	if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock(ONFAULT)\n");
 		goto out;
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 22d564673830..584a91ae4a8f 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 #please run as root
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 mnt=./huge
 exitcode=0
 
@@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
 		echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
 		if [ $? -ne 0 ]; then
 			echo "Please run this test as root"
-			exit 1
+			exit $ksft_skip
 		fi
 		while read name size unit; do
 			if [ "$name" = "HugePages_Free:" ]; then
@@ -165,6 +168,17 @@ else
 fi
 
 echo "--------------------"
+echo "running map_populate"
+echo "--------------------"
+./map_populate
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "--------------------"
 echo "running mlock2-tests"
 echo "--------------------"
 ./mlock2-tests
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index de2f9ec8a87f..5d1db824f73a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -34,18 +34,6 @@
  * per-CPU threads 1 by triggering userfaults inside
  * pthread_mutex_lock will also verify the atomicity of the memory
  * transfer (UFFDIO_COPY).
- *
- * The program takes two parameters: the amounts of physical memory in
- * megabytes (MiB) of the area and the number of bounces to execute.
- *
- * # 100MiB 99999 bounces
- * ./userfaultfd 100 99999
- *
- * # 1GiB 99 bounces
- * ./userfaultfd 1000 99
- *
- * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
- * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
  */
 
 #define _GNU_SOURCE
@@ -69,6 +57,8 @@
 #include <setjmp.h>
 #include <stdbool.h>
 
+#include "../kselftest.h"
+
 #ifdef __NR_userfaultfd
 
 static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
@@ -113,6 +103,30 @@ pthread_attr_t attr;
 				 ~(unsigned long)(sizeof(unsigned long long) \
 						  -  1)))
 
+const char *examples =
+    "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
+    "./userfaultfd anon 100 99999\n\n"
+    "# Run share memory test on 1GiB region with 99 bounces:\n"
+    "./userfaultfd shmem 1000 99\n\n"
+    "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
+    "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
+    "# Run the same hugetlb test but using shmem:\n"
+    "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
+    "# 10MiB-~6GiB 999 bounces anonymous test, "
+    "continue forever unless an error triggers\n"
+    "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
+
+static void usage(void)
+{
+	fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
+		"[hugetlbfs_file]\n\n");
+	fprintf(stderr, "Supported <test type>: anon, hugetlb, "
+		"hugetlb_shared, shmem\n\n");
+	fprintf(stderr, "Examples:\n\n");
+	fprintf(stderr, examples);
+	exit(1);
+}
+
 static int anon_release_pages(char *rel_area)
 {
 	int ret = 0;
@@ -437,6 +451,43 @@ static int copy_page(int ufd, unsigned long offset)
 	return __copy_page(ufd, offset, false);
 }
 
+static int uffd_read_msg(int ufd, struct uffd_msg *msg)
+{
+	int ret = read(uffd, msg, sizeof(*msg));
+
+	if (ret != sizeof(*msg)) {
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				return 1;
+			else
+				perror("blocking read error"), exit(1);
+		} else {
+			fprintf(stderr, "short read\n"), exit(1);
+		}
+	}
+
+	return 0;
+}
+
+/* Return 1 if page fault handled by us; otherwise 0 */
+static int uffd_handle_page_fault(struct uffd_msg *msg)
+{
+	unsigned long offset;
+
+	if (msg->event != UFFD_EVENT_PAGEFAULT)
+		fprintf(stderr, "unexpected msg event %u\n",
+			msg->event), exit(1);
+
+	if (bounces & BOUNCE_VERIFY &&
+	    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+		fprintf(stderr, "unexpected write fault\n"), exit(1);
+
+	offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+	offset &= ~(page_size-1);
+
+	return copy_page(uffd, offset);
+}
+
 static void *uffd_poll_thread(void *arg)
 {
 	unsigned long cpu = (unsigned long) arg;
@@ -444,7 +495,6 @@ static void *uffd_poll_thread(void *arg)
 	struct uffd_msg msg;
 	struct uffdio_register uffd_reg;
 	int ret;
-	unsigned long offset;
 	char tmp_chr;
 	unsigned long userfaults = 0;
 
@@ -468,25 +518,15 @@ static void *uffd_poll_thread(void *arg)
 		if (!(pollfd[0].revents & POLLIN))
 			fprintf(stderr, "pollfd[0].revents %d\n",
 				pollfd[0].revents), exit(1);
-		ret = read(uffd, &msg, sizeof(msg));
-		if (ret < 0) {
-			if (errno == EAGAIN)
-				continue;
-			perror("nonblocking read error"), exit(1);
-		}
+		if (uffd_read_msg(uffd, &msg))
+			continue;
 		switch (msg.event) {
 		default:
 			fprintf(stderr, "unexpected msg event %u\n",
 				msg.event), exit(1);
 			break;
 		case UFFD_EVENT_PAGEFAULT:
-			if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-				fprintf(stderr, "unexpected write fault\n"), exit(1);
-			offset = (char *)(unsigned long)msg.arg.pagefault.address -
-				area_dst;
-			offset &= ~(page_size-1);
-			if (copy_page(uffd, offset))
-				userfaults++;
+			userfaults += uffd_handle_page_fault(&msg);
 			break;
 		case UFFD_EVENT_FORK:
 			close(uffd);
@@ -514,8 +554,6 @@ static void *uffd_read_thread(void *arg)
 {
 	unsigned long *this_cpu_userfaults;
 	struct uffd_msg msg;
-	unsigned long offset;
-	int ret;
 
 	this_cpu_userfaults = (unsigned long *) arg;
 	*this_cpu_userfaults = 0;
@@ -524,24 +562,9 @@ static void *uffd_read_thread(void *arg)
 	/* from here cancellation is ok */
 
 	for (;;) {
-		ret = read(uffd, &msg, sizeof(msg));
-		if (ret != sizeof(msg)) {
-			if (ret < 0)
-				perror("blocking read error"), exit(1);
-			else
-				fprintf(stderr, "short read\n"), exit(1);
-		}
-		if (msg.event != UFFD_EVENT_PAGEFAULT)
-			fprintf(stderr, "unexpected msg event %u\n",
-				msg.event), exit(1);
-		if (bounces & BOUNCE_VERIFY &&
-		    msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-			fprintf(stderr, "unexpected write fault\n"), exit(1);
-		offset = (char *)(unsigned long)msg.arg.pagefault.address -
-			 area_dst;
-		offset &= ~(page_size-1);
-		if (copy_page(uffd, offset))
-			(*this_cpu_userfaults)++;
+		if (uffd_read_msg(uffd, &msg))
+			continue;
+		(*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
 	}
 	return (void *)NULL;
 }
@@ -603,6 +626,12 @@ static int stress(unsigned long *userfaults)
 	if (uffd_test_ops->release_pages(area_src))
 		return 1;
 
+
+	finished = 1;
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		if (pthread_join(locking_threads[cpu], NULL))
+			return 1;
+
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		char c;
 		if (bounces & BOUNCE_POLL) {
@@ -620,11 +649,6 @@ static int stress(unsigned long *userfaults)
 		}
 	}
 
-	finished = 1;
-	for (cpu = 0; cpu < nr_cpus; cpu++)
-		if (pthread_join(locking_threads[cpu], NULL))
-			return 1;
-
 	return 0;
 }
 
@@ -1270,8 +1294,7 @@ static void sigalrm(int sig)
 int main(int argc, char **argv)
 {
 	if (argc < 4)
-		fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
-				exit(1);
+		usage();
 
 	if (signal(SIGALRM, sigalrm) == SIG_ERR)
 		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
@@ -1284,20 +1307,19 @@ int main(int argc, char **argv)
 		nr_cpus;
 	if (!nr_pages_per_cpu) {
 		fprintf(stderr, "invalid MiB\n");
-		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+		usage();
 	}
 
 	bounces = atoi(argv[3]);
 	if (bounces <= 0) {
 		fprintf(stderr, "invalid bounces\n");
-		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+		usage();
 	}
 	nr_pages = nr_pages_per_cpu * nr_cpus;
 
 	if (test_type == TEST_HUGETLB) {
 		if (argc < 5)
-			fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
-				exit(1);
+			usage();
 		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
 		if (huge_fd < 0) {
 			fprintf(stderr, "Open of %s failed", argv[3]);
@@ -1322,7 +1344,7 @@ int main(int argc, char **argv)
 int main(void)
 {
 	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
-	return 0;
+	return KSFT_SKIP;
 }
 
 #endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index 6e290874b70e..c6bd9a68306b 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
 
 int fd;
 const char v = 'V';
-static const char sopts[] = "bdehp:t:";
+static const char sopts[] = "bdehp:t:Tn:N";
 static const struct option lopts[] = {
 	{"bootstatus",          no_argument, NULL, 'b'},
 	{"disable",             no_argument, NULL, 'd'},
@@ -27,6 +27,9 @@ static const struct option lopts[] = {
 	{"help",                no_argument, NULL, 'h'},
 	{"pingrate",      required_argument, NULL, 'p'},
 	{"timeout",       required_argument, NULL, 't'},
+	{"gettimeout",          no_argument, NULL, 'T'},
+	{"pretimeout",    required_argument, NULL, 'n'},
+	{"getpretimeout",       no_argument, NULL, 'N'},
 	{NULL,                  no_argument, NULL, 0x0}
 };
 
@@ -71,9 +74,13 @@ static void usage(char *progname)
 	printf(" -h, --help          Print the help message\n");
 	printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
 	printf(" -t, --timeout=T     Set timeout to T seconds\n");
+	printf(" -T, --gettimeout    Get the timeout\n");
+	printf(" -n, --pretimeout=T  Set the pretimeout to T seconds\n");
+	printf(" -N, --getpretimeout Get the pretimeout\n");
 	printf("\n");
 	printf("Parameters are parsed left-to-right in real-time.\n");
 	printf("Example: %s -d -t 10 -p 5 -e\n", progname);
+	printf("Example: %s -t 12 -T -n 7 -N\n", progname);
 }
 
 int main(int argc, char *argv[])
@@ -89,7 +96,13 @@ int main(int argc, char *argv[])
 	fd = open("/dev/watchdog", O_WRONLY);
 
 	if (fd == -1) {
-		printf("Watchdog device not enabled.\n");
+		if (errno == ENOENT)
+			printf("Watchdog device not enabled.\n");
+		else if (errno == EACCES)
+			printf("Run watchdog as root.\n");
+		else
+			printf("Watchdog device open failed %s\n",
+				strerror(errno));
 		exit(-1);
 	}
 
@@ -103,23 +116,27 @@ int main(int argc, char *argv[])
 				printf("Last boot is caused by: %s.\n", (flags != 0) ?
 					"Watchdog" : "Power-On-Reset");
 			else
-				printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno));
+				printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno));
 			break;
 		case 'd':
 			flags = WDIOS_DISABLECARD;
 			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
 			if (!ret)
 				printf("Watchdog card disabled.\n");
-			else
-				printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno));
+			else {
+				printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
 			break;
 		case 'e':
 			flags = WDIOS_ENABLECARD;
 			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
 			if (!ret)
 				printf("Watchdog card enabled.\n");
-			else
-				printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno));
+			else {
+				printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
 			break;
 		case 'p':
 			ping_rate = strtoul(optarg, NULL, 0);
@@ -132,8 +149,36 @@ int main(int argc, char *argv[])
 			ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
 			if (!ret)
 				printf("Watchdog timeout set to %u seconds.\n", flags);
+			else {
+				printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
+			break;
+		case 'T':
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETTIMEOUT, &flags);
+			if (!ret)
+				printf("WDIOC_GETTIMEOUT returns %u seconds.\n", flags);
+			else
+				printf("WDIOC_GETTIMEOUT error '%s'\n", strerror(errno));
+			break;
+		case 'n':
+			flags = strtoul(optarg, NULL, 0);
+			ret = ioctl(fd, WDIOC_SETPRETIMEOUT, &flags);
+			if (!ret)
+				printf("Watchdog pretimeout set to %u seconds.\n", flags);
+			else {
+				printf("WDIOC_SETPRETIMEOUT error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
+			break;
+		case 'N':
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETPRETIMEOUT, &flags);
+			if (!ret)
+				printf("WDIOC_GETPRETIMEOUT returns %u seconds.\n", flags);
 			else
-				printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno));
+				printf("WDIOC_GETPRETIMEOUT error '%s'\n", strerror(errno));
 			break;
 		default:
 			usage(argv[0]);
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 39f66bc29b82..186520198de7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -8,6 +8,7 @@ include ../lib.mk
 UNAME_M := $(shell uname -m)
 CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
 CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
@@ -31,7 +32,12 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
-CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# call32_from_64 in thunks.S uses absolute addresses.
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
 
 define gen-target-rule-32
 $(1) $(1)_32: $(OUTPUT)/$(1)_32
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 246145b84a12..4d9dc3f2fd70 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -610,21 +610,41 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 	 */
 	for (int i = 0; i < NGREG; i++) {
 		greg_t req = requested_regs[i], res = resulting_regs[i];
+
 		if (i == REG_TRAPNO || i == REG_IP)
 			continue;	/* don't care */
-		if (i == REG_SP) {
-			printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
-			       (unsigned long long)res);
 
+		if (i == REG_SP) {
 			/*
-			 * In many circumstances, the high 32 bits of rsp
-			 * are zeroed.  For example, we could be a real
-			 * 32-bit program, or we could hit any of a number
-			 * of poorly-documented IRET or segmented ESP
-			 * oddities.  If this happens, it's okay.
+			 * If we were using a 16-bit stack segment, then
+			 * the kernel is a bit stuck: IRET only restores
+			 * the low 16 bits of ESP/RSP if SS is 16-bit.
+			 * The kernel uses a hack to restore bits 31:16,
+			 * but that hack doesn't help with bits 63:32.
+			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
+			 * AMD CPUs, they leak the high bits of the kernel
+			 * espfix64 stack pointer.  There's very little that
+			 * the kernel can do about it.
+			 *
+			 * Similarly, if we are returning to a 32-bit context,
+			 * the CPU will often lose the high 32 bits of RSP.
 			 */
-			if (res == (req & 0xFFFFFFFF))
-				continue;  /* OK; not expected to work */
+
+			if (res == req)
+				continue;
+
+			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+				printf("[NOTE]\tSP: %llx -> %llx\n",
+				       (unsigned long long)req,
+				       (unsigned long long)res);
+				continue;
+			}
+
+			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+			continue;
 		}
 
 		bool ignore_reg = false;
@@ -654,25 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 #endif
 
 		/* Sanity check on the kernel */
-		if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
+		if (i == REG_CX && req != res) {
 			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
-			       (unsigned long long)requested_regs[i],
-			       (unsigned long long)resulting_regs[i]);
+			       (unsigned long long)req,
+			       (unsigned long long)res);
 			nerrs++;
 			continue;
 		}
 
-		if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
-			/*
-			 * SP is particularly interesting here.  The
-			 * usual cause of failures is that we hit the
-			 * nasty IRET case of returning to a 16-bit SS,
-			 * in which case bits 16:31 of the *kernel*
-			 * stack pointer persist in ESP.
-			 */
+		if (req != res && !ignore_reg) {
 			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
-			       i, (unsigned long long)requested_regs[i],
-			       (unsigned long long)resulting_regs[i]);
+			       i, (unsigned long long)req,
+			       (unsigned long long)res);
 			nerrs++;
 		}
 	}
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 235259011704..35edd61d1663 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -17,6 +17,7 @@
 #include <errno.h>
 #include <sched.h>
 #include <stdbool.h>
+#include <limits.h>
 
 #ifndef SYS_getcpu
 # ifdef __x86_64__
@@ -31,6 +32,14 @@
 
 int nerrs = 0;
 
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
 typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
 
 getcpu_t vgetcpu;
@@ -95,6 +104,15 @@ static void fill_function_pointers()
 		printf("Warning: failed to find getcpu in vDSO\n");
 
 	vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		printf("Warning: failed to find clock_gettime in vDSO\n");
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gettimeofday)
+		printf("Warning: failed to find gettimeofday in vDSO\n");
+
 }
 
 static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node,
 	return syscall(__NR_getcpu, cpu, node, cache);
 }
 
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(__NR_gettimeofday, tv, tz);
+}
+
 static void test_getcpu(void)
 {
 	printf("[RUN]\tTesting getcpu...\n");
@@ -155,10 +183,154 @@ static void test_getcpu(void)
 	}
 }
 
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+	[0] = "CLOCK_REALTIME",
+	[1] = "CLOCK_MONOTONIC",
+	[2] = "CLOCK_PROCESS_CPUTIME_ID",
+	[3] = "CLOCK_THREAD_CPUTIME_ID",
+	[4] = "CLOCK_MONOTONIC_RAW",
+	[5] = "CLOCK_REALTIME_COARSE",
+	[6] = "CLOCK_MONOTONIC_COARSE",
+	[7] = "CLOCK_BOOTTIME",
+	[8] = "CLOCK_REALTIME_ALARM",
+	[9] = "CLOCK_BOOTTIME_ALARM",
+	[10] = "CLOCK_SGI_CYCLE",
+	[11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+	struct timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime(clock, &vdso);
+	end_ret = sys_clock_gettime(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+}
+
+static void test_clock_gettime(void)
+{
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime(-1, "invalid");
+	test_one_clock_gettime(INT_MIN, "invalid");
+	test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+	struct timeval start, vdso, end;
+	struct timezone sys_tz, vdso_tz;
+	int vdso_ret, end_ret;
+
+	if (!vdso_gettimeofday)
+		return;
+
+	printf("[RUN]\tTesting gettimeofday...\n");
+
+	if (sys_gettimeofday(&start, &sys_tz) < 0) {
+		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+		nerrs++;
+		return;
+	}
+
+	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+	end_ret = sys_gettimeofday(&end, NULL);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_usec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+	       (unsigned long long)end.tv_sec, end.tv_usec);
+
+	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+
+	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+	} else {
+		printf("[FAIL]\ttimezones do not match\n");
+		nerrs++;
+	}
+
+	/* And make sure that passing NULL for tz doesn't crash. */
+	vdso_gettimeofday(&vdso, NULL);
+}
+
 int main(int argc, char **argv)
 {
 	fill_function_pointers();
 
+	test_clock_gettime();
+	test_gettimeofday();
+
+	/*
+	 * Test getcpu() last so that, if something goes wrong setting affinity,
+	 * we still run the other tests.
+	 */
 	test_getcpu();
 
 	return nerrs ? 1 : 0;
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 000000000000..46a447163b93
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
+/* Trivial program to check that compilation with certain flags is working. */
+
+#include <stdio.h>
+
+int
+main(void)
+{
+	puts("");
+	return 0;
+}
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
index 754de7da426a..232e958ec454 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 TCID="zram.sh"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./zram_lib.sh
 
 run_zram () {
@@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then
 else
 	echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
 	echo "$TCID : CONFIG_ZRAM is not set"
-	exit 1
+	exit $ksft_skip
 fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
index f6a9c73e7a44..9e73a4fb9b0a 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -18,6 +18,9 @@ MODULE=0
 dev_makeswap=-1
 dev_mounted=-1
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 trap INT
 
 check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
 
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
index 95dd14648ba5..0f395dfb7774 100644
--- a/tools/usb/ffs-test.c
+++ b/tools/usb/ffs-test.c
@@ -44,12 +44,25 @@
 
 /******************** Little Endian Handling ********************************/
 
-#define cpu_to_le16(x)  htole16(x)
-#define cpu_to_le32(x)  htole32(x)
+/*
+ * cpu_to_le16/32 are used when initializing structures, a context where a
+ * function call is not allowed. To solve this, we code cpu_to_le16/32 in a way
+ * that allows them to be used when initializing structures.
+ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define cpu_to_le16(x)  (x)
+#define cpu_to_le32(x)  (x)
+#else
+#define cpu_to_le16(x)  ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
+#define cpu_to_le32(x)  \
+	((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >>  8) | \
+	(((x) & 0x0000ff00u) <<  8) | (((x) & 0x000000ffu) << 24))
+#endif
+
 #define le32_to_cpu(x)  le32toh(x)
 #define le16_to_cpu(x)  le16toh(x)
 
-
 /******************** Messages and Errors ***********************************/
 
 static const char argv0[] = "ffs-test";
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
index dc93fadbee96..d79c7581b175 100644
--- a/tools/usb/usbip/libsrc/usbip_host_common.c
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -43,7 +43,7 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
 	int size;
 	int fd;
 	int length;
-	char status;
+	char status[2] = { 0 };
 	int value = 0;
 
 	size = snprintf(status_attr_path, sizeof(status_attr_path),
@@ -61,14 +61,14 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
 		return -1;
 	}
 
-	length = read(fd, &status, 1);
+	length = read(fd, status, 1);
 	if (length < 0) {
 		err("error reading attribute %s", status_attr_path);
 		close(fd);
 		return -1;
 	}
 
-	value = atoi(&status);
+	value = atoi(status);
 
 	return value;
 }
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index c9c81614a66a..8159fd98680b 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -135,11 +135,11 @@ static int refresh_imported_device_list(void)
 	return 0;
 }
 
-static int get_nports(void)
+static int get_nports(struct udev_device *hc_device)
 {
 	const char *attr_nports;
 
-	attr_nports = udev_device_get_sysattr_value(vhci_driver->hc_device, "nports");
+	attr_nports = udev_device_get_sysattr_value(hc_device, "nports");
 	if (!attr_nports) {
 		err("udev_device_get_sysattr_value nports failed");
 		return -1;
@@ -150,7 +150,7 @@ static int get_nports(void)
 
 static int vhci_hcd_filter(const struct dirent *dirent)
 {
-	return strcmp(dirent->d_name, "vhci_hcd") >= 0;
+	return !strncmp(dirent->d_name, "vhci_hcd.", 9);
 }
 
 static int get_ncontrollers(void)
@@ -242,35 +242,41 @@ static int read_record(int rhport, char *host, unsigned long host_len,
 
 int usbip_vhci_driver_open(void)
 {
+	int nports;
+	struct udev_device *hc_device;
+
 	udev_context = udev_new();
 	if (!udev_context) {
 		err("udev_new failed");
 		return -1;
 	}
 
-	vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver));
-
 	/* will be freed in usbip_driver_close() */
-	vhci_driver->hc_device =
+	hc_device =
 		udev_device_new_from_subsystem_sysname(udev_context,
 						       USBIP_VHCI_BUS_TYPE,
 						       USBIP_VHCI_DEVICE_NAME);
-	if (!vhci_driver->hc_device) {
+	if (!hc_device) {
 		err("udev_device_new_from_subsystem_sysname failed");
 		goto err;
 	}
 
-	vhci_driver->nports = get_nports();
-	dbg("available ports: %d", vhci_driver->nports);
-
-	if (vhci_driver->nports <= 0) {
+	nports = get_nports(hc_device);
+	if (nports <= 0) {
 		err("no available ports");
 		goto err;
-	} else if (vhci_driver->nports > MAXNPORT) {
-		err("port number exceeds %d", MAXNPORT);
+	}
+	dbg("available ports: %d", nports);
+
+	vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver) +
+			nports * sizeof(struct usbip_imported_device));
+	if (!vhci_driver) {
+		err("vhci_driver allocation failed");
 		goto err;
 	}
 
+	vhci_driver->nports = nports;
+	vhci_driver->hc_device = hc_device;
 	vhci_driver->ncontrollers = get_ncontrollers();
 	dbg("available controllers: %d", vhci_driver->ncontrollers);
 
@@ -285,7 +291,7 @@ int usbip_vhci_driver_open(void)
 	return 0;
 
 err:
-	udev_device_unref(vhci_driver->hc_device);
+	udev_device_unref(hc_device);
 
 	if (vhci_driver)
 		free(vhci_driver);
diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h
index 418b404d5121..6c9aca216705 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.h
+++ b/tools/usb/usbip/libsrc/vhci_driver.h
@@ -13,7 +13,6 @@
 
 #define USBIP_VHCI_BUS_TYPE "platform"
 #define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0"
-#define MAXNPORT 128
 
 enum hub_speed {
 	HUB_SPEED_HIGH = 0,
@@ -41,7 +40,7 @@ struct usbip_vhci_driver {
 
 	int ncontrollers;
 	int nports;
-	struct usbip_imported_device idev[MAXNPORT];
+	struct usbip_imported_device idev[];
 };
 
 
diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
index 9db9d21bb2ec..777f7286a0c5 100644
--- a/tools/usb/usbip/src/usbip_detach.c
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -43,9 +43,12 @@ void usbip_detach_usage(void)
 
 static int detach_port(char *port)
 {
-	int ret;
+	int ret = 0;
 	uint8_t portnum;
 	char path[PATH_MAX+1];
+	int i;
+	struct usbip_imported_device *idev;
+	int found = 0;
 
 	unsigned int port_len = strlen(port);
 
@@ -55,27 +58,48 @@ static int detach_port(char *port)
 			return -1;
 		}
 
-	/* check max port */
-
 	portnum = atoi(port);
 
-	/* remove the port state file */
+	ret = usbip_vhci_driver_open();
+	if (ret < 0) {
+		err("open vhci_driver");
+		return -1;
+	}
+
+	/* check for invalid port */
+	for (i = 0; i < vhci_driver->nports; i++) {
+		idev = &vhci_driver->idev[i];
+
+		if (idev->port == portnum) {
+			found = 1;
+			if (idev->status != VDEV_ST_NULL)
+				break;
+			info("Port %d is already detached!\n", idev->port);
+			goto call_driver_close;
+		}
+	}
 
+	if (!found) {
+		err("Invalid port %s > maxports %d",
+			port, vhci_driver->nports);
+		goto call_driver_close;
+	}
+
+	/* remove the port state file */
 	snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum);
 
 	remove(path);
 	rmdir(VHCI_STATE_PATH);
 
-	ret = usbip_vhci_driver_open();
+	ret = usbip_vhci_detach_device(portnum);
 	if (ret < 0) {
-		err("open vhci_driver");
-		return -1;
+		ret = -1;
+		err("Port %d detach request failed!\n", portnum);
+		goto call_driver_close;
 	}
+	info("Port %d is now detached!\n", portnum);
 
-	ret = usbip_vhci_detach_device(portnum);
-	if (ret < 0)
-		return -1;
-
+call_driver_close:
 	usbip_vhci_driver_close();
 
 	return ret;
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 0ac3caf90877..d0351f83aebe 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -13,8 +13,8 @@
 } while (0);
 /* Weak barriers should be used. If not - it's a bug */
 # define mb() abort()
-# define rmb() abort()
-# define wmb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 1571e24e9494..f91aeb5fe571 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -6,8 +6,6 @@
 # error Virtio userspace code does not support CONFIG_HAS_DMA
 #endif
 
-#define PCI_DMA_BUS_IS_PHYS 1
-
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL = 0,
 	DMA_TO_DEVICE = 1,
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index fca8381bbe04..7ef45a4a3cba 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -23,6 +23,10 @@
 #define PAGE_MASK (~(PAGE_SIZE-1))
 #define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK)
 
+/* generic data direction definitions */
+#define READ                    0
+#define WRITE                   1
+
 typedef unsigned long long phys_addr_t;
 typedef unsigned long long dma_addr_t;
 typedef size_t __kernel_size_t;
@@ -52,6 +56,11 @@ static inline void *kmalloc(size_t s, gfp_t gfp)
 		return __kmalloc_fake;
 	return malloc(s);
 }
+static inline void *kmalloc_array(unsigned n, size_t s, gfp_t gfp)
+{
+	return kmalloc(n * s, gfp);
+}
+
 static inline void *kzalloc(size_t s, gfp_t gfp)
 {
 	void *p = kmalloc(s, gfp);
diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h
index 9a45f90e2d08..369ee308b668 100644
--- a/tools/virtio/linux/scatterlist.h
+++ b/tools/virtio/linux/scatterlist.h
@@ -36,7 +36,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 	 */
 	BUG_ON((unsigned long) page & 0x03);
 #ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
 #endif
 	sg->page_link = page_link | (unsigned long) page;
@@ -67,7 +66,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page,
 static inline struct page *sg_page(struct scatterlist *sg)
 {
 #ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
 	BUG_ON(sg_is_chain(sg));
 #endif
 	return (struct page *)((sg)->page_link & ~0x3);
@@ -116,9 +114,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
  **/
 static inline void sg_mark_end(struct scatterlist *sg)
 {
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-#endif
 	/*
 	 * Set termination bit, clear potential chain bit
 	 */
@@ -136,17 +131,11 @@ static inline void sg_mark_end(struct scatterlist *sg)
  **/
 static inline void sg_unmark_end(struct scatterlist *sg)
 {
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-#endif
 	sg->page_link &= ~0x02;
 }
 
 static inline struct scatterlist *sg_next(struct scatterlist *sg)
 {
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-#endif
 	if (sg_is_last(sg))
 		return NULL;
 
@@ -160,13 +149,6 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg)
 static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
 {
 	memset(sgl, 0, sizeof(*sgl) * nents);
-#ifdef CONFIG_DEBUG_SG
-	{
-		unsigned int i;
-		for (i = 0; i < nents; i++)
-			sgl[i].sg_magic = SG_MAGIC;
-	}
-#endif
 	sg_mark_end(&sgl[nents - 1]);
 }
 
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index a8783f48f77f..37908a83ddc2 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -75,8 +75,11 @@
 
 #define KPF_BYTES		8
 #define PROC_KPAGEFLAGS		"/proc/kpageflags"
+#define PROC_KPAGECOUNT		"/proc/kpagecount"
 #define PROC_KPAGECGROUP	"/proc/kpagecgroup"
 
+#define SYS_KERNEL_MM_PAGE_IDLE "/sys/kernel/mm/page_idle/bitmap"
+
 /* [32-] kernel hacking assistances */
 #define KPF_RESERVED		32
 #define KPF_MLOCKED		33
@@ -131,6 +134,7 @@ static const char * const page_flag_names[] = {
 	[KPF_KSM]		= "x:ksm",
 	[KPF_THP]		= "t:thp",
 	[KPF_BALLOON]		= "o:balloon",
+	[KPF_PGTABLE]		= "g:pgtable",
 	[KPF_ZERO_PAGE]		= "z:zero_page",
 	[KPF_IDLE]              = "i:idle_page",
 
@@ -155,23 +159,19 @@ static const char * const page_flag_names[] = {
 };
 
 
-static const char * const debugfs_known_mountpoints[] = {
-	"/sys/kernel/debug",
-	"/debug",
-	0,
-};
-
 /*
  * data structures
  */
 
 static int		opt_raw;	/* for kernel developers */
 static int		opt_list;	/* list pages (in ranges) */
+static int		opt_mark_idle;	/* set accessed bit */
 static int		opt_no_summary;	/* don't show summary */
 static pid_t		opt_pid;	/* process to walk */
 const char		*opt_file;	/* file or directory path */
 static uint64_t		opt_cgroup;	/* cgroup inode */
 static int		opt_list_cgroup;/* list page cgroup */
+static int		opt_list_mapcnt;/* list page map count */
 static const char	*opt_kpageflags;/* kpageflags file to parse */
 
 #define MAX_ADDR_RANGES	1024
@@ -193,7 +193,9 @@ static int		page_size;
 
 static int		pagemap_fd;
 static int		kpageflags_fd;
+static int		kpagecount_fd = -1;
 static int		kpagecgroup_fd = -1;
+static int		page_idle_fd = -1;
 
 static int		opt_hwpoison;
 static int		opt_unpoison;
@@ -297,6 +299,15 @@ static unsigned long kpagecgroup_read(uint64_t *buf,
 	return do_u64_read(kpagecgroup_fd, opt_kpageflags, buf, index, pages);
 }
 
+static unsigned long kpagecount_read(uint64_t *buf,
+				     unsigned long index,
+				     unsigned long pages)
+{
+	return kpagecount_fd < 0 ? pages :
+		do_u64_read(kpagecount_fd, PROC_KPAGECOUNT,
+			    buf, index, pages);
+}
+
 static unsigned long pagemap_read(uint64_t *buf,
 				  unsigned long index,
 				  unsigned long pages)
@@ -369,16 +380,18 @@ static char *page_flag_longname(uint64_t flags)
  */
 
 static void show_page_range(unsigned long voffset, unsigned long offset,
-			    unsigned long size, uint64_t flags, uint64_t cgroup)
+			    unsigned long size, uint64_t flags,
+			    uint64_t cgroup, uint64_t mapcnt)
 {
 	static uint64_t      flags0;
 	static uint64_t	     cgroup0;
+	static uint64_t      mapcnt0;
 	static unsigned long voff;
 	static unsigned long index;
 	static unsigned long count;
 
-	if (flags == flags0 && cgroup == cgroup0 && offset == index + count &&
-	    size && voffset == voff + count) {
+	if (flags == flags0 && cgroup == cgroup0 && mapcnt == mapcnt0 &&
+	    offset == index + count && size && voffset == voff + count) {
 		count += size;
 		return;
 	}
@@ -390,12 +403,15 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
 			printf("%lu\t", voff);
 		if (opt_list_cgroup)
 			printf("@%llu\t", (unsigned long long)cgroup0);
+		if (opt_list_mapcnt)
+			printf("%lu\t", mapcnt0);
 		printf("%lx\t%lx\t%s\n",
 				index, count, page_flag_name(flags0));
 	}
 
 	flags0 = flags;
-	cgroup0= cgroup;
+	cgroup0 = cgroup;
+	mapcnt0 = mapcnt;
 	index  = offset;
 	voff   = voffset;
 	count  = size;
@@ -403,11 +419,11 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
 
 static void flush_page_range(void)
 {
-	show_page_range(0, 0, 0, 0, 0);
+	show_page_range(0, 0, 0, 0, 0, 0);
 }
 
 static void show_page(unsigned long voffset, unsigned long offset,
-		      uint64_t flags, uint64_t cgroup)
+		      uint64_t flags, uint64_t cgroup, uint64_t mapcnt)
 {
 	if (opt_pid)
 		printf("%lx\t", voffset);
@@ -415,6 +431,9 @@ static void show_page(unsigned long voffset, unsigned long offset,
 		printf("%lu\t", voffset);
 	if (opt_list_cgroup)
 		printf("@%llu\t", (unsigned long long)cgroup);
+	if (opt_list_mapcnt)
+		printf("%lu\t", mapcnt);
+
 	printf("%lx\t%s\n", offset, page_flag_name(flags));
 }
 
@@ -566,6 +585,30 @@ static int unpoison_page(unsigned long offset)
 	return 0;
 }
 
+static int mark_page_idle(unsigned long offset)
+{
+	static unsigned long off;
+	static uint64_t buf;
+	int len;
+
+	if ((offset / 64 == off / 64) || buf == 0) {
+		buf |= 1UL << (offset % 64);
+		off = offset;
+		return 0;
+	}
+
+	len = pwrite(page_idle_fd, &buf, 8, 8 * (off / 64));
+	if (len < 0) {
+		perror("mark page idle");
+		return len;
+	}
+
+	buf = 1UL << (offset % 64);
+	off = offset;
+
+	return 0;
+}
+
 /*
  * page frame walker
  */
@@ -598,7 +641,8 @@ static size_t hash_slot(uint64_t flags)
 }
 
 static void add_page(unsigned long voffset, unsigned long offset,
-		     uint64_t flags, uint64_t cgroup, uint64_t pme)
+		     uint64_t flags, uint64_t cgroup, uint64_t mapcnt,
+		     uint64_t pme)
 {
 	flags = kpageflags_flags(flags, pme);
 
@@ -613,10 +657,13 @@ static void add_page(unsigned long voffset, unsigned long offset,
 	if (opt_unpoison)
 		unpoison_page(offset);
 
+	if (opt_mark_idle)
+		mark_page_idle(offset);
+
 	if (opt_list == 1)
-		show_page_range(voffset, offset, 1, flags, cgroup);
+		show_page_range(voffset, offset, 1, flags, cgroup, mapcnt);
 	else if (opt_list == 2)
-		show_page(voffset, offset, flags, cgroup);
+		show_page(voffset, offset, flags, cgroup, mapcnt);
 
 	nr_pages[hash_slot(flags)]++;
 	total_pages++;
@@ -630,6 +677,7 @@ static void walk_pfn(unsigned long voffset,
 {
 	uint64_t buf[KPAGEFLAGS_BATCH];
 	uint64_t cgi[KPAGEFLAGS_BATCH];
+	uint64_t cnt[KPAGEFLAGS_BATCH];
 	unsigned long batch;
 	unsigned long pages;
 	unsigned long i;
@@ -653,8 +701,12 @@ static void walk_pfn(unsigned long voffset,
 		if (kpagecgroup_read(cgi, index, pages) != pages)
 			fatal("kpagecgroup returned fewer pages than expected");
 
+		if (kpagecount_read(cnt, index, batch) != pages)
+			fatal("kpagecount returned fewer pages than expected");
+
 		for (i = 0; i < pages; i++)
-			add_page(voffset + i, index + i, buf[i], cgi[i], pme);
+			add_page(voffset + i, index + i,
+				 buf[i], cgi[i], cnt[i], pme);
 
 		index += pages;
 		count -= pages;
@@ -672,9 +724,10 @@ static void walk_swap(unsigned long voffset, uint64_t pme)
 		return;
 
 	if (opt_list == 1)
-		show_page_range(voffset, pagemap_swap_offset(pme), 1, flags, 0);
+		show_page_range(voffset, pagemap_swap_offset(pme),
+				1, flags, 0, 0);
 	else if (opt_list == 2)
-		show_page(voffset, pagemap_swap_offset(pme), flags, 0);
+		show_page(voffset, pagemap_swap_offset(pme), flags, 0, 0);
 
 	nr_pages[hash_slot(flags)]++;
 	total_pages++;
@@ -755,6 +808,9 @@ static void walk_addr_ranges(void)
 		else
 			walk_task(opt_offset[i], opt_size[i]);
 
+	if (opt_mark_idle)
+		mark_page_idle(0);
+
 	close(kpageflags_fd);
 }
 
@@ -785,9 +841,11 @@ static void usage(void)
 "            -c|--cgroup  path|@inode   Walk pages within memory cgroup\n"
 "            -p|--pid     pid           Walk process address space\n"
 "            -f|--file    filename      Walk file address space\n"
+"            -i|--mark-idle             Mark pages idle\n"
 "            -l|--list                  Show page details in ranges\n"
 "            -L|--list-each             Show page details one by one\n"
 "            -C|--list-cgroup           Show cgroup inode for pages\n"
+"            -M|--list-mapcnt           Show page map count\n"
 "            -N|--no-summary            Don't show summary info\n"
 "            -X|--hwpoison              hwpoison pages\n"
 "            -x|--unpoison              unpoison pages\n"
@@ -924,6 +982,7 @@ static void walk_file(const char *name, const struct stat *st)
 	uint8_t vec[PAGEMAP_BATCH];
 	uint64_t buf[PAGEMAP_BATCH], flags;
 	uint64_t cgroup = 0;
+	uint64_t mapcnt = 0;
 	unsigned long nr_pages, pfn, i;
 	off_t off, end = st->st_size;
 	int fd;
@@ -983,13 +1042,15 @@ got_sigbus:
 				continue;
 			if (!kpagecgroup_read(&cgroup, pfn, 1))
 				fatal("kpagecgroup_read failed");
+			if (!kpagecount_read(&mapcnt, pfn, 1))
+				fatal("kpagecount_read failed");
 			if (first && opt_list) {
 				first = 0;
 				flush_page_range();
 				show_file(name, st);
 			}
 			add_page(off / page_size + i, pfn,
-				 flags, cgroup, buf[i]);
+				 flags, cgroup, mapcnt, buf[i]);
 		}
 	}
 
@@ -1189,9 +1250,11 @@ static const struct option opts[] = {
 	{ "bits"      , 1, NULL, 'b' },
 	{ "cgroup"    , 1, NULL, 'c' },
 	{ "describe"  , 1, NULL, 'd' },
+	{ "mark-idle" , 0, NULL, 'i' },
 	{ "list"      , 0, NULL, 'l' },
 	{ "list-each" , 0, NULL, 'L' },
 	{ "list-cgroup", 0, NULL, 'C' },
+	{ "list-mapcnt", 0, NULL, 'M' },
 	{ "no-summary", 0, NULL, 'N' },
 	{ "hwpoison"  , 0, NULL, 'X' },
 	{ "unpoison"  , 0, NULL, 'x' },
@@ -1207,7 +1270,8 @@ int main(int argc, char *argv[])
 	page_size = getpagesize();
 
 	while ((c = getopt_long(argc, argv,
-				"rp:f:a:b:d:c:ClLNXxF:h", opts, NULL)) != -1) {
+				"rp:f:a:b:d:c:CilLMNXxF:h",
+				opts, NULL)) != -1) {
 		switch (c) {
 		case 'r':
 			opt_raw = 1;
@@ -1233,12 +1297,18 @@ int main(int argc, char *argv[])
 		case 'd':
 			describe_flags(optarg);
 			exit(0);
+		case 'i':
+			opt_mark_idle = 1;
+			break;
 		case 'l':
 			opt_list = 1;
 			break;
 		case 'L':
 			opt_list = 2;
 			break;
+		case 'M':
+			opt_list_mapcnt = 1;
+			break;
 		case 'N':
 			opt_no_summary = 1;
 			break;
@@ -1268,12 +1338,21 @@ int main(int argc, char *argv[])
 	if (opt_cgroup || opt_list_cgroup)
 		kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY);
 
+	if (opt_list && opt_list_mapcnt)
+		kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY);
+
+	if (opt_mark_idle && opt_file)
+		page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR);
+
 	if (opt_list && opt_pid)
 		printf("voffset\t");
 	if (opt_list && opt_file)
 		printf("foffset\t");
 	if (opt_list && opt_list_cgroup)
 		printf("cgroup\t");
+	if (opt_list && opt_list_mapcnt)
+		printf("map-cnt\t");
+
 	if (opt_list == 1)
 		printf("offset\tlen\tflags\n");
 	if (opt_list == 2)
@@ -1295,5 +1374,11 @@ int main(int argc, char *argv[])
 
 	show_summary();
 
+	if (opt_list_mapcnt)
+		close(kpagecount_fd);
+
+	if (page_idle_fd >= 0)
+		close(page_idle_fd);
+
 	return 0;
 }
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index f82c2eaa859d..334b16db0ebb 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -30,8 +30,8 @@ struct slabinfo {
 	int alias;
 	int refs;
 	int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
-	int hwcache_align, object_size, objs_per_slab;
-	int sanity_checks, slab_size, store_user, trace;
+	unsigned int hwcache_align, object_size, objs_per_slab;
+	unsigned int sanity_checks, slab_size, store_user, trace;
 	int order, poison, reclaim_account, red_zone;
 	unsigned long partial, objects, slabs, objects_partial, objects_total;
 	unsigned long alloc_fastpath, alloc_slowpath;