136 files changed, 5456 insertions, 1159 deletions
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 877827b7c2c3..a61051400311 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -6,7 +6,7 @@
  *
  * Written by Masami Hiramatsu <mhiramat@redhat.com>
  */
-#include "inat_types.h"
+#include "inat_types.h" /* __ignore_sync_check__ */
 
 /*
  * Internal bits. Don't use bitmasks directly, because these bits are
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index cc777c185212..dc632b41f135 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -9,7 +9,7 @@
 
 #include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
-#include "inat.h"
+#include "inat.h" /* __ignore_sync_check__ */
 
 #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
 
@@ -132,13 +132,25 @@ struct insn {
 #define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
 
 extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+	INSN_MODE_32,
+	INSN_MODE_64,
+	/* Mode is determined by the current kernel build. */
+	INSN_MODE_KERN,
+	INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
 
 /* Attribute will be determined after getting ModRM (for opcode groups) */
 static inline void insn_get_attribute(struct insn *insn)
@@ -149,17 +161,6 @@ static inline void insn_get_attribute(struct insn *insn)
 /* Instruction uses RIP-relative addressing */
 extern int insn_rip_relative(struct insn *insn);
 
-/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn,
-				    const void *kaddr, int buf_len)
-{
-#ifdef CONFIG_X86_64
-	insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
-	insn_init(insn, kaddr, buf_len, 0);
-#endif
-}
-
 static inline int insn_is_avx(struct insn *insn)
 {
 	if (!insn->prefixes.got)
@@ -179,13 +180,6 @@ static inline int insn_has_emulate_prefix(struct insn *insn)
 	return !!insn->emulate_prefix_size;
 }
 
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
-{
-	return insn->opcode.got && insn->modrm.got && insn->sib.got &&
-		insn->displacement.got && insn->immediate.got;
-}
-
 static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
 	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 546d6ecf0a35..45029354e0a8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -628,8 +628,6 @@
 #define MSR_IA32_APICBASE_ENABLE	(1<<11)
 #define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
 
-#define MSR_IA32_TSCDEADLINE		0x000006e0
-
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
new file mode 100644
index 000000000000..c1e5e818ba16
--- /dev/null
+++ b/tools/arch/x86/include/asm/nops.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_NOPS_H
+#define _ASM_X86_NOPS_H
+
+/*
+ * Define nops for use with alternative() and for tracing.
+ */
+
+#ifndef CONFIG_64BIT
+
+/*
+ * Generic 32bit nops from GAS:
+ *
+ * 1: nop
+ * 2: movl %esi,%esi
+ * 3: leal 0x0(%esi),%esi
+ * 4: leal 0x0(%esi,%eiz,1),%esi
+ * 5: leal %ds:0x0(%esi,%eiz,1),%esi
+ * 6: leal 0x0(%esi),%esi
+ * 7: leal 0x0(%esi,%eiz,1),%esi
+ * 8: leal %ds:0x0(%esi,%eiz,1),%esi
+ *
+ * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2
+ * nop instructions.
+ */
+#define BYTES_NOP1	0x90
+#define BYTES_NOP2	0x89,0xf6
+#define BYTES_NOP3	0x8d,0x76,0x00
+#define BYTES_NOP4	0x8d,0x74,0x26,0x00
+#define BYTES_NOP5	0x3e,BYTES_NOP4
+#define BYTES_NOP6	0x8d,0xb6,0x00,0x00,0x00,0x00
+#define BYTES_NOP7	0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define BYTES_NOP8	0x3e,BYTES_NOP7
+
+#else
+
+/*
+ * Generic 64bit nops from GAS:
+ *
+ * 1: nop
+ * 2: osp nop
+ * 3: nopl (%eax)
+ * 4: nopl 0x00(%eax)
+ * 5: nopl 0x00(%eax,%eax,1)
+ * 6: osp nopl 0x00(%eax,%eax,1)
+ * 7: nopl 0x00000000(%eax)
+ * 8: nopl 0x00000000(%eax,%eax,1)
+ */
+#define BYTES_NOP1	0x90
+#define BYTES_NOP2	0x66,BYTES_NOP1
+#define BYTES_NOP3	0x0f,0x1f,0x00
+#define BYTES_NOP4	0x0f,0x1f,0x40,0x00
+#define BYTES_NOP5	0x0f,0x1f,0x44,0x00,0x00
+#define BYTES_NOP6	0x66,BYTES_NOP5
+#define BYTES_NOP7	0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
+#define BYTES_NOP8	0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+
+#endif /* CONFIG_64BIT */
+
+#ifdef __ASSEMBLY__
+#define _ASM_MK_NOP(x) .byte x
+#else
+#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
+#endif
+
+#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+
+#define ASM_NOP_MAX 8
+
+#ifndef __ASSEMBLY__
+extern const unsigned char * const x86_nops[];
+#endif
+
+#endif /* _ASM_X86_NOPS_H */
diff --git a/tools/arch/x86/kcpuid/Makefile b/tools/arch/x86/kcpuid/Makefile
new file mode 100644
index 000000000000..87b554fab14b
--- /dev/null
+++ b/tools/arch/x86/kcpuid/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for x86/kcpuid tool
+
+kcpuid : kcpuid.c
+
+CFLAGS = -Wextra
+
+BINDIR ?= /usr/sbin
+
+HWDATADIR ?= /usr/share/misc/
+
+override CFLAGS += -O2 -Wall -I../../../include
+
+%: %.c
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+.PHONY : clean
+clean :
+	@rm -f kcpuid
+
+install : kcpuid # both the binary and the CSV are staged under $(DESTDIR)
+	install -d  $(DESTDIR)$(BINDIR) $(DESTDIR)$(HWDATADIR)
+	install -m 755 -p kcpuid $(DESTDIR)$(BINDIR)/kcpuid
+	install -m 444 -p cpuid.csv $(DESTDIR)$(HWDATADIR)/cpuid.csv
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
new file mode 100644
index 000000000000..4f1c4b0c29e9
--- /dev/null
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -0,0 +1,400 @@
+# The basic row format is:
+# LEAF, SUBLEAF, register_name, bits, short_name, long_description
+
+# Leaf 00H
+         0,    0,  EAX,   31:0, max_basic_leafs, Max input value for supported basic leafs
+
+# Leaf 01H
+         1,    0,  EAX,    3:0, stepping, Stepping ID
+         1,    0,  EAX,    7:4, model, Model
+         1,    0,  EAX,   11:8, family, Family ID
+         1,    0,  EAX,  13:12, processor, Processor Type
+         1,    0,  EAX,  19:16, model_ext, Extended Model ID
+         1,    0,  EAX,  27:20, family_ext, Extended Family ID
+
+         1,    0,  EBX,    7:0, brand, Brand Index
+         1,    0,  EBX,   15:8, clflush_size, CLFLUSH line size (value * 8) in bytes
+         1,    0,  EBX,  23:16, max_cpu_id, Maximum number of addressable logical CPUs in this package
+         1,    0,  EBX,  31:24, apic_id, Initial APIC ID
+
+         1,    0,  ECX,      0, sse3, Streaming SIMD Extensions 3(SSE3)
+         1,    0,  ECX,      1, pclmulqdq, PCLMULQDQ instruction supported
+         1,    0,  ECX,      2, dtes64, DS area uses 64-bit layout
+         1,    0,  ECX,      3, mwait, MONITOR/MWAIT supported
+         1,    0,  ECX,      4, ds_cpl, CPL Qualified Debug Store which allows for branch message storage qualified by CPL
+         1,    0,  ECX,      5, vmx, Virtual Machine Extensions supported
+         1,    0,  ECX,      6, smx, Safer Mode Extension supported
+         1,    0,  ECX,      7, eist, Enhanced Intel SpeedStep Technology
+         1,    0,  ECX,      8, tm2, Thermal Monitor 2
+         1,    0,  ECX,      9, ssse3, Supplemental Streaming SIMD Extensions 3 (SSSE3)
+         1,    0,  ECX,     10, l1_ctx_id, L1 data cache could be set to either adaptive mode or shared mode (check IA32_MISC_ENABLE bit 24 definition)
+         1,    0,  ECX,     11, sdbg, IA32_DEBUG_INTERFACE MSR for silicon debug supported
+         1,    0,  ECX,     12, fma, FMA extensions using YMM state supported
+         1,    0,  ECX,     13, cmpxchg16b, 'CMPXCHG16B - Compare and Exchange Bytes' supported
+         1,    0,  ECX,     14, xtpr_update, xTPR Update Control supported
+         1,    0,  ECX,     15, pdcm, Perfmon and Debug Capability present
+         1,    0,  ECX,     17, pcid, Process-Context Identifiers feature present
+         1,    0,  ECX,     18, dca, Prefetching data from a memory mapped device supported
+         1,    0,  ECX,     19, sse4_1, SSE4.1 feature present
+         1,    0,  ECX,     20, sse4_2, SSE4.2 feature present
+         1,    0,  ECX,     21, x2apic, x2APIC supported
+         1,    0,  ECX,     22, movbe, MOVBE instruction supported
+         1,    0,  ECX,     23, popcnt, POPCNT instruction supported
+         1,    0,  ECX,     24, tsc_deadline_timer, LAPIC supports one-shot operation using a TSC deadline value
+         1,    0,  ECX,     25, aesni, AESNI instruction supported
+         1,    0,  ECX,     26, xsave, XSAVE/XRSTOR processor extended states (XSETBV/XGETBV/XCR0)
+         1,    0,  ECX,     27, osxsave, OS has set CR4.OSXSAVE bit to enable XSETBV/XGETBV/XCR0
+         1,    0,  ECX,     28, avx, AVX instruction supported
+         1,    0,  ECX,     29, f16c, 16-bit floating-point conversion instruction supported
+         1,    0,  ECX,     30, rdrand, RDRAND instruction supported
+
+         1,    0,  EDX,      0, fpu, x87 FPU on chip
+         1,    0,  EDX,      1, vme, Virtual-8086 Mode Enhancement
+         1,    0,  EDX,      2, de, Debugging Extensions
+         1,    0,  EDX,      3, pse, Page Size Extensions
+         1,    0,  EDX,      4, tsc, Time Stamp Counter
+         1,    0,  EDX,      5, msr, RDMSR and WRMSR Support
+         1,    0,  EDX,      6, pae, Physical Address Extensions
+         1,    0,  EDX,      7, mce, Machine Check Exception
+         1,    0,  EDX,      8, cx8, CMPXCHG8B instr
+         1,    0,  EDX,      9, apic, APIC on Chip
+         1,    0,  EDX,     11, sep, SYSENTER and SYSEXIT instrs
+         1,    0,  EDX,     12, mtrr, Memory Type Range Registers
+         1,    0,  EDX,     13, pge, Page Global Bit
+         1,    0,  EDX,     14, mca, Machine Check Architecture
+         1,    0,  EDX,     15, cmov, Conditional Move Instrs
+         1,    0,  EDX,     16, pat, Page Attribute Table
+         1,    0,  EDX,     17, pse36, 36-Bit Page Size Extension
+         1,    0,  EDX,     18, psn, Processor Serial Number
+         1,    0,  EDX,     19, clflush, CLFLUSH instr
+#         1,    0,  EDX,     20,
+         1,    0,  EDX,     21, ds, Debug Store
+         1,    0,  EDX,     22, acpi, Thermal Monitor and Software Controlled Clock Facilities
+         1,    0,  EDX,     23, mmx, Intel MMX Technology
+         1,    0,  EDX,     24, fxsr, FXSAVE and FXRSTOR Instrs
+         1,    0,  EDX,     25, sse, SSE
+         1,    0,  EDX,     26, sse2, SSE2
+         1,    0,  EDX,     27, ss, Self Snoop
+         1,    0,  EDX,     28, htt, Max APIC IDs reserved field is valid
+         1,    0,  EDX,     29, tm, Thermal Monitor
+#         1,    0,  EDX,     30,
+         1,    0,  EDX,     31, pbe, Pending Break Enable
+
+# Leaf 02H
+# cache and TLB descriptor info
+
+# Leaf 03H
+# Processor Serial Number, introduced on Pentium III, not valid for
+# latest models
+
+# Leaf 04H
+# thread/core and cache topology
+         4,    0,  EAX,    4:0, cache_type, Cache type like instr/data or unified
+         4,    0,  EAX,    7:5, cache_level, Cache Level (starts at 1)
+         4,    0,  EAX,      8, cache_self_init, Cache Self Initialization
+         4,    0,  EAX,      9, fully_associate, Fully Associative cache
+#         4,    0,  EAX,  13:10, resvd, resvd
+         4,    0,  EAX,  25:14, max_logical_id, Max number of addressable IDs for logical processors sharing the cache
+         4,    0,  EAX,  31:26, max_phy_id, Max number of addressable IDs for processors in phy package
+
+         4,    0,  EBX,   11:0, cache_linesize, Size of a cache line in bytes
+         4,    0,  EBX,  21:12, cache_partition, Physical Line partitions
+         4,    0,  EBX,  31:22, cache_ways, Ways of associativity
+         4,    0,  ECX,   31:0, cache_sets, Number of Sets - 1
+         4,    0,  EDX,      0, c_wbinvd, 1 means WBINVD/INVD is not guaranteed to act upon lower level caches of non-originating threads sharing this cache
+         4,    0,  EDX,      1, c_incl, Whether cache is inclusive of lower cache level
+         4,    0,  EDX,      2, c_comp_index, Complex Cache Indexing
+
+# Leaf 05H
+# MONITOR/MWAIT
+	 5,    0,  EAX,   15:0, min_mon_size, Smallest monitor line size in bytes
+	 5,    0,  EBX,   15:0, max_mon_size, Largest monitor line size in bytes
+	 5,    0,  ECX,      0, mwait_ext, Enum of Monitor-Mwait extensions supported
+	 5,    0,  ECX,      1, mwait_irq_break, Interrupts treated as break-events for MWAIT even when interrupts are disabled
+	 5,    0,  EDX,    3:0, c0_sub_stats, Number of C0* sub C-states supported using MWAIT
+	 5,    0,  EDX,    7:4, c1_sub_stats, Number of C1* sub C-states supported using MWAIT
+	 5,    0,  EDX,   11:8, c2_sub_stats, Number of C2* sub C-states supported using MWAIT
+	 5,    0,  EDX,  15:12, c3_sub_stats, Number of C3* sub C-states supported using MWAIT
+	 5,    0,  EDX,  19:16, c4_sub_stats, Number of C4* sub C-states supported using MWAIT
+	 5,    0,  EDX,  23:20, c5_sub_stats, Number of C5* sub C-states supported using MWAIT
+	 5,    0,  EDX,  27:24, c6_sub_stats, Number of C6* sub C-states supported using MWAIT
+	 5,    0,  EDX,  31:28, c7_sub_stats, Number of C7* sub C-states supported using MWAIT
+
+# Leaf 06H
+# Thermal & Power Management
+
+	 6,    0,  EAX,      0, dig_temp, Digital temperature sensor supported
+	 6,    0,  EAX,      1, turbo, Intel Turbo Boost
+	 6,    0,  EAX,      2, arat, Always running APIC timer
+#	 6,    0,  EAX,      3, resv, Reserved
+	 6,    0,  EAX,      4, pln, Power limit notifications supported
+	 6,    0,  EAX,      5, ecmd, Clock modulation duty cycle extension supported
+	 6,    0,  EAX,      6, ptm, Package thermal management supported
+	 6,    0,  EAX,      7, hwp, HWP base register
+	 6,    0,  EAX,      8, hwp_notify, HWP notification
+	 6,    0,  EAX,      9, hwp_act_window, HWP activity window
+	 6,    0,  EAX,     10, hwp_energy, HWP energy performance preference
+	 6,    0,  EAX,     11, hwp_pkg_req, HWP package level request
+#	 6,    0,  EAX,     12, resv, Reserved
+	 6,    0,  EAX,     13, hdc, HDC base registers supported
+	 6,    0,  EAX,     14, turbo3, Turbo Boost Max 3.0
+	 6,    0,  EAX,     15, hwp_cap, Highest Performance change supported
+	 6,    0,  EAX,     16, hwp_peci, HWP PECI override is supported
+	 6,    0,  EAX,     17, hwp_flex, Flexible HWP is supported
+	 6,    0,  EAX,     18, hwp_fast, Fast access mode for the IA32_HWP_REQUEST MSR is supported
+#	 6,    0,  EAX,     19, resv, Reserved
+	 6,    0,  EAX,     20, hwp_ignr, Ignoring Idle Logical Processor HWP request is supported
+
+	 6,    0,  EBX,    3:0, therm_irq_thresh, Number of Interrupt Thresholds in Digital Thermal Sensor
+	 6,    0,  ECX,      0, aperfmperf, Presence of IA32_MPERF and IA32_APERF
+	 6,    0,  ECX,      3, energ_bias, Performance-energy bias preference supported
+
+# Leaf 07H
+#	ECX == 0
+# AVX512 refers to https://en.wikipedia.org/wiki/AVX-512
+# XXX: Do we really need to enumerate each and every AVX512 sub features
+
+	 7,    0,  EBX,      0, fsgsbase, RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE supported
+	 7,    0,  EBX,      1, tsc_adjust, TSC_ADJUST MSR supported
+	 7,    0,  EBX,      2, sgx, Software Guard Extensions
+	 7,    0,  EBX,      3, bmi1, BMI1
+	 7,    0,  EBX,      4, hle, Hardware Lock Elision
+	 7,    0,  EBX,      5, avx2, AVX2
+#	 7,    0,  EBX,      6, fdp_excp_only, x87 FPU Data Pointer updated only on x87 exceptions
+	 7,    0,  EBX,      7, smep, Supervisor-Mode Execution Prevention
+	 7,    0,  EBX,      8, bmi2, BMI2
+	 7,    0,  EBX,      9, rep_movsb, Enhanced REP MOVSB/STOSB
+	 7,    0,  EBX,     10, invpcid, INVPCID instruction
+	 7,    0,  EBX,     11, rtm, Restricted Transactional Memory
+	 7,    0,  EBX,     12, rdt_m, Intel RDT Monitoring capability
+	 7,    0,  EBX,     13, depc_fpu_cs_ds, Deprecates FPU CS and FPU DS
+	 7,    0,  EBX,     14, mpx, Memory Protection Extensions
+	 7,    0,  EBX,     15, rdt_a, Intel RDT Allocation capability
+	 7,    0,  EBX,     16, avx512f, AVX512 Foundation instr
+	 7,    0,  EBX,     17, avx512dq, AVX512 Double and Quadword AVX512 instr
+	 7,    0,  EBX,     18, rdseed, RDSEED instr
+	 7,    0,  EBX,     19, adx, ADX instr
+	 7,    0,  EBX,     20, smap, Supervisor Mode Access Prevention
+	 7,    0,  EBX,     21, avx512ifma, AVX512 Integer Fused Multiply Add
+#	 7,    0,  EBX,     22, resvd, resvd
+	 7,    0,  EBX,     23, clflushopt, CLFLUSHOPT instr
+	 7,    0,  EBX,     24, clwb, CLWB instr
+	 7,    0,  EBX,     25, intel_pt, Intel Processor Trace instr
+	 7,    0,  EBX,     26, avx512pf, Prefetch
+	 7,    0,  EBX,     27, avx512er, AVX512 Exponent Reciprocal instr
+	 7,    0,  EBX,     28, avx512cd, AVX512 Conflict Detection instr
+	 7,    0,  EBX,     29, sha, Intel Secure Hash Algorithm Extensions instr
+	 7,    0,  EBX,     30, avx512bw, AVX512 Byte & Word instr
+	 7,    0,  EBX,     31, avx512vl, AVX512 Vector Length Extensions (VL)
+	 7,    0,  ECX,      0, prefetchwt1, PREFETCHWT1 instruction supported
+	 7,    0,  ECX,      1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions
+	 7,    0,  ECX,      2, umip, User-mode Instruction Prevention
+
+	 7,    0,  ECX,      3, pku, Protection Keys for User-mode pages
+	 7,    0,  ECX,      4, ospke, CR4 PKE set to enable protection keys
+#	 7,    0,  ECX,   16:5, resvd, resvd
+	 7,    0,  ECX,  21:17, mawau, The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode
+	 7,    0,  ECX,     22, rdpid, RDPID and IA32_TSC_AUX
+#	 7,    0,  ECX,  29:23, resvd, resvd
+	 7,    0,  ECX,     30, sgx_lc, SGX Launch Configuration
+#	 7,    0,  ECX,     31, resvd, resvd
+
+# Leaf 08H
+#
+
+
+# Leaf 09H
+# Direct Cache Access (DCA) information
+	 9,    0,  ECX,   31:0, dca_cap, The value of IA32_PLATFORM_DCA_CAP
+
+# Leaf 0AH
+# Architectural Performance Monitoring
+#
+# Do we really need to print out the PMU related stuff?
+# Does normal user really care about it?
+#
+       0xA,    0,  EAX,    7:0, pmu_ver, Performance Monitoring Unit version
+       0xA,    0,  EAX,   15:8, pmu_gp_cnt_num, Number of general-purpose PMU counters per logical CPU
+       0xA,    0,  EAX,  23:16, pmu_cnt_bits, Bit width of PMU counter
+       0xA,    0,  EAX,  31:24, pmu_ebx_bits, Length of EBX bit vector to enumerate PMU events
+
+       0xA,    0,  EBX,      0, pmu_no_core_cycle_evt, Core cycle event not available
+       0xA,    0,  EBX,      1, pmu_no_instr_ret_evt, Instruction retired event not available
+       0xA,    0,  EBX,      2, pmu_no_ref_cycle_evt, Reference cycles event not available
+       0xA,    0,  EBX,      3, pmu_no_llc_ref_evt, Last-level cache reference event not available
+       0xA,    0,  EBX,      4, pmu_no_llc_mis_evt, Last-level cache misses event not available
+       0xA,    0,  EBX,      5, pmu_no_br_instr_ret_evt, Branch instruction retired event not available
+       0xA,    0,  EBX,      6, pmu_no_br_mispredict_evt, Branch mispredict retired event not available
+
+       0xA,    0,  EDX,    4:0, pmu_fixed_cnt_num, Number of fixed-function PMU counters
+       0xA,    0,  EDX,   12:5, pmu_fixed_cnt_bits, Bit width of fixed-function PMU counters
+
+# Leaf 0BH
+# Extended Topology Enumeration Leaf
+#
+
+       0xB,    0,  EAX,    4:0, id_shift, Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type
+       0xB,    0,  EBX,   15:0, cpu_nr, Number of logical processors at this level type
+       0xB,    0,  ECX,   15:8, lvl_type, 0-Invalid 1-SMT 2-Core
+       0xB,    0,  EDX,   31:0, x2apic_id, x2APIC ID the current logical processor
+
+
+# Leaf 0DH
+# Processor Extended State
+
+       0xD,    0,  EAX,      0, x87, X87 state
+       0xD,    0,  EAX,      1, sse, SSE state
+       0xD,    0,  EAX,      2, avx, AVX state
+       0xD,    0,  EAX,    4:3, mpx, MPX state
+       0xD,    0,  EAX,    7:5, avx512, AVX-512 state
+       0xD,    0,  EAX,      9, pkru, PKRU state
+
+       0xD,    0,  EBX,   31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
+       0xD,    0,  ECX,   31:0, max_sz_xsave, Maximum size (bytes) of the XSAVE/XRSTOR save area
+
+       0xD,    1,  EAX,      0, xsaveopt, XSAVEOPT available
+       0xD,    1,  EAX,      1, xsavec, XSAVEC and compacted form supported
+       0xD,    1,  EAX,      2, xgetbv, XGETBV supported
+       0xD,    1,  EAX,      3, xsaves, XSAVES/XRSTORS and IA32_XSS supported
+
+       0xD,    1,  EBX,   31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
+       0xD,    1,  ECX,      8, pt, PT state
+       0xD,    1,  ECX,      11, cet_usr, CET user state
+       0xD,    1,  ECX,      12, cet_supv, CET supervisor state
+       0xD,    1,  ECX,      13, hdc, HDC state
+       0xD,    1,  ECX,      16, hwp, HWP state
+
+# Leaf 0FH
+# Intel RDT Monitoring
+
+       0xF,    0,  EBX,   31:0, rmid_range, Maximum range (zero-based) of RMID within this physical processor of all types
+       0xF,    0,  EDX,      1, l3c_rdt_mon, L3 Cache RDT Monitoring supported
+
+       0xF,    1,  ECX,   31:0, rmid_range, Maximum range (zero-based) of RMID of this type
+       0xF,    1,  EDX,      0, l3c_ocp_mon, L3 Cache occupancy Monitoring supported
+       0xF,    1,  EDX,      1, l3c_tbw_mon, L3 Cache Total Bandwidth Monitoring supported
+       0xF,    1,  EDX,      2, l3c_lbw_mon, L3 Cache Local Bandwidth Monitoring supported
+
+# Leaf 10H
+# Intel RDT Allocation
+
+      0x10,    0,  EBX,      1, l3c_rdt_alloc, L3 Cache Allocation supported
+      0x10,    0,  EBX,      2, l2c_rdt_alloc, L2 Cache Allocation supported
+      0x10,    0,  EBX,      3, mem_bw_alloc, Memory Bandwidth Allocation supported
+
+
+# Leaf 12H
+# SGX Capability
+#
+# Some detailed SGX features not added yet
+
+      0x12,    0,  EAX,      0, sgx1, SGX1 instruction set supported
+      0x12,    0,  EAX,      1, sgx2, SGX2 instruction set supported
+
+
+# Leaf 14H
+# Intel Processor Tracer
+#
+
+# Leaf 15H
+# Time Stamp Counter and Nominal Core Crystal Clock Information
+
+      0x15,    0,  EAX,   31:0, tsc_denominator, The denominator of the TSC/"core crystal clock" ratio
+      0x15,    0,  EBX,   31:0, tsc_numerator, The numerator of the TSC/"core crystal clock" ratio
+      0x15,    0,  ECX,   31:0, nom_freq, Nominal frequency of the core crystal clock in Hz
+
+# Leaf 16H
+# Processor Frequency Information
+
+      0x16,    0,  EAX,   15:0, cpu_base_freq, Processor Base Frequency in MHz
+      0x16,    0,  EBX,   15:0, cpu_max_freq, Maximum Frequency in MHz
+      0x16,    0,  ECX,   15:0, bus_freq, Bus (Reference) Frequency in MHz
+
+# Leaf 17H
+# System-On-Chip Vendor Attribute
+
+      0x17,    0,  EAX,   31:0, max_socid, Maximum input value of supported sub-leaf
+      0x17,    0,  EBX,   15:0, soc_vid, SOC Vendor ID
+      0x17,    0,  EBX,     16, std_vid, SOC Vendor ID is assigned via an industry standard scheme
+      0x17,    0,  ECX,   31:0, soc_pid, SOC Project ID assigned by vendor
+      0x17,    0,  EDX,   31:0, soc_sid, SOC Stepping ID
+
+# Leaf 18H
+# Deterministic Address Translation Parameters
+
+
+# Leaf 19H
+# Key Locker Leaf
+
+
+# Leaf 1AH
+# Hybrid Information
+
+      0x1A,    0,  EAX,  31:24, core_type, 20H-Intel_Atom 40H-Intel_Core
+
+
+# Leaf 1FH
+# V2 Extended Topology - A preferred superset to leaf 0BH
+
+
+# According to SDM
+# 40000000H - 4FFFFFFFH is invalid range
+
+
+# Leaf 80000001H
+# Extended Processor Signature and Feature Bits
+
+0x80000001,    0,  ECX,      0, lahf_lm, LAHF/SAHF available in 64-bit mode
+0x80000001,    0,  ECX,      5, lzcnt, LZCNT
+0x80000001,    0,  ECX,      8, prefetchw, PREFETCHW
+
+0x80000001,    0,  EDX,     11, sysret, SYSCALL/SYSRET supported
+0x80000001,    0,  EDX,     20, exec_dis, Execute Disable Bit available
+0x80000001,    0,  EDX,     26, 1gb_page, 1GB page supported
+0x80000001,    0,  EDX,     27, rdtscp, RDTSCP and IA32_TSC_AUX are available
+#0x80000001,    0,  EDX,     29, 64b, 64b Architecture supported
+
+# Leaf 80000002H/80000003H/80000004H
+# Processor Brand String
+
+# Leaf 80000005H
+# Reserved
+
+# Leaf 80000006H
+# Extended L2 Cache Features
+
+0x80000006,    0,  ECX,    7:0, clsize, Cache Line size in bytes
+0x80000006,    0,  ECX,  15:12, l2c_assoc, L2 Associativity
+0x80000006,    0,  ECX,  31:16, csize, Cache size in 1K units
+
+
+# Leaf 80000007H
+
+0x80000007,    0,  EDX,      8, nonstop_tsc, Invariant TSC available
+
+
+# Leaf 80000008H
+
+0x80000008,    0,  EAX,    7:0, phy_adr_bits, Physical Address Bits
+0x80000008,    0,  EAX,   15:8, lnr_adr_bits, Linear Address Bits
+0x80000008,    0,  EBX,      9, wbnoinvd, WBNOINVD
+
+# 0x8000001E
+# EAX: Extended APIC ID
+0x8000001E,	0, EAX,   31:0, extended_apic_id, Extended APIC ID
+# EBX: Core Identifiers
+0x8000001E,	0, EBX,    7:0, core_id, Identifies the logical core ID
+0x8000001E,	0, EBX,   15:8, threads_per_core, The number of threads per core is threads_per_core + 1
+# ECX: Node Identifiers
+0x8000001E,	0, ECX,    7:0, node_id, Node ID
+0x8000001E,	0, ECX,   10:8, nodes_per_processor, Nodes per processor { 0: 1 node, else reserved }
+
+# 8000001F: AMD Secure Encryption
+0x8000001F,	0, EAX,	     0, sme,	Secure Memory Encryption
+0x8000001F,	0, EAX,      1, sev,	Secure Encrypted Virtualization
+0x8000001F,	0, EAX,      2, vmpgflush, VM Page Flush MSR
+0x8000001F,	0, EAX,      3, seves, SEV Encrypted State
+0x8000001F,	0, EBX,    5:0, c-bit, Page table bit number used to enable memory encryption
+0x8000001F,	0, EBX,   11:6, mem_encrypt_physaddr_width, Reduction of physical address space in bits with SME enabled
+0x8000001F,	0, ECX,   31:0, num_encrypted_guests, Maximum ASID value that may be used for an SEV-enabled guest
+0x8000001F,	0, EDX,   31:0, minimum_sev_asid, Minimum ASID value that must be used for an SEV-enabled, SEV-ES-disabled guest
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
new file mode 100644
index 000000000000..dae75511fef7
--- /dev/null
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+char *def_csv = "/usr/share/misc/cpuid.csv";
+char *user_csv;
+
+
+/* Cover both single-bit flag and multiple-bits fields */
+struct bits_desc {
+	/* start and end bits */
+	int start, end;
+	/* 0 or 1 for 1-bit flag */
+	int value;
+	char simp[32];
+	char detail[256];
+};
+
+/* descriptor info for eax/ebx/ecx/edx */
+struct reg_desc {
+	/* number of valid entries */
+	int nr;
+	struct bits_desc descs[32];
+};
+
+enum {
+	R_EAX = 0,
+	R_EBX,
+	R_ECX,
+	R_EDX,
+	NR_REGS
+};
+
+struct subleaf {
+	u32 index;
+	u32 sub;
+	u32 eax, ebx, ecx, edx;
+	struct reg_desc info[NR_REGS];
+};
+
+/* Represent one leaf (basic or extended) */
+struct cpuid_func {
+	/*
+	 * Array of subleafs for this func, if there is no subleafs
+	 * then the leafs[0] is the main leaf
+	 */
+	struct subleaf *leafs;
+	int nr;
+};
+
+struct cpuid_range {
+	/* array of main leafs */
+	struct cpuid_func *funcs;
+	/* number of valid leafs */
+	int nr;
+	bool is_ext;
+};
+
+/*
+ * basic:  basic functions range: [0... ]
+ * ext:    extended functions range: [0x80000000... ]
+ */
+struct cpuid_range *leafs_basic, *leafs_ext;
+
+static int num_leafs;
+static bool is_amd;
+static bool show_details;
+static bool show_raw;
+static bool show_flags_only = true;
+static u32 user_index = 0xFFFFFFFF;
+static u32 user_sub = 0xFFFFFFFF;
+static int flines;
+
+static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+	/* *eax and *ecx are read as inputs; all four registers are written back. */
+	asm volatile("cpuid"
+	    : "=a" (*eax),
+	      "=b" (*ebx),
+	      "=c" (*ecx),
+	      "=d" (*edx)
+	    : "0" (*eax), "2" (*ecx));
+}
+
+/*
+ * Tell whether CPUID function @f is enumerated with subleafs: 0x7 and 0xd
+ * always, 0x8000001d only when is_amd is set, the listed leafs otherwise.
+ */
+static inline bool has_subleafs(u32 f)
+{
+	static const u32 generic_subleaf_funcs[] = {
+		0x4, 0xb, 0xf, 0x10, 0x14, 0x18, 0x1f,
+	};
+	unsigned int i;
+
+	if (f == 0x7 || f == 0xd)
+		return true;
+
+	if (is_amd)
+		return f == 0x8000001d;
+
+	for (i = 0; i < ARRAY_SIZE(generic_subleaf_funcs); i++) {
+		if (f == generic_subleaf_funcs[i])
+			return true;
+	}
+
+	return false;
+}
+
+/* Print the raw register values of @leaf, one line per leaf/subleaf. */
+static void leaf_print_raw(struct subleaf *leaf)
+{
+	if (!has_subleafs(leaf->index)) {
+		printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
+			leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+		return;
+	}
+	if (leaf->sub == 0)	/* subleaf 0 opens the group with a header */
+		printf("0x%08x: subleafs:\n", leaf->index);
+	printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
+		leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+}
+
+/*
+ * Append one CPUID result to @range under function @f / subleaf @subleaf.
+ * All-zero results are not stored. The new entry is zeroed first, so its
+ * info[] descriptor tables start with nr == 0. Exits on allocation failure.
+ *
+ * Return true if the input eax/ebx/ecx/edx are all zero.
+ */
+static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
+			u32 a, u32 b, u32 c, u32 d)
+{
+	struct cpuid_func *func;
+	struct subleaf *leaf;
+
+	if (a == 0 && b == 0 && c == 0 && d == 0)
+		return true;
+
+	/*
+	 * Cut off vendor-prefix from CPUID function as we're using it as an
+	 * index into ->funcs.
+	 */
+	func = &range->funcs[f & 0xffff];
+
+	/* realloc(NULL, n) acts as malloc(n), so one path covers both cases. */
+	leaf = realloc(func->leafs, (func->nr + 1) * sizeof(*leaf));
+	if (!leaf) {
+		perror("realloc f->leafs");
+		exit(EXIT_FAILURE);
+	}
+	func->leafs = leaf;
+
+	/* Claim the new tail slot and clear it before filling it in. */
+	leaf = &func->leafs[func->nr++];
+	memset(leaf, 0, sizeof(*leaf));
+
+	leaf->index = f;
+	leaf->sub = subleaf;
+	leaf->eax = a;
+	leaf->ebx = b;
+	leaf->ecx = c;
+	leaf->edx = d;
+
+	return false;
+}
+
+/*
+ * Print every stored leaf of @range in raw register form, preceded by a
+ * "Basic"/"Extended" banner chosen from range->is_ext.
+ * Each stored entry carries its full index, so none is recomputed here.
+ */
+static void raw_dump_range(struct cpuid_range *range)
+{
+	int f, i;
+
+	printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic");
+	printf("================\n");
+
+	for (f = 0; f < range->nr; f++) {
+		struct cpuid_func *func = &range->funcs[f];
+
+		/* Skip leaf without valid items */
+		if (!func->nr)
+			continue;
+
+		/* First item is the main leaf, followed by all subleafs */
+		for (i = 0; i < func->nr; i++)
+			leaf_print_raw(&func->leafs[i]);
+	}
+}
+
+#define MAX_SUBLEAF_NUM		32
+/*
+ * Probe one CPUID range and store every leaf/subleaf that is not all-zero.
+ * @input_eax is the first function of the range; CPUID on it returns the
+ * highest function of the range in EAX. Exits on allocation failure.
+ */
+struct cpuid_range *setup_cpuid_range(u32 input_eax)
+{
+	u32 max_func, idx_func;
+	struct cpuid_range *range;
+	u32 eax, ebx, ecx, edx;
+	u32 f = input_eax;
+	int subleaf, max_subleaf;
+
+	eax = input_eax;
+	ebx = ecx = edx = 0;
+
+	cpuid(&eax, &ebx, &ecx, &edx);
+	max_func = eax;
+	idx_func = (max_func & 0xffff) + 1;
+
+	range = malloc(sizeof(*range));
+	if (!range) {
+		perror("malloc range");
+		exit(EXIT_FAILURE);
+	}
+
+	range->is_ext = !!(input_eax & 0x80000000);
+	range->nr = idx_func;
+
+	/* calloc() returns zeroed entries, so every func starts out empty. */
+	range->funcs = calloc(idx_func, sizeof(*range->funcs));
+	if (!range->funcs) {
+		perror("malloc range->funcs");
+		exit(EXIT_FAILURE);
+	}
+
+	for (; f <= max_func; f++) {
+		eax = f;
+		subleaf = ecx = 0;
+
+		cpuid(&eax, &ebx, &ecx, &edx);
+		if (cpuid_store(range, f, subleaf, eax, ebx, ecx, edx))
+			continue;
+		num_leafs++;
+
+		if (!has_subleafs(f))
+			continue;
+
+		max_subleaf = MAX_SUBLEAF_NUM;
+
+		/*
+		 * Some can provide the exact number of subleafs,
+		 * others have to be tried (0xf)
+		 */
+		if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18)
+			max_subleaf = (eax & 0xff) + 1;
+
+		if (f == 0xb)
+			max_subleaf = 2;
+
+		for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
+			eax = f;
+			ecx = subleaf;
+
+			cpuid(&eax, &ebx, &ecx, &edx);
+			if (cpuid_store(range, f, subleaf,
+					eax, ebx, ecx, edx))
+				continue;
+			num_leafs++;
+		}
+	}
+
+	return range;
+}
+
+/*
+ * The basic row format for cpuid.csv is
+ *	LEAF,SUBLEAF,register_name,bits,short name,long description
+ *
+ * like:
+ *	0,    0,  EAX,   31:0, max_basic_leafs,  Max input value for supported subleafs
+ *	1,    0,  ECX,      0, sse3,  Streaming SIMD Extensions 3(SSE3)
+ */
+static int parse_line(char *line)
+{
+	char *str;
+	int i;
+	struct cpuid_range *range;
+	struct cpuid_func *func;
+	struct subleaf *leaf;
+	u32 index;
+	u32 sub;
+	char buffer[512];
+	char *buf;
+	/*
+	 * Tokens:
+	 *  1. leaf
+	 *  2. subleaf
+	 *  3. register
+	 *  4. bits
+	 *  5. short name
+	 *  6. long detail
+	 */
+	char *tokens[6];
+	struct reg_desc *reg;
+	struct bits_desc *bdesc;
+	int reg_index;
+	char *start, *end;
+
+	/* Skip comments and empty lines */
+	if (line[0] == '#' || line[0] == '\n')
+		return 0;
+
+	strncpy(buffer, line, 511);
+	buffer[511] = 0;
+	str = buffer;
+	for (i = 0; i < 5; i++) {
+		tokens[i] = strtok(str, ",");
+		if (!tokens[i])
+			goto err_exit;
+		str = NULL;
+	}
+	tokens[5] = strtok(str, "\n");
+	if (!tokens[5])
+		goto err_exit;
+
+	/* index/main-leaf */
+	index = strtoull(tokens[0], NULL, 0);
+
+	if (index & 0x80000000)
+		range = leafs_ext;
+	else
+		range = leafs_basic;
+
+	index &= 0x7FFFFFFF;
+	/* Skip line parsing for non-existing indexes */
+	if ((int)index >= range->nr)
+		return -1;
+
+	func = &range->funcs[index];
+
+	/* Return if the index has no valid item on this platform */
+	if (!func->nr)
+		return 0;
+
+	/* subleaf: valid ->leafs[] indexes are 0 .. nr - 1, so nr itself is out of range */
+	sub = strtoul(tokens[1], NULL, 0);
+	if ((int)sub >= func->nr)
+		return -1;
+
+	leaf = &func->leafs[sub];
+	buf = tokens[2];
+
+	if (strcasestr(buf, "EAX"))
+		reg_index = R_EAX;
+	else if (strcasestr(buf, "EBX"))
+		reg_index = R_EBX;
+	else if (strcasestr(buf, "ECX"))
+		reg_index = R_ECX;
+	else if (strcasestr(buf, "EDX"))
+		reg_index = R_EDX;
+	else
+		goto err_exit;
+
+	reg = &leaf->info[reg_index];
+	bdesc = &reg->descs[reg->nr++];
+
+	/* bit flag or bits field */
+	buf = tokens[3];
+
+	end = strtok(buf, ":");
+	bdesc->end = strtoul(end, NULL, 0);
+	bdesc->start = bdesc->end;
+
+	/* start != NULL means it is bit fields */
+	start = strtok(NULL, ":");
+	if (start)
+		bdesc->start = strtoul(start, NULL, 0);
+
+	strcpy(bdesc->simp, tokens[4]);
+	strcpy(bdesc->detail, tokens[5]);
+	return 0;
+
+err_exit:
+	printf("Warning: wrong line format:\n");
+	printf("\tline[%d]: %s\n", flines, line);
+	return -1;
+}
+
+/* Parse csv file, and construct the array of all leafs and subleafs */
+static void parse_text(void)
+{
+	FILE *file;
+	char *filename, *line = NULL;
+	size_t len = 0;
+	int ret;
+
+	if (show_raw)
+		return;
+
+	filename = user_csv ? user_csv : def_csv;
+	file = fopen(filename, "r");
+	if (!file) {
+		/* Fallback to a csv in the current working directory */
+		file = fopen("./cpuid.csv", "r");
+	}
+
+	if (!file) {
+		printf("Fail to open '%s'\n", filename);
+		return;
+	}
+
+	while (1) {
+		ret = getline(&line, &len, file);
+		flines++;
+		if (ret > 0)
+			parse_line(line);
+		/* getline() returns -1 on EOF and on read errors alike */
+		if (ret < 0 || feof(file))
+			break;
+	}
+	free(line);	/* buffer is allocated by getline() */
+	fclose(file);
+}
+
+
+/* Decode every eax/ebx/ecx/edx */
+static void decode_bits(u32 value, struct reg_desc *rdesc)
+{
+	struct bits_desc *bdesc;
+	int start, end, i;
+	u32 mask;
+
+	for (i = 0; i < rdesc->nr; i++) {
+		bdesc = &rdesc->descs[i];
+
+		start = bdesc->start;
+		end = bdesc->end;
+		if (start == end) {
+			/* single bit flag; 1U keeps the shift well defined for bit 31 */
+			if (value & (1U << start))
+				printf("\t%-20s %s%s\n",
+					bdesc->simp,
+					show_details ? "-" : "",
+					show_details ? bdesc->detail : ""
+					);
+		} else {
+			/* bit fields */
+			if (show_flags_only)
+				continue;
+
+			mask = ((u64)1 << (end - start + 1)) - 1;	/* u64: a 32-bit wide field must not overflow */
+			printf("\t%-20s\t: 0x%-8x\t%s%s\n",
+					bdesc->simp,
+					(value >> start) & mask,
+					show_details ? "-" : "",
+					show_details ? bdesc->detail : ""
+					);
+		}
+	}
+}
+
+static void show_leaf(struct subleaf *leaf)
+{
+	if (!leaf)	/* nothing to show */
+		return;
+
+	if (show_raw)	/* raw mode: hand the leaf to leaf_print_raw() first */
+		leaf_print_raw(leaf);
+
+	decode_bits(leaf->eax, &leaf->info[R_EAX]);	/* one decode pass per register */
+	decode_bits(leaf->ebx, &leaf->info[R_EBX]);
+	decode_bits(leaf->ecx, &leaf->info[R_ECX]);
+	decode_bits(leaf->edx, &leaf->info[R_EDX]);
+}
+
+static void show_func(struct cpuid_func *func)
+{
+	int i;
+
+	if (!func)	/* tolerate a missing function */
+		return;
+
+	for (i = 0; i < func->nr; i++)	/* every stored leaf/subleaf of this function */
+		show_leaf(&func->leafs[i]);
+}
+
+static void show_range(struct cpuid_range *range)
+{
+	int i;
+
+	for (i = 0; i < range->nr; i++)	/* every ->funcs[] slot of the range */
+		show_func(&range->funcs[i]);
+}
+
+static inline struct cpuid_func *index_to_func(u32 index)
+{
+	struct cpuid_range *range;
+
+	range = (index & 0x80000000) ? leafs_ext : leafs_basic;	/* bit 31 picks the range */
+	index &= 0x7FFFFFFF;
+	/* Check the exact value used to index ->funcs[]; NULL is returned if out of range */
+	if (index >= (u32)range->nr) {
+		printf("ERR: invalid input index (0x%x)\n", index);
+		return NULL;
+	}
+	return &range->funcs[index];
+}
+
+static void show_info(void)
+{
+	struct cpuid_func *func;
+
+	if (show_raw) {
+		/* Show all of the raw output of 'cpuid' instr */
+		raw_dump_range(leafs_basic);
+		raw_dump_range(leafs_ext);
+		return;
+	}
+
+	if (user_index != 0xFFFFFFFF) {	/* NOTE(review): 0xFFFFFFFF looks like the "not given" value -- confirm */
+		/* Only show specific leaf/subleaf info */
+		func = index_to_func(user_index);
+		if (!func)
+			return;
+
+		/* Dump the raw data also */
+		show_raw = true;
+
+		if (user_sub != 0xFFFFFFFF) {
+			if (user_sub + 1 <= (u32)func->nr) {	/* i.e. user_sub < nr */
+				show_leaf(&func->leafs[user_sub]);
+				return;
+			}
+
+			printf("ERR: invalid input subleaf (0x%x)\n", user_sub);
+		}
+
+		show_func(func);	/* also reached after the subleaf error above */
+		return;
+	}
+
+	printf("CPU features:\n=============\n\n");
+	show_range(leafs_basic);
+	show_range(leafs_ext);
+}
+
+static void setup_platform_cpuid(void)
+{
+	 u32 eax, ebx, ecx, edx;
+
+	/* Check vendor: leaf 0 is queried and only EBX is inspected */
+	eax = ebx = ecx = edx = 0;
+	cpuid(&eax, &ebx, &ecx, &edx);
+
+	/* "htuA": the bytes 'A','u','t','h' packed little endian into EBX */
+	if (ebx == 0x68747541)
+		is_amd = true;
+
+	/* Setup leafs for the basic and extended range */
+	leafs_basic = setup_cpuid_range(0x0);
+	leafs_ext = setup_cpuid_range(0x80000000);
+}
+
+static void usage(void)
+{
+	/* Long option names listed here must match the opts[] table */
+	printf("kcpuid [-abdhr] [-f file] [-l leaf] [-s subleaf]\n"
+		"\t-a|--all             Show both bit flags and complex bit fields info\n"
+		"\t-b|--bitflags        Show boolean flags only\n"
+		"\t-d|--detail          Show details of the flag/fields (default)\n"
+		"\t-f|--file=path       Specify the cpuid csv file\n"
+		"\t-h|--help            Show usage info\n"
+		"\t-l|--leaf=index      Specify the leaf you want to check\n"
+		"\t-r|--raw             Show raw cpuid data\n"
+		"\t-s|--subleaf=sub     Specify the subleaf you want to check\n");
+}
+
+static struct option opts[] = {
+	{ "all", no_argument, NULL, 'a' },		/* show both bit flags and fields */
+	{ "bitflags", no_argument, NULL, 'b' },		/* only show bit flags, default on */
+	{ "detail", no_argument, NULL, 'd' },		/* show detail descriptions */
+	{ "file", required_argument, NULL, 'f' },	/* use user's cpuid csv file */
+	{ "help", no_argument, NULL, 'h'},		/* show usage */
+	{ "leaf", required_argument, NULL, 'l'},	/* only check a specific leaf */
+	{ "raw", no_argument, NULL, 'r'},		/* show raw CPUID leaf data */
+	{ "subleaf", required_argument, NULL, 's'},	/* check a specific subleaf */
+	{ NULL, 0, NULL, 0 }				/* all-zero terminator for getopt_long() */
+};
+
+static int parse_options(int argc, char *argv[])
+{
+	int c;
+
+	while ((c = getopt_long(argc, argv, "abdf:hl:rs:",
+					opts, NULL)) != -1)	/* -1: no more options */
+		switch (c) {
+		case 'a':
+			show_flags_only = false;
+			break;
+		case 'b':
+			show_flags_only = true;
+			break;
+		case 'd':
+			show_details = true;
+			break;
+		case 'f':
+			user_csv = optarg;	/* pointer is stored, the string is not copied */
+			break;
+		case 'h':
+			usage();
+			exit(1);
+			break;	/* not reached */
+		case 'l':
+			/* main leaf; base 0 accepts decimal, 0x-hex and 0-octal input */
+			user_index = strtoul(optarg, NULL, 0);
+			break;
+		case 'r':
+			show_raw = true;
+			break;
+		case 's':
+			/* subleaf, same number formats as the main leaf */
+			user_sub = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			printf("%s: Invalid option '%c'\n", argv[0], optopt);
+			return -1;	/* non-zero makes main() bail out */
+	}
+
+	return 0;
+}
+
+/*
+ * Do 4 things in turn:
+ * 1. Parse user options
+ * 2. Parse and store all the CPUID leaf data supported on this platform
+ * 3. Parse the csv file, while skipping leafs which are not available
+ *    on this platform
+ * 4. Print leafs info based on user options
+ */
+int main(int argc, char *argv[])
+{
+	if (parse_options(argc, argv))
+		return -1;
+
+	/* Setup the cpuid leafs of current platform */
+	setup_platform_cpuid();
+
+	/* Read and parse the csv file (parse_text() returns early in raw mode) */
+	parse_text();
+
+	show_info();
+	return 0;
+}
diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c
index 4f5ed49e1b4e..dfbcc6405941 100644
--- a/tools/arch/x86/lib/inat.c
+++ b/tools/arch/x86/lib/inat.c
@@ -4,7 +4,7 @@
  *
  * Written by Masami Hiramatsu <mhiramat@redhat.com>
  */
-#include "../include/asm/insn.h"
+#include "../include/asm/insn.h" /* __ignore_sync_check__ */
 
 /* Attribute tables are generated from opcode map */
 #include "inat-tables.c"
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index 3d9355ed1246..c41f95815480 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -11,10 +11,13 @@
 #else
 #include <string.h>
 #endif
-#include "../include/asm/inat.h"
-#include "../include/asm/insn.h"
+#include "../include/asm/inat.h" /* __ignore_sync_check__ */
+#include "../include/asm/insn.h" /* __ignore_sync_check__ */
 
-#include "../include/asm/emulate_prefix.h"
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+
+#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
 
 #define leXX_to_cpu(t, r)						\
 ({									\
@@ -51,6 +54,7 @@
  * insn_init() - initialize struct insn
  * @insn:	&struct insn to be initialized
  * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:	length of the insn buffer at @kaddr
  * @x86_64:	!0 for 64-bit kernel or 64-bit app
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
@@ -111,8 +115,12 @@ static void insn_get_emulate_prefix(struct insn *insn)
  * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
  * to point to the (first) opcode.  No effect if @insn->prefixes.got
  * is already set.
+ *
+ * * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_prefixes(struct insn *insn)
+int insn_get_prefixes(struct insn *insn)
 {
 	struct insn_field *prefixes = &insn->prefixes;
 	insn_attr_t attr;
@@ -120,7 +128,7 @@ void insn_get_prefixes(struct insn *insn)
 	int i, nb;
 
 	if (prefixes->got)
-		return;
+		return 0;
 
 	insn_get_emulate_prefix(insn);
 
@@ -230,8 +238,10 @@ vex_end:
 
 	prefixes->got = 1;
 
+	return 0;
+
 err_out:
-	return;
+	return -ENODATA;
 }
 
 /**
@@ -243,16 +253,25 @@ err_out:
  * If necessary, first collects any preceding (prefix) bytes.
  * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
  * is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_opcode(struct insn *insn)
+int insn_get_opcode(struct insn *insn)
 {
 	struct insn_field *opcode = &insn->opcode;
+	int pfx_id, ret;
 	insn_byte_t op;
-	int pfx_id;
+
 	if (opcode->got)
-		return;
-	if (!insn->prefixes.got)
-		insn_get_prefixes(insn);
+		return 0;
+
+	if (!insn->prefixes.got) {
+		ret = insn_get_prefixes(insn);
+		if (ret)
+			return ret;
+	}
 
 	/* Get first opcode */
 	op = get_next(insn_byte_t, insn);
@@ -267,9 +286,13 @@ void insn_get_opcode(struct insn *insn)
 		insn->attr = inat_get_avx_attribute(op, m, p);
 		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
 		    (!inat_accept_vex(insn->attr) &&
-		     !inat_is_group(insn->attr)))
-			insn->attr = 0;	/* This instruction is bad */
-		goto end;	/* VEX has only 1 byte for opcode */
+		     !inat_is_group(insn->attr))) {
+			/* This instruction is bad */
+			insn->attr = 0;
+			return -EINVAL;
+		}
+		/* VEX has only 1 byte for opcode */
+		goto end;
 	}
 
 	insn->attr = inat_get_opcode_attribute(op);
@@ -280,13 +303,18 @@ void insn_get_opcode(struct insn *insn)
 		pfx_id = insn_last_prefix_id(insn);
 		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
 	}
-	if (inat_must_vex(insn->attr))
-		insn->attr = 0;	/* This instruction is bad */
+
+	if (inat_must_vex(insn->attr)) {
+		/* This instruction is bad */
+		insn->attr = 0;
+		return -EINVAL;
+	}
 end:
 	opcode->got = 1;
+	return 0;
 
 err_out:
-	return;
+	return -ENODATA;
 }
 
 /**
@@ -296,15 +324,25 @@ err_out:
  * Populates @insn->modrm and updates @insn->next_byte to point past the
  * ModRM byte, if any.  If necessary, first collects the preceding bytes
  * (prefixes and opcode(s)).  No effect if @insn->modrm.got is already 1.
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_modrm(struct insn *insn)
+int insn_get_modrm(struct insn *insn)
 {
 	struct insn_field *modrm = &insn->modrm;
 	insn_byte_t pfx_id, mod;
+	int ret;
+
 	if (modrm->got)
-		return;
-	if (!insn->opcode.got)
-		insn_get_opcode(insn);
+		return 0;
+
+	if (!insn->opcode.got) {
+		ret = insn_get_opcode(insn);
+		if (ret)
+			return ret;
+	}
 
 	if (inat_has_modrm(insn->attr)) {
 		mod = get_next(insn_byte_t, insn);
@@ -313,17 +351,22 @@ void insn_get_modrm(struct insn *insn)
 			pfx_id = insn_last_prefix_id(insn);
 			insn->attr = inat_get_group_attribute(mod, pfx_id,
 							      insn->attr);
-			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
-				insn->attr = 0;	/* This is bad */
+			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+				/* Bad insn */
+				insn->attr = 0;
+				return -EINVAL;
+			}
 		}
 	}
 
 	if (insn->x86_64 && inat_is_force64(insn->attr))
 		insn->opnd_bytes = 8;
+
 	modrm->got = 1;
+	return 0;
 
 err_out:
-	return;
+	return -ENODATA;
 }
 
 
@@ -337,11 +380,16 @@ err_out:
 int insn_rip_relative(struct insn *insn)
 {
 	struct insn_field *modrm = &insn->modrm;
+	int ret;
 
 	if (!insn->x86_64)
 		return 0;
-	if (!modrm->got)
-		insn_get_modrm(insn);
+
+	if (!modrm->got) {
+		ret = insn_get_modrm(insn);
+		if (ret)
+			return 0;
+	}
 	/*
 	 * For rip-relative instructions, the mod field (top 2 bits)
 	 * is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -355,15 +403,25 @@ int insn_rip_relative(struct insn *insn)
  *
  * If necessary, first collects the instruction up to and including the
  * ModRM byte.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
  */
-void insn_get_sib(struct insn *insn)
+int insn_get_sib(struct insn *insn)
 {
 	insn_byte_t modrm;
+	int ret;
 
 	if (insn->sib.got)
-		return;
-	if (!insn->modrm.got)
-		insn_get_modrm(insn);
+		return 0;
+
+	if (!insn->modrm.got) {
+		ret = insn_get_modrm(insn);
+		if (ret)
+			return ret;
+	}
+
 	if (insn->modrm.nbytes) {
 		modrm = insn->modrm.bytes[0];
 		if (insn->addr_bytes != 2 &&
@@ -374,8 +432,10 @@ void insn_get_sib(struct insn *insn)
 	}
 	insn->sib.got = 1;
 
+	return 0;
+
 err_out:
-	return;
+	return -ENODATA;
 }
 
 
@@ -386,15 +446,25 @@ err_out:
  * If necessary, first collects the instruction up to and including the
  * SIB byte.
  * Displacement value is sign-expanded.
+ *
+ * * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
  */
-void insn_get_displacement(struct insn *insn)
+int insn_get_displacement(struct insn *insn)
 {
 	insn_byte_t mod, rm, base;
+	int ret;
 
 	if (insn->displacement.got)
-		return;
-	if (!insn->sib.got)
-		insn_get_sib(insn);
+		return 0;
+
+	if (!insn->sib.got) {
+		ret = insn_get_sib(insn);
+		if (ret)
+			return ret;
+	}
+
 	if (insn->modrm.nbytes) {
 		/*
 		 * Interpreting the modrm byte:
@@ -436,9 +506,10 @@ void insn_get_displacement(struct insn *insn)
 	}
 out:
 	insn->displacement.got = 1;
+	return 0;
 
 err_out:
-	return;
+	return -ENODATA;
 }
 
 /* Decode moffset16/32/64. Return 0 if failed */
@@ -537,20 +608,30 @@ err_out:
 }
 
 /**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
  * @insn:	&struct insn containing instruction
  *
  * If necessary, first collects the instruction up to and including the
  * displacement bytes.
  * Basically, most of immediates are sign-expanded. Unsigned-value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ * 0:  on success
+ * < 0: on error
  */
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
 {
+	int ret;
+
 	if (insn->immediate.got)
-		return;
-	if (!insn->displacement.got)
-		insn_get_displacement(insn);
+		return 0;
+
+	if (!insn->displacement.got) {
+		ret = insn_get_displacement(insn);
+		if (ret)
+			return ret;
+	}
 
 	if (inat_has_moffset(insn->attr)) {
 		if (!__get_moffset(insn))
@@ -597,9 +678,10 @@ void insn_get_immediate(struct insn *insn)
 	}
 done:
 	insn->immediate.got = 1;
+	return 0;
 
 err_out:
-	return;
+	return -ENODATA;
 }
 
 /**
@@ -608,13 +690,65 @@ err_out:
  *
  * If necessary, first collects the instruction up to and including the
  * immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ *  - 0 on success
+ *  - < 0 on error
+*/
+int insn_get_length(struct insn *insn)
 {
+	int ret;
+
 	if (insn->length)
-		return;
-	if (!insn->immediate.got)
-		insn_get_immediate(insn);
+		return 0;
+
+	if (!insn->immediate.got) {
+		ret = insn_get_immediate(insn);
+		if (ret)
+			return ret;
+	}
+
 	insn->length = (unsigned char)((unsigned long)insn->next_byte
 				     - (unsigned long)insn->kaddr);
+
+	return 0;
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+	return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+		insn->displacement.got && insn->immediate.got;
+}
+
+/**
+ * insn_decode() - Decode an x86 instruction
+ * @insn:	&struct insn to be initialized
+ * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:	length of the insn buffer at @kaddr
+ * @m:		insn mode, see enum insn_mode
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
+ */
+int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
+{
+	int ret;
+
+#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */
+
+	if (m == INSN_MODE_KERN)
+		insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
+	else
+		insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
+
+	ret = insn_get_length(insn);
+	if (ret)
+		return ret;
+
+	if (insn_complete(insn))
+		return 0;
+
+	return -EINVAL;
 }
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
index c4225ed63565..1600b17dbb8a 100644
--- a/tools/cgroup/memcg_slabinfo.py
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -128,9 +128,9 @@ def detect_kernel_config():
 
     cfg['nr_nodes'] = prog['nr_online_nodes'].value_()
 
-    if prog.type('struct kmem_cache').members[1][1] == 'flags':
+    if prog.type('struct kmem_cache').members[1].name == 'flags':
         cfg['allocator'] = 'SLUB'
-    elif prog.type('struct kmem_cache').members[1][1] == 'batchcount':
+    elif prog.type('struct kmem_cache').members[1].name == 'batchcount':
         cfg['allocator'] = 'SLAB'
     else:
         err('Can\'t determine the slab allocator')
@@ -193,7 +193,7 @@ def main():
         # look over all slab pages, belonging to non-root memcgs
         # and look for objects belonging to the given memory cgroup
         for page in for_each_slab_page(prog):
-            objcg_vec_raw = page.obj_cgroups.value_()
+            objcg_vec_raw = page.memcg_data.value_()
             if objcg_vec_raw == 0:
                 continue
             cache = page.slab_cache
@@ -202,7 +202,7 @@ def main():
             addr = cache.value_()
             caches[addr] = cache
             # clear the lowest bit to get the true obj_cgroups
-            objcg_vec = Object(prog, page.obj_cgroups.type_,
+            objcg_vec = Object(prog, 'struct obj_cgroup **',
                                value=objcg_vec_raw & ~1)
 
             if addr not in stats:
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 607b2b280945..719f18b1edf0 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -25,7 +25,7 @@ if [ "$1"x != "x" ]; then
 	elif  [ $1 -ge 0 ] 2>/dev/null ; then
 		taint=$1
 	else
-		echo "Error: Parameter '$1' not a positive interger. Aborting." >&2
+		echo "Error: Parameter '$1' not a positive integer. Aborting." >&2
 		exit 1
 	fi
 else
diff --git a/tools/iio/Makefile b/tools/iio/Makefile
index 3de763d9ab70..5d12ac4e7f8f 100644
--- a/tools/iio/Makefile
+++ b/tools/iio/Makefile
@@ -27,6 +27,7 @@ include $(srctree)/tools/build/Makefile.include
 #
 $(OUTPUT)include/linux/iio: ../../include/uapi/linux/iio
 	mkdir -p $(OUTPUT)include/linux/iio 2>&1 || true
+	ln -sf $(CURDIR)/../../include/uapi/linux/iio/buffer.h $@
 	ln -sf $(CURDIR)/../../include/uapi/linux/iio/events.h $@
 	ln -sf $(CURDIR)/../../include/uapi/linux/iio/types.h $@
 
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index bb03859db89d..0076437f6e3f 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -14,6 +14,7 @@
 
 #include <unistd.h>
 #include <stdlib.h>
+#include <dirent.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <errno.h>
@@ -280,22 +281,69 @@ static void print_event(struct iio_event_data *event)
 	printf("\n");
 }
 
+/* Enable or disable events in sysfs if the knob is available */
+static void enable_events(char *dev_dir, int enable)
+{
+	const struct dirent *ent;
+	char evdir[256];
+	int ret;
+	DIR *dp;
+
+	snprintf(evdir, sizeof(evdir), FORMAT_EVENTS_DIR, dev_dir);
+	evdir[sizeof(evdir)-1] = '\0';	/* defensive: force termination of the path */
+
+	dp = opendir(evdir);
+	if (!dp) {
+		fprintf(stderr, "Enabling/disabling events: can't open %s\n",
+			evdir);
+		return;
+	}
+
+	while (ent = readdir(dp), ent) {	/* comma operator: fetch next entry, stop at NULL */
+		if (iioutils_check_suffix(ent->d_name, "_en")) {	/* only the "*_en" entries */
+			printf("%sabling: %s\n",
+			       enable ? "En" : "Dis",
+			       ent->d_name);
+			ret = write_sysfs_int(ent->d_name, evdir,
+					      enable);
+			if (ret < 0)	/* report and keep going with the next entry */
+				fprintf(stderr, "Failed to enable/disable %s\n",
+					ent->d_name);
+		}
+	}
+
+	if (closedir(dp) == -1) {
+		perror("Enabling/disabling channels: "
+		       "Failed to close directory");
+		return;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	struct iio_event_data event;
 	const char *device_name;
+	char *dev_dir_name = NULL;
 	char *chrdev_name;
 	int ret;
 	int dev_num;
 	int fd, event_fd;
-
-	if (argc <= 1) {
-		fprintf(stderr, "Usage: %s <device_name>\n", argv[0]);
+	bool all_events = false;
+
+	if (argc == 2) {
+		device_name = argv[1];
+	} else if (argc == 3) {
+		device_name = argv[2];
+		if (!strcmp(argv[1], "-a"))
+			all_events = true;
+	} else {
+		fprintf(stderr,
+			"Usage: iio_event_monitor [options] <device_name>\n"
+			"Listen and display events from IIO devices\n"
+			"  -a         Auto-activate all available events\n");
 		return -1;
 	}
 
-	device_name = argv[1];
-
 	dev_num = find_type_by_name(device_name, "iio:device");
 	if (dev_num >= 0) {
 		printf("Found IIO device with name %s with device number %d\n",
@@ -303,6 +351,10 @@ int main(int argc, char **argv)
 		ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num);
 		if (ret < 0)
 			return -ENOMEM;
+		/* Look up sysfs dir as well if we can */
+		ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num);
+		if (ret < 0)
+			return -ENOMEM;
 	} else {
 		/*
 		 * If we can't find an IIO device by name assume device_name is
@@ -313,6 +365,9 @@ int main(int argc, char **argv)
 			return -ENOMEM;
 	}
 
+	if (all_events && dev_dir_name)
+		enable_events(dev_dir_name, 1);
+
 	fd = open(chrdev_name, 0);
 	if (fd == -1) {
 		ret = -errno;
@@ -365,6 +420,11 @@ int main(int argc, char **argv)
 		perror("Failed to close event file");
 
 error_free_chrdev_name:
+	/* Disable events after use */
+	if (all_events && dev_dir_name)
+		enable_events(dev_dir_name, 0);
+
 	free(chrdev_name);
+	free(dev_dir_name);	/* from asprintf(); still NULL (a no-op) if never set */
 
 	return ret;
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index 34d63bcebcd2..2491c54a5e4f 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -30,6 +30,8 @@
 #include <inttypes.h>
 #include <stdbool.h>
 #include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/iio/buffer.h>
 #include "iio_utils.h"
 
 /**
@@ -49,7 +51,7 @@ enum autochan {
  * Has the side effect of filling the channels[i].location values used
  * in processing the buffer output.
  **/
-int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
+static int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
 {
 	int bytes = 0;
 	int i = 0;
@@ -68,7 +70,7 @@ int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
 	return bytes;
 }
 
-void print1byte(uint8_t input, struct iio_channel_info *info)
+static void print1byte(uint8_t input, struct iio_channel_info *info)
 {
 	/*
 	 * Shift before conversion to avoid sign extension
@@ -85,7 +87,7 @@ void print1byte(uint8_t input, struct iio_channel_info *info)
 	}
 }
 
-void print2byte(uint16_t input, struct iio_channel_info *info)
+static void print2byte(uint16_t input, struct iio_channel_info *info)
 {
 	/* First swap if incorrect endian */
 	if (info->be)
@@ -108,7 +110,7 @@ void print2byte(uint16_t input, struct iio_channel_info *info)
 	}
 }
 
-void print4byte(uint32_t input, struct iio_channel_info *info)
+static void print4byte(uint32_t input, struct iio_channel_info *info)
 {
 	/* First swap if incorrect endian */
 	if (info->be)
@@ -131,7 +133,7 @@ void print4byte(uint32_t input, struct iio_channel_info *info)
 	}
 }
 
-void print8byte(uint64_t input, struct iio_channel_info *info)
+static void print8byte(uint64_t input, struct iio_channel_info *info)
 {
 	/* First swap if incorrect endian */
 	if (info->be)
@@ -167,9 +169,8 @@ void print8byte(uint64_t input, struct iio_channel_info *info)
  *			      to fill the location offsets.
  * @num_channels:	number of channels
  **/
-void process_scan(char *data,
-		  struct iio_channel_info *channels,
-		  int num_channels)
+static void process_scan(char *data, struct iio_channel_info *channels,
+			 int num_channels)
 {
 	int k;
 
@@ -198,7 +199,7 @@ void process_scan(char *data,
 	printf("\n");
 }
 
-static int enable_disable_all_channels(char *dev_dir_name, int enable)
+static int enable_disable_all_channels(char *dev_dir_name, int buffer_idx, int enable)
 {
 	const struct dirent *ent;
 	char scanelemdir[256];
@@ -206,7 +207,7 @@ static int enable_disable_all_channels(char *dev_dir_name, int enable)
 	int ret;
 
 	snprintf(scanelemdir, sizeof(scanelemdir),
-		 FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name);
+		 FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name, buffer_idx);
 	scanelemdir[sizeof(scanelemdir)-1] = '\0';
 
 	dp = opendir(scanelemdir);
@@ -238,12 +239,13 @@ static int enable_disable_all_channels(char *dev_dir_name, int enable)
 	return 0;
 }
 
-void print_usage(void)
+static void print_usage(void)
 {
 	fprintf(stderr, "Usage: generic_buffer [options]...\n"
 		"Capture, convert and output data from IIO device buffer\n"
 		"  -a         Auto-activate all available channels\n"
 		"  -A         Force-activate ALL channels\n"
+		"  -b <n>     The buffer which to open (by index), default 0\n"
 		"  -c <n>     Do n conversions, or loop forever if n < 0\n"
 		"  -e         Disable wait for event (new data)\n"
 		"  -g         Use trigger-less mode\n"
@@ -257,12 +259,13 @@ void print_usage(void)
 		"  -w <n>     Set delay between reads in us (event-less mode)\n");
 }
 
-enum autochan autochannels = AUTOCHANNELS_DISABLED;
-char *dev_dir_name = NULL;
-char *buf_dir_name = NULL;
-bool current_trigger_set = false;
+static enum autochan autochannels = AUTOCHANNELS_DISABLED;
+static char *dev_dir_name = NULL;
+static char *buf_dir_name = NULL;
+static int buffer_idx = 0;
+static bool current_trigger_set = false;
 
-void cleanup(void)
+static void cleanup(void)
 {
 	int ret;
 
@@ -287,21 +290,21 @@ void cleanup(void)
 
 	/* Disable channels if auto-enabled */
 	if (dev_dir_name && autochannels == AUTOCHANNELS_ACTIVE) {
-		ret = enable_disable_all_channels(dev_dir_name, 0);
+		ret = enable_disable_all_channels(dev_dir_name, buffer_idx, 0);
 		if (ret)
 			fprintf(stderr, "Failed to disable all channels\n");
 		autochannels = AUTOCHANNELS_DISABLED;
 	}
 }
 
-void sig_handler(int signum)
+static void sig_handler(int signum)
 {
 	fprintf(stderr, "Caught signal %d\n", signum);
 	cleanup();
 	exit(-signum);
 }
 
-void register_cleanup(void)
+static void register_cleanup(void)
 {
 	struct sigaction sa = { .sa_handler = sig_handler };
 	const int signums[] = { SIGINT, SIGTERM, SIGABRT };
@@ -334,7 +337,9 @@ int main(int argc, char **argv)
 	unsigned long long j;
 	unsigned long toread;
 	int ret, c;
-	int fp = -1;
+	struct stat st;
+	int fd = -1;
+	int buf_fd = -1;
 
 	int num_channels = 0;
 	char *trigger_name = NULL, *device_name = NULL;
@@ -353,7 +358,7 @@ int main(int argc, char **argv)
 
 	register_cleanup();
 
-	while ((c = getopt_long(argc, argv, "aAc:egl:n:N:t:T:w:?", longopts,
+	while ((c = getopt_long(argc, argv, "aAb:c:egl:n:N:t:T:w:?", longopts,
 				NULL)) != -1) {
 		switch (c) {
 		case 'a':
@@ -362,7 +367,20 @@ int main(int argc, char **argv)
 		case 'A':
 			autochannels = AUTOCHANNELS_ENABLED;
 			force_autochannels = true;
-			break;	
+			break;
+		case 'b':
+			errno = 0;
+			buffer_idx = strtoll(optarg, &dummy, 10);
+			if (errno) {
+				ret = -errno;
+				goto error;
+			}
+			if (buffer_idx < 0) {
+				ret = -ERANGE;
+				goto error;
+			}
+
+			break;
 		case 'c':
 			errno = 0;
 			num_loops = strtoll(optarg, &dummy, 10);
@@ -519,7 +537,7 @@ int main(int argc, char **argv)
 	 * Parse the files in scan_elements to identify what channels are
 	 * present
 	 */
-	ret = build_channel_array(dev_dir_name, &channels, &num_channels);
+	ret = build_channel_array(dev_dir_name, buffer_idx, &channels, &num_channels);
 	if (ret) {
 		fprintf(stderr, "Problem reading scan element information\n"
 			"diag %s\n", dev_dir_name);
@@ -536,7 +554,7 @@ int main(int argc, char **argv)
 	    (autochannels == AUTOCHANNELS_ENABLED && force_autochannels)) {
 		fprintf(stderr, "Enabling all channels\n");
 
-		ret = enable_disable_all_channels(dev_dir_name, 1);
+		ret = enable_disable_all_channels(dev_dir_name, buffer_idx, 1);
 		if (ret) {
 			fprintf(stderr, "Failed to enable all channels\n");
 			goto error;
@@ -545,7 +563,7 @@ int main(int argc, char **argv)
 		/* This flags that we need to disable the channels again */
 		autochannels = AUTOCHANNELS_ACTIVE;
 
-		ret = build_channel_array(dev_dir_name, &channels,
+		ret = build_channel_array(dev_dir_name, buffer_idx, &channels,
 					  &num_channels);
 		if (ret) {
 			fprintf(stderr, "Problem reading scan element "
@@ -566,7 +584,7 @@ int main(int argc, char **argv)
 		fprintf(stderr, "Enable channels manually in "
 			FORMAT_SCAN_ELEMENTS_DIR
 			"/*_en or pass -a to autoenable channels and "
-			"try again.\n", dev_dir_name);
+			"try again.\n", dev_dir_name, buffer_idx);
 		ret = -ENOENT;
 		goto error;
 	}
@@ -577,12 +595,25 @@ int main(int argc, char **argv)
 	 * be built rather than found.
 	 */
 	ret = asprintf(&buf_dir_name,
-		       "%siio:device%d/buffer", iio_dir, dev_num);
+		       "%siio:device%d/buffer%d", iio_dir, dev_num, buffer_idx);
 	if (ret < 0) {
 		ret = -ENOMEM;
 		goto error;
 	}
 
+	if (stat(buf_dir_name, &st)) {
+		ret = -errno;	/* save errno before fprintf() can clobber it */
+		fprintf(stderr, "Could not stat() '%s', got error %d: %s\n",
+			buf_dir_name, -ret, strerror(-ret));
+		goto error;
+	}
+
+	if (!S_ISDIR(st.st_mode)) {
+		fprintf(stderr, "File '%s' is not a directory\n", buf_dir_name);
+		ret = -EFAULT;
+		goto error;
+	}
+
 	if (!notrigger) {
 		printf("%s %s\n", dev_dir_name, trigger_name);
 		/*
@@ -599,6 +630,35 @@ int main(int argc, char **argv)
 		}
 	}
 
+	ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num);
+	if (ret < 0) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* Attempt to open non blocking the access dev */
+	fd = open(buffer_access, O_RDONLY | O_NONBLOCK);
+	if (fd == -1) { /* TODO: If it isn't there make the node */
+		ret = -errno;
+		fprintf(stderr, "Failed to open %s\n", buffer_access);
+		goto error;
+	}
+
+	/* specify for which buffer index we want an FD */
+	buf_fd = buffer_idx;
+
+	ret = ioctl(fd, IIO_BUFFER_GET_FD_IOCTL, &buf_fd);
+	if (ret == -1 || buf_fd == -1) {
+		ret = -errno;
+		if (ret == -ENODEV || ret == -EINVAL)
+			fprintf(stderr,
+				"Device does not have this many buffers\n");
+		else
+			fprintf(stderr, "Failed to retrieve buffer fd\n");
+
+		goto error;
+	}
+
 	/* Setup ring buffer parameters */
 	ret = write_sysfs_int("length", buf_dir_name, buf_len);
 	if (ret < 0)
@@ -608,7 +668,8 @@ int main(int argc, char **argv)
 	ret = write_sysfs_int("enable", buf_dir_name, 1);
 	if (ret < 0) {
 		fprintf(stderr,
-			"Failed to enable buffer: %s\n", strerror(-ret));
+			"Failed to enable buffer '%s': %s\n",
+			buf_dir_name, strerror(-ret));
 		goto error;
 	}
 
@@ -619,24 +680,30 @@ int main(int argc, char **argv)
 		goto error;
 	}
 
-	ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num);
-	if (ret < 0) {
-		ret = -ENOMEM;
-		goto error;
+	/*
+	 * Sanity check (can be omitted in normal operation): for buffer0, a
+	 * read() on the chardev FD is expected to fail with EBUSY from here on.
+	 */
+	if (buffer_idx == 0) {
+		errno = 0;
+		read_size = read(fd, data, 1);
+		if (read_size > -1 || errno != EBUSY) {
+			ret = -EFAULT;
+			fprintf(stderr, "Reading from '%s' should not be "
+				"possible after ioctl()\n", buffer_access);
+			goto error;
+		}
 	}
 
-	/* Attempt to open non blocking the access dev */
-	fp = open(buffer_access, O_RDONLY | O_NONBLOCK);
-	if (fp == -1) { /* TODO: If it isn't there make the node */
-		ret = -errno;
-		fprintf(stderr, "Failed to open %s\n", buffer_access);
-		goto error;
-	}
+	/* close now the main chardev FD and let the buffer FD work */
+	if (close(fd) == -1)
+		perror("Failed to close character device file");
+	fd = -1;
 
 	for (j = 0; j < num_loops || num_loops < 0; j++) {
 		if (!noevents) {
 			struct pollfd pfd = {
-				.fd = fp,
+				.fd = buf_fd,
 				.events = POLLIN,
 			};
 
@@ -654,7 +721,7 @@ int main(int argc, char **argv)
 			toread = 64;
 		}
 
-		read_size = read(fp, data, toread * scan_size);
+		read_size = read(buf_fd, data, toread * scan_size);
 		if (read_size < 0) {
 			if (errno == EAGAIN) {
 				fprintf(stderr, "nothing available\n");
@@ -671,7 +738,9 @@ int main(int argc, char **argv)
 error:
 	cleanup();
 
-	if (fp >= 0 && close(fp) == -1)
+	if (fd >= 0 && close(fd) == -1)
+		perror("Failed to close character device");
+	if (buf_fd >= 0 && close(buf_fd) == -1)
 		perror("Failed to close buffer");
 	free(buffer_access);
 	free(data);
diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c
index 7399eb7f1378..aadee6d34c74 100644
--- a/tools/iio/iio_utils.c
+++ b/tools/iio/iio_utils.c
@@ -77,15 +77,17 @@ int iioutils_break_up_name(const char *full_name, char **generic_name)
  * @mask: output a bit mask for the raw data
  * @be: output if data in big endian
  * @device_dir: the IIO device directory
+ * @buffer_idx: the IIO buffer index
  * @name: the channel name
  * @generic_name: the channel type name
  *
  * Returns a value >= 0 on success, otherwise a negative error code.
  **/
-int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
-		      unsigned *shift, uint64_t *mask, unsigned *be,
-		      const char *device_dir, const char *name,
-		      const char *generic_name)
+static int iioutils_get_type(unsigned int *is_signed, unsigned int *bytes,
+			     unsigned int *bits_used, unsigned int *shift,
+			     uint64_t *mask, unsigned int *be,
+			     const char *device_dir, int buffer_idx,
+			     const char *name, const char *generic_name)
 {
 	FILE *sysfsfp;
 	int ret;
@@ -95,7 +97,7 @@ int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
 	unsigned padint;
 	const struct dirent *ent;
 
-	ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
+	ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir, buffer_idx);
 	if (ret < 0)
 		return -ENOMEM;
 
@@ -303,12 +305,13 @@ void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt)
 /**
  * build_channel_array() - function to figure out what channels are present
  * @device_dir: the IIO device directory in sysfs
+ * @buffer_idx: the IIO buffer for this channel array
  * @ci_array: output the resulting array of iio_channel_info
  * @counter: output the amount of array elements
  *
  * Returns 0 on success, otherwise a negative error code.
  **/
-int build_channel_array(const char *device_dir,
+int build_channel_array(const char *device_dir, int buffer_idx,
 			struct iio_channel_info **ci_array, int *counter)
 {
 	DIR *dp;
@@ -321,7 +324,7 @@ int build_channel_array(const char *device_dir,
 	char *filename;
 
 	*counter = 0;
-	ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
+	ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir, buffer_idx);
 	if (ret < 0)
 		return -ENOMEM;
 
@@ -502,6 +505,7 @@ int build_channel_array(const char *device_dir,
 						&current->mask,
 						&current->be,
 						device_dir,
+						buffer_idx,
 						current->name,
 						current->generic_name);
 			if (ret < 0)
diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h
index 74bde4fde2c8..663c94a6c705 100644
--- a/tools/iio/iio_utils.h
+++ b/tools/iio/iio_utils.h
@@ -12,7 +12,8 @@
 /* Made up value to limit allocation sizes */
 #define IIO_MAX_NAME_LENGTH 64
 
-#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements"
+#define FORMAT_SCAN_ELEMENTS_DIR "%s/buffer%d"
+#define FORMAT_EVENTS_DIR "%s/events"
 #define FORMAT_TYPE_FILE "%s_type"
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
@@ -57,15 +58,11 @@ static inline int iioutils_check_suffix(const char *str, const char *suffix)
 }
 
 int iioutils_break_up_name(const char *full_name, char **generic_name);
-int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
-		      unsigned *shift, uint64_t *mask, unsigned *be,
-		      const char *device_dir, const char *name,
-		      const char *generic_name);
 int iioutils_get_param_float(float *output, const char *param_name,
 			     const char *device_dir, const char *name,
 			     const char *generic_name);
 void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt);
-int build_channel_array(const char *device_dir,
+int build_channel_array(const char *device_dir, int buffer_idx,
 			struct iio_channel_info **ci_array, int *counter);
 int find_type_by_name(const char *name, const char *type);
 int write_sysfs_int(const char *filename, const char *basedir, int val);
diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h
new file mode 100644
index 000000000000..1555a0c4f345
--- /dev/null
+++ b/tools/include/linux/kconfig.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_KCONFIG_H
+#define _TOOLS_LINUX_KCONFIG_H
+
+/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#else
+#define __LITTLE_ENDIAN 1234
+#endif
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+/*
+ * The use of "&&" / "||" is limited in certain expressions.
+ * The following enable to calculate "and" / "or" with macro expansion only.
+ */
+#define __and(x, y)			___and(x, y)
+#define ___and(x, y)			____and(__ARG_PLACEHOLDER_##x, y)
+#define ____and(arg1_or_junk, y)	__take_second_arg(arg1_or_junk y, 0)
+
+#define __or(x, y)			___or(x, y)
+#define ___or(x, y)			____or(__ARG_PLACEHOLDER_##x, y)
+#define ____or(arg1_or_junk, y)		__take_second_arg(arg1_or_junk 1, y)
+
+/*
+ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
+ * these only work with boolean and tristate options.
+ */
+
+/*
+ * Getting something that works in C and CPP for an arg that may or may
+ * not be defined is tricky.  Here, if we have "#define CONFIG_BOOGER 1"
+ * we match on the placeholder define, insert the "0," for arg1 and generate
+ * the triplet (0, 1, 0).  Then the last step cherry picks the 2nd arg (a one).
+ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
+ * the last step cherry picks the 2nd arg, we get a zero.
+ */
+#define __is_defined(x)			___is_defined(x)
+#define ___is_defined(val)		____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk)	__take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+/*
+ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
+ * otherwise.
+ */
+#define IS_MODULE(option) __is_defined(option##_MODULE)
+
+/*
+ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
+ * code can call a function defined in code compiled based on CONFIG_FOO.
+ * This is similar to IS_ENABLED(), but returns false when invoked from
+ * built-in code when CONFIG_FOO is set to 'm'.
+ */
+#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
+				__and(IS_MODULE(option), __is_defined(MODULE)))
+
+/*
+ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
+ * 0 otherwise.
+ */
+#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
+
+#endif /* _TOOLS_LINUX_KCONFIG_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
 	__raw_static_call(name);					\
 })
 
+struct static_call_key {
+	void *func;
+	union {
+		/* bit 0: 0 = mods, 1 = sites */
+		unsigned long type;
+		struct static_call_mod *mods;
+		struct static_call_site *sites;
+	};
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)	__raw_static_call(name)
 
+struct static_call_key {
+	void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+	void *func;
+};
+
 #define static_call(name)						\
 	((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt
new file mode 100644
index 000000000000..1ab189f51f55
--- /dev/null
+++ b/tools/memory-model/Documentation/access-marking.txt
@@ -0,0 +1,479 @@
+MARKING SHARED-MEMORY ACCESSES
+==============================
+
+This document provides guidelines for marking intentionally concurrent
+normal accesses to shared memory, that is "normal" as in accesses that do
+not use read-modify-write atomic operations.  It also describes how to
+document these accesses, both with comments and with special assertions
+processed by the Kernel Concurrency Sanitizer (KCSAN).  This discussion
+builds on an earlier LWN article [1].
+
+
+ACCESS-MARKING OPTIONS
+======================
+
+The Linux kernel provides the following access-marking options:
+
+1.	Plain C-language accesses (unmarked), for example, "a = b;"
+
+2.	Data-race marking, for example, "data_race(a = b);"
+
+3.	READ_ONCE(), for example, "a = READ_ONCE(b);"
+	The various forms of atomic_read() also fit in here.
+
+4.	WRITE_ONCE(), for example, "WRITE_ONCE(a, b);"
+	The various forms of atomic_set() also fit in here.
+
+
+These may be used in combination, as shown in this admittedly improbable
+example:
+
+	WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e));
+
+Neither plain C-language accesses nor data_race() (#1 and #2 above) place
+any sort of constraint on the compiler's choice of optimizations [2].
+In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the
+compiler's use of code-motion and common-subexpression optimizations.
+Therefore, if a given access is involved in an intentional data race,
+using READ_ONCE() for loads and WRITE_ONCE() for stores is usually
+preferable to data_race(), which in turn is usually preferable to plain
+C-language accesses.
+
+KCSAN will complain about many types of data races involving plain
+C-language accesses, but marking all accesses involved in a given data
+race with one of data_race(), READ_ONCE(), or WRITE_ONCE(), will prevent
+KCSAN from complaining.  Of course, lack of KCSAN complaints does not
+imply correct code.  Therefore, please take a thoughtful approach
+when responding to KCSAN complaints.  Churning the code base with
+ill-considered additions of data_race(), READ_ONCE(), and WRITE_ONCE()
+is unhelpful.
+
+In fact, the following sections describe situations where use of
+data_race() and even plain C-language accesses is preferable to
+READ_ONCE() and WRITE_ONCE().
+
+
+Use of the data_race() Macro
+----------------------------
+
+Here are some situations where data_race() should be used instead of
+READ_ONCE() and WRITE_ONCE():
+
+1.	Data-racy loads from shared variables whose values are used only
+	for diagnostic purposes.
+
+2.	Data-racy reads whose values are checked against marked reload.
+
+3.	Reads whose values feed into error-tolerant heuristics.
+
+4.	Writes setting values that feed into error-tolerant heuristics.
+
+
+Data-Racy Reads for Approximate Diagnostics
+
+Approximate diagnostics include lockdep reports, monitoring/statistics
+(including /proc and /sys output), WARN*()/BUG*() checks whose return
+values are ignored, and other situations where reads from shared variables
+are not an integral part of the core concurrency design.
+
+In fact, use of data_race() instead of READ_ONCE() for these diagnostic
+reads can enable better checking of the remaining accesses implementing
+the core concurrency design.  For example, suppose that the core design
+prevents any non-diagnostic reads from shared variable x from running
+concurrently with updates to x.  Then using plain C-language writes
+to x allows KCSAN to detect reads from x from within regions of code
+that fail to exclude the updates.  In this case, it is important to use
+data_race() for the diagnostic reads because otherwise KCSAN would give
+false-positive warnings about these diagnostic reads.
+
+In theory, plain C-language loads can also be used for this use case.
+However, in practice this will have the disadvantage of causing KCSAN
+to generate false positives because KCSAN will have no way of knowing
+that the resulting data race was intentional.
+
+
+Data-Racy Reads That Are Checked Against Marked Reload
+
+The values from some reads are not implicitly trusted.  They are instead
+fed into some operation that checks the full value against a later marked
+load from memory, which means that the occasional arbitrarily bogus value
+is not a problem.  For example, if a bogus value is fed into cmpxchg(),
+all that happens is that this cmpxchg() fails, which normally results
+in a retry.  Unless the race condition that resulted in the bogus value
+recurs, this retry will with high probability succeed, so no harm done.
+
+However, please keep in mind that a data_race() load feeding into
+a cmpxchg_relaxed() might still be subject to load fusing on some
+architectures.  Therefore, it is best to capture the return value from
+the failing cmpxchg() for the next iteration of the loop, an approach
+that provides the compiler much less scope for mischievous optimizations.
+Capturing the return value from cmpxchg() also saves a memory reference
+in many cases.
+
+In theory, plain C-language loads can also be used for this use case.
+However, in practice this will have the disadvantage of causing KCSAN
+to generate false positives because KCSAN will have no way of knowing
+that the resulting data race was intentional.
+
+
+Reads Feeding Into Error-Tolerant Heuristics
+
+Values from some reads feed into heuristics that can tolerate occasional
+errors.  Such reads can use data_race(), thus allowing KCSAN to focus on
+the other accesses to the relevant shared variables.  But please note
+that data_race() loads are subject to load fusing, which can result in
+consistent errors, which in turn are quite capable of breaking heuristics.
+Therefore use of data_race() should be limited to cases where some other
+code (such as a barrier() call) will force the occasional reload.
+
+In theory, plain C-language loads can also be used for this use case.
+However, in practice this will have the disadvantage of causing KCSAN
+to generate false positives because KCSAN will have no way of knowing
+that the resulting data race was intentional.
+
+
+Writes Setting Values Feeding Into Error-Tolerant Heuristics
+
+The values read into error-tolerant heuristics come from somewhere,
+for example, from sysfs.  This means that some code in sysfs writes
+to this same variable, and these writes can also use data_race().
+After all, if the heuristic can tolerate the occasional bogus value
+due to compiler-mangled reads, it can also tolerate the occasional
+compiler-mangled write, at least assuming that the proper value is in
+place once the write completes.
+
+Plain C-language stores can also be used for this use case.  However,
+in kernels built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, this
+will have the disadvantage of causing KCSAN to generate false positives
+because KCSAN will have no way of knowing that the resulting data race
+was intentional.
+
+
+Use of Plain C-Language Accesses
+--------------------------------
+
+Here are some example situations where plain C-language accesses should
+be used instead of READ_ONCE(), WRITE_ONCE(), and data_race():
+
+1.	Accesses protected by mutual exclusion, including strict locking
+	and sequence locking.
+
+2.	Initialization-time and cleanup-time accesses.	This covers a
+	wide variety of situations, including the uniprocessor phase of
+	system boot, variables to be used by not-yet-spawned kthreads,
+	structures not yet published to reference-counted or RCU-protected
+	data structures, and the cleanup side of any of these situations.
+
+3.	Per-CPU variables that are not accessed from other CPUs.
+
+4.	Private per-task variables, including on-stack variables, some
+	fields in the task_struct structure, and task-private heap data.
+
+5.	Any other loads for which there is not supposed to be a concurrent
+	store to that same variable.
+
+6.	Any other stores for which there should be neither concurrent
+	loads nor concurrent stores to that same variable.
+
+	But note that KCSAN makes three explicit exceptions to this rule
+	by default, refraining from flagging plain C-language stores:
+
+	a.	No matter what.  You can override this default by building
+		with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n.
+
+	b.	When the store writes the value already contained in
+		that variable.	You can override this default by building
+		with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.
+
+	c.	When one of the stores is in an interrupt handler and
+		the other in the interrupted code.  You can override this
+		default by building with CONFIG_KCSAN_INTERRUPT_WATCHER=y.
+
+Note that it is important to use plain C-language accesses in these cases,
+because doing otherwise prevents KCSAN from detecting violations of your
+code's synchronization rules.
+
+
+ACCESS-DOCUMENTATION OPTIONS
+============================
+
+It is important to comment marked accesses so that people reading your
+code, yourself included, are reminded of the synchronization design.
+However, it is even more important to comment plain C-language accesses
+that are intentionally involved in data races.  Such comments are
+needed to remind people reading your code, again, yourself included,
+of how the compiler has been prevented from optimizing those accesses
+into concurrency bugs.
+
+It is also possible to tell KCSAN about your synchronization design.
+For example, ASSERT_EXCLUSIVE_ACCESS(foo) tells KCSAN that any
+concurrent access to variable foo by any other CPU is an error, even
+if that concurrent access is marked with READ_ONCE().  In addition,
+ASSERT_EXCLUSIVE_WRITER(foo) tells KCSAN that although it is OK for there
+to be concurrent reads from foo from other CPUs, it is an error for some
+other CPU to be concurrently writing to foo, even if that concurrent
+write is marked with data_race() or WRITE_ONCE().
+
+Note that although KCSAN will call out data races involving either
+ASSERT_EXCLUSIVE_ACCESS() or ASSERT_EXCLUSIVE_WRITER() on the one hand
+and data_race() writes on the other, KCSAN will not report the location
+of these data_race() writes.
+
+
+EXAMPLES
+========
+
+As noted earlier, the goal is to prevent the compiler from destroying
+your concurrent algorithm, to help the human reader, and to inform
+KCSAN of aspects of your concurrency design.  This section looks at a
+few examples showing how this can be done.
+
+
+Lock Protection With Lockless Diagnostic Access
+-----------------------------------------------
+
+For example, suppose a shared variable "foo" is read only while a
+reader-writer spinlock is read-held, written only while that same
+spinlock is write-held, except that it is also read locklessly for
+diagnostic purposes.  The code might look as follows:
+
+	int foo;
+	DEFINE_RWLOCK(foo_rwlock);
+
+	void update_foo(int newval)
+	{
+		write_lock(&foo_rwlock);
+		foo = newval;
+		do_something(newval);
+		write_unlock(&foo_rwlock);
+	}
+
+	int read_foo(void)
+	{
+		int ret;
+
+		read_lock(&foo_rwlock);
+		do_something_else();
+		ret = foo;
+		read_unlock(&foo_rwlock);
+		return ret;
+	}
+
+	int read_foo_diagnostic(void)
+	{
+		return data_race(foo);
+	}
+
+The reader-writer lock prevents the compiler from introducing concurrency
+bugs into any part of the main algorithm using foo, which means that
+the accesses to foo within both update_foo() and read_foo() can (and
+should) be plain C-language accesses.  One benefit of making them be
+plain C-language accesses is that KCSAN can detect any erroneous lockless
+reads from or updates to foo.  The data_race() in read_foo_diagnostic()
+tells KCSAN that data races are expected, and should be silently
+ignored.  This data_race() also tells the human reading the code that
+read_foo_diagnostic() might sometimes return a bogus value.
+
+However, please note that your kernel must be built with
+CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to
+detect a buggy lockless write.  If you need KCSAN to detect such a
+write even if that write did not change the value of foo, you also
+need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.  If you need KCSAN to
+detect such a write happening in an interrupt handler running on the
+same CPU doing the legitimate lock-protected write, you also need
+CONFIG_KCSAN_INTERRUPT_WATCHER=y.  With some or all of these Kconfig
+options set properly, KCSAN can be quite helpful, although it is not
+necessarily a full replacement for hardware watchpoints.  On the other
+hand, neither are hardware watchpoints a full replacement for KCSAN
+because it is not always easy to tell hardware watchpoints to conditionally
+trap on accesses.
+
+
+Lock-Protected Writes With Lockless Reads
+-----------------------------------------
+
+For another example, suppose a shared variable "foo" is updated only
+while holding a spinlock, but is read locklessly.  The code might look
+as follows:
+
+	int foo;
+	DEFINE_SPINLOCK(foo_lock);
+
+	void update_foo(int newval)
+	{
+		spin_lock(&foo_lock);
+		WRITE_ONCE(foo, newval);
+		ASSERT_EXCLUSIVE_WRITER(foo);
+		do_something(newval);
+		spin_unlock(&foo_lock);
+	}
+
+	int read_foo(void)
+	{
+		do_something_else();
+		return READ_ONCE(foo);
+	}
+
+Because foo is read locklessly, all accesses are marked.  The purpose
+of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy
+concurrent lockless write.
+
+
+Lockless Reads and Writes
+-------------------------
+
+For another example, suppose a shared variable "foo" is both read and
+updated locklessly.  The code might look as follows:
+
+	int foo;
+
+	int update_foo(int newval)
+	{
+		int ret;
+
+		ret = xchg(&foo, newval);
+		do_something(newval);
+		return ret;
+	}
+
+	int read_foo(void)
+	{
+		do_something_else();
+		return READ_ONCE(foo);
+	}
+
+Because foo is accessed locklessly, all accesses are marked.  It does
+not make sense to use ASSERT_EXCLUSIVE_WRITER() in this case because
+there really can be concurrent lockless writers.  KCSAN would
+flag any concurrent plain C-language reads from foo, and given
+CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, also any concurrent plain
+C-language writes to foo.
+
+
+Lockless Reads and Writes, But With Single-Threaded Initialization
+------------------------------------------------------------------
+
+For yet another example, suppose that foo is initialized in a
+single-threaded manner, but that a number of kthreads are then created
+that locklessly and concurrently access foo.  Some snippets of this code
+might look as follows:
+
+	int foo;
+
+	void initialize_foo(int initval, int nkthreads)
+	{
+		int i;
+
+		foo = initval;
+		ASSERT_EXCLUSIVE_ACCESS(foo);
+		for (i = 0; i < nkthreads; i++)
+			kthread_run(access_foo_concurrently, ...);
+	}
+
+	/* Called from access_foo_concurrently(). */
+	int update_foo(int newval)
+	{
+		int ret;
+
+		ret = xchg(&foo, newval);
+		do_something(newval);
+		return ret;
+	}
+
+	/* Also called from access_foo_concurrently(). */
+	int read_foo(void)
+	{
+		do_something_else();
+		return READ_ONCE(foo);
+	}
+
+The initialize_foo() uses a plain C-language write to foo because there
+are not supposed to be concurrent accesses during initialization.  The
+ASSERT_EXCLUSIVE_ACCESS() allows KCSAN to flag buggy concurrent unmarked
+reads, and the ASSERT_EXCLUSIVE_ACCESS() call further allows KCSAN to
+flag buggy concurrent writes, even if:  (1) Those writes are marked or
+(2) The kernel was built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=y.
+
+
+Checking Stress-Test Race Coverage
+----------------------------------
+
+When designing stress tests it is important to ensure that race conditions
+of interest really do occur.  For example, consider the following code
+fragment:
+
+	int foo;
+
+	int update_foo(int newval)
+	{
+		return xchg(&foo, newval);
+	}
+
+	int xor_shift_foo(int shift, int mask)
+	{
+		int old, new, newold;
+
+		newold = data_race(foo); /* Checked by cmpxchg(). */
+		do {
+			old = newold;
+			new = (old << shift) ^ mask;
+			newold = cmpxchg(&foo, old, new);
+		} while (newold != old);
+		return old;
+	}
+
+	int read_foo(void)
+	{
+		return READ_ONCE(foo);
+	}
+
+If it is possible for update_foo(), xor_shift_foo(), and read_foo() to be
+invoked concurrently, the stress test should force this concurrency to
+actually happen.  KCSAN can evaluate the stress test when the above code
+is modified to read as follows:
+
+	int foo;
+
+	int update_foo(int newval)
+	{
+		ASSERT_EXCLUSIVE_ACCESS(foo);
+		return xchg(&foo, newval);
+	}
+
+	int xor_shift_foo(int shift, int mask)
+	{
+		int old, new, newold;
+
+		newold = data_race(foo); /* Checked by cmpxchg(). */
+		do {
+			old = newold;
+			new = (old << shift) ^ mask;
+			ASSERT_EXCLUSIVE_ACCESS(foo);
+			newold = cmpxchg(&foo, old, new);
+		} while (newold != old);
+		return old;
+	}
+
+
+	int read_foo(void)
+	{
+		ASSERT_EXCLUSIVE_ACCESS(foo);
+		return READ_ONCE(foo);
+	}
+
+If a given stress-test run does not result in KCSAN complaints from
+each possible pair of ASSERT_EXCLUSIVE_ACCESS() invocations, the
+stress test needs improvement.  If the stress test was to be evaluated
+on a regular basis, it would be wise to place the above instances of
+ASSERT_EXCLUSIVE_ACCESS() under #ifdef so that they did not result in
+false positives when not evaluating the stress test.
+
+
+REFERENCES
+==========
+
+[1] "Concurrency bugs should fear the big bad data-race detector (part 2)"
+    https://lwn.net/Articles/816854/
+
+[2] "Who's afraid of a big bad optimizing compiler?"
+    https://lwn.net/Articles/793253/
diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
index b2da6365be63..6f3d16dbf467 100644
--- a/tools/memory-model/Documentation/glossary.txt
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -19,7 +19,7 @@ Address Dependency:  When the address of a later memory access is computed
 	 from the value returned by the rcu_dereference() on line 2, the
 	 address dependency extends from that rcu_dereference() to that
 	 "p->a".  In rare cases, optimizing compilers can destroy address
-	 dependencies.	Please see Documentation/RCU/rcu_dereference.txt
+	 dependencies.	Please see Documentation/RCU/rcu_dereference.rst
 	 for more information.
 
 	 See also "Control Dependency" and "Data Dependency".
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
index 81e1a0ec5342..4c789ec8334f 100644
--- a/tools/memory-model/Documentation/simple.txt
+++ b/tools/memory-model/Documentation/simple.txt
@@ -189,7 +189,6 @@ Additional information may be found in these files:
 
 Documentation/atomic_t.txt
 Documentation/atomic_bitops.txt
-Documentation/core-api/atomic_ops.rst
 Documentation/core-api/refcount-vs-atomic.rst
 
 Reading code using these primitives is often also quite helpful.
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 549813cff8ab..cedf3ede7545 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,22 +11,15 @@
 #include "../../../arch/x86/lib/inat.c"
 #include "../../../arch/x86/lib/insn.c"
 
+#define CONFIG_64BIT 1
+#include <asm/nops.h>
+
 #include <asm/orc_types.h>
 #include <objtool/check.h>
 #include <objtool/elf.h>
 #include <objtool/arch.h>
 #include <objtool/warn.h>
-
-static unsigned char op_to_cfi_reg[][2] = {
-	{CFI_AX, CFI_R8},
-	{CFI_CX, CFI_R9},
-	{CFI_DX, CFI_R10},
-	{CFI_BX, CFI_R11},
-	{CFI_SP, CFI_R12},
-	{CFI_BP, CFI_R13},
-	{CFI_SI, CFI_R14},
-	{CFI_DI, CFI_R15},
-};
+#include <arch/elf.h>
 
 static int is_x86_64(const struct elf *elf)
 {
@@ -83,6 +76,31 @@ unsigned long arch_jump_destination(struct instruction *insn)
 		return -1; \
 	else for (list_add_tail(&op->list, ops_list); op; op = NULL)
 
+/*
+ * Helpers to decode ModRM/SIB:
+ *
+ * r/m| AX  CX  DX  BX |  SP |  BP |  SI  DI |
+ *    | R8  R9 R10 R11 | R12 | R13 | R14 R15 |
+ * Mod+----------------+-----+-----+---------+
+ * 00 |    [r/m]       |[SIB]|[IP+]|  [r/m]  |
+ * 01 |  [r/m + d8]    |[S+d]|   [r/m + d8]  |
+ * 10 |  [r/m + d32]   |[S+D]|   [r/m + d32] |
+ * 11 |                   r/ m               |
+ */
+
+#define mod_is_mem()	(modrm_mod != 3)	/* Mod 00/01/10: memory operand */
+#define mod_is_reg()	(modrm_mod == 3)	/* Mod 11: register operand */
+
+#define is_RIP()   ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)	/* [IP+] */
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem())	/* [SIB] */
+
+#define rm_is(reg) (have_SIB() ? \
+		    sib_base == (reg) && sib_index == CFI_SP : \
+		    modrm_rm == (reg))	/* NOTE(review): index == SP presumably means "no index" - confirm */
+
+#define rm_is_mem(reg)	(mod_is_mem() && !is_RIP() && rm_is(reg))	/* memory operand based on reg */
+#define rm_is_reg(reg)	(mod_is_reg() && modrm_rm == (reg))	/* operand is reg itself */
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, enum insn_type *type,
@@ -90,21 +108,22 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			    struct list_head *ops_list)
 {
 	struct insn insn;
-	int x86_64, sign;
-	unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
-		      rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
-		      modrm_reg = 0, sib = 0;
+	int x86_64, ret;
+	unsigned char op1, op2,
+		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
+		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+		      sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
 	struct stack_op *op = NULL;
 	struct symbol *sym;
+	u64 imm;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
 		return -1;
 
-	insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
-	insn_get_length(&insn);
-
-	if (!insn_complete(&insn)) {
+	ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+	if (ret < 0) {
 		WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
 		return -1;
 	}
@@ -129,23 +148,27 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 	if (insn.modrm.nbytes) {
 		modrm = insn.modrm.bytes[0];
 		modrm_mod = X86_MODRM_MOD(modrm);
-		modrm_reg = X86_MODRM_REG(modrm);
-		modrm_rm = X86_MODRM_RM(modrm);
+		modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+		modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
 	}
 
-	if (insn.sib.nbytes)
+	if (insn.sib.nbytes) {
 		sib = insn.sib.bytes[0];
+		/* sib_scale = X86_SIB_SCALE(sib); */
+		sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+		sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
+	}
 
 	switch (op1) {
 
 	case 0x1:
 	case 0x29:
-		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+		if (rex_w && rm_is_reg(CFI_SP)) {
 
 			/* add/sub reg, %rsp */
 			ADD_OP(op) {
 				op->src.type = OP_SRC_ADD;
-				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = CFI_SP;
 			}
@@ -157,7 +180,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		/* push reg */
 		ADD_OP(op) {
 			op->src.type = OP_SRC_REG;
-			op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+			op->src.reg = (op1 & 0x7) + 8*rex_b;
 			op->dest.type = OP_DEST_PUSH;
 		}
 
@@ -169,7 +192,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		ADD_OP(op) {
 			op->src.type = OP_SRC_POP;
 			op->dest.type = OP_DEST_REG;
-			op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+			op->dest.reg = (op1 & 0x7) + 8*rex_b;
 		}
 
 		break;
@@ -187,12 +210,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
-	case 0x81:
-	case 0x83:
-		if (rex != 0x48)
+	case 0x80 ... 0x83:
+		/*
+		 * 1000 00sw : mod OP r/m : immediate
+		 *
+		 * s - sign extend immediate
+		 * w - imm8 / imm32
+		 *
+		 * OP: 000 ADD    100 AND
+		 *     001 OR     101 SUB
+		 *     010 ADC    110 XOR
+		 *     011 SBB    111 CMP
+		 */
+
+		/* 64bit only */
+		if (!rex_w)
+			break;
+
+		/* %rsp target only */
+		if (!rm_is_reg(CFI_SP))
 			break;
 
-		if (modrm == 0xe4) {
+		imm = insn.immediate.value;
+		if (op1 & 2) { /* sign extend */
+			if (op1 & 1) { /* imm32 */
+				imm <<= 32;
+				imm = (s64)imm >> 32;
+			} else { /* imm8 */
+				imm <<= 56;
+				imm = (s64)imm >> 56;
+			}
+		}
+
+		switch (modrm_reg & 7) {
+		case 5:
+			imm = -imm;
+			/* fallthrough */
+		case 0:
+			/* add/sub imm, %rsp */
+			ADD_OP(op) {
+				op->src.type = OP_SRC_ADD;
+				op->src.reg = CFI_SP;
+				op->src.offset = imm;
+				op->dest.type = OP_DEST_REG;
+				op->dest.reg = CFI_SP;
+			}
+			break;
+
+		case 4:
 			/* and imm, %rsp */
 			ADD_OP(op) {
 				op->src.type = OP_SRC_AND;
@@ -202,53 +267,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 				op->dest.reg = CFI_SP;
 			}
 			break;
-		}
 
-		if (modrm == 0xc4)
-			sign = 1;
-		else if (modrm == 0xec)
-			sign = -1;
-		else
+		default:
+			/* WARN ? */
 			break;
-
-		/* add/sub imm, %rsp */
-		ADD_OP(op) {
-			op->src.type = OP_SRC_ADD;
-			op->src.reg = CFI_SP;
-			op->src.offset = insn.immediate.value * sign;
-			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_SP;
 		}
+
 		break;
 
 	case 0x89:
-		if (rex_w && !rex_r && modrm_reg == 4) {
+		if (!rex_w)
+			break;
 
-			if (modrm_mod == 3) {
+		if (modrm_reg == CFI_SP) {
+
+			if (mod_is_reg()) {
 				/* mov %rsp, reg */
 				ADD_OP(op) {
 					op->src.type = OP_SRC_REG;
 					op->src.reg = CFI_SP;
 					op->dest.type = OP_DEST_REG;
-					op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+					op->dest.reg = modrm_rm;
 				}
 				break;
 
 			} else {
-				/* skip nontrivial SIB */
-				if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
-					break;
-
 				/* skip RIP relative displacement */
-				if (modrm_rm == 5 && modrm_mod == 0)
+				if (is_RIP())
 					break;
 
+				/* skip nontrivial SIB */
+				if (have_SIB()) {
+					modrm_rm = sib_base;
+					if (sib_index != CFI_SP)
+						break;
+				}
+
 				/* mov %rsp, disp(%reg) */
 				ADD_OP(op) {
 					op->src.type = OP_SRC_REG;
 					op->src.reg = CFI_SP;
 					op->dest.type = OP_DEST_REG_INDIRECT;
-					op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+					op->dest.reg = modrm_rm;
 					op->dest.offset = insn.displacement.value;
 				}
 				break;
@@ -257,12 +317,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			break;
 		}
 
-		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+		if (rm_is_reg(CFI_SP)) {
 
 			/* mov reg, %rsp */
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
-				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG;
 				op->dest.reg = CFI_SP;
 			}
@@ -271,13 +331,15 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
 		/* fallthrough */
 	case 0x88:
-		if (!rex_b &&
-		    (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+		if (!rex_w)
+			break;
+
+		if (rm_is_mem(CFI_BP)) {
 
 			/* mov reg, disp(%rbp) */
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
-				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_BP;
 				op->dest.offset = insn.displacement.value;
@@ -285,12 +347,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 			break;
 		}
 
-		if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+		if (rm_is_mem(CFI_SP)) {
 
 			/* mov reg, disp(%rsp) */
 			ADD_OP(op) {
 				op->src.type = OP_SRC_REG;
-				op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->src.reg = modrm_reg;
 				op->dest.type = OP_DEST_REG_INDIRECT;
 				op->dest.reg = CFI_SP;
 				op->dest.offset = insn.displacement.value;
@@ -301,7 +363,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		break;
 
 	case 0x8b:
-		if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+		if (!rex_w)
+			break;
+
+		if (rm_is_mem(CFI_BP)) {
 
 			/* mov disp(%rbp), reg */
 			ADD_OP(op) {
@@ -309,11 +374,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 				op->src.reg = CFI_BP;
 				op->src.offset = insn.displacement.value;
 				op->dest.type = OP_DEST_REG;
-				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.reg = modrm_reg;
 			}
+			break;
+		}
 
-		} else if (rex_w && !rex_b && sib == 0x24 &&
-			   modrm_mod != 3 && modrm_rm == 4) {
+		if (rm_is_mem(CFI_SP)) {
 
 			/* mov disp(%rsp), reg */
 			ADD_OP(op) {
@@ -321,75 +387,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 				op->src.reg = CFI_SP;
 				op->src.offset = insn.displacement.value;
 				op->dest.type = OP_DEST_REG;
-				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+				op->dest.reg = modrm_reg;
 			}
+			break;
 		}
 
 		break;
 
 	case 0x8d:
-		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
-
-			ADD_OP(op) {
-				if (!insn.displacement.value) {
-					/* lea (%rsp), reg */
-					op->src.type = OP_SRC_REG;
-				} else {
-					/* lea disp(%rsp), reg */
-					op->src.type = OP_SRC_ADD;
-					op->src.offset = insn.displacement.value;
-				}
-				op->src.reg = CFI_SP;
-				op->dest.type = OP_DEST_REG;
-				op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
-			}
-
-		} else if (rex == 0x48 && modrm == 0x65) {
-
-			/* lea disp(%rbp), %rsp */
-			ADD_OP(op) {
-				op->src.type = OP_SRC_ADD;
-				op->src.reg = CFI_BP;
-				op->src.offset = insn.displacement.value;
-				op->dest.type = OP_DEST_REG;
-				op->dest.reg = CFI_SP;
-			}
+		if (mod_is_reg()) {
+			WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset);
+			break;
+		}
 
-		} else if (rex == 0x49 && modrm == 0x62 &&
-			   insn.displacement.value == -8) {
+		/* skip non 64bit ops */
+		if (!rex_w)
+			break;
 
-			/*
-			 * lea -0x8(%r10), %rsp
-			 *
-			 * Restoring rsp back to its original value after a
-			 * stack realignment.
-			 */
-			ADD_OP(op) {
-				op->src.type = OP_SRC_ADD;
-				op->src.reg = CFI_R10;
-				op->src.offset = -8;
-				op->dest.type = OP_DEST_REG;
-				op->dest.reg = CFI_SP;
-			}
+		/* skip RIP relative displacement */
+		if (is_RIP())
+			break;
 
-		} else if (rex == 0x49 && modrm == 0x65 &&
-			   insn.displacement.value == -16) {
+		/* skip nontrivial SIB */
+		if (have_SIB()) {
+			modrm_rm = sib_base;
+			if (sib_index != CFI_SP)
+				break;
+		}
 
-			/*
-			 * lea -0x10(%r13), %rsp
-			 *
-			 * Restoring rsp back to its original value after a
-			 * stack realignment.
-			 */
-			ADD_OP(op) {
+		/* lea disp(%src), %dst */
+		ADD_OP(op) {
+			op->src.offset = insn.displacement.value;
+			if (!op->src.offset) {
+				/* lea (%src), %dst */
+				op->src.type = OP_SRC_REG;
+			} else {
+				/* lea disp(%src), %dst */
 				op->src.type = OP_SRC_ADD;
-				op->src.reg = CFI_R13;
-				op->src.offset = -16;
-				op->dest.type = OP_DEST_REG;
-				op->dest.reg = CFI_SP;
 			}
+			op->src.reg = modrm_rm;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = modrm_reg;
 		}
-
 		break;
 
 	case 0x8f:
@@ -476,9 +515,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 		 * mov bp, sp
 		 * pop bp
 		 */
-		ADD_OP(op)
-			op->dest.type = OP_DEST_LEAVE;
-
+		ADD_OP(op) {
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_BP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
+		ADD_OP(op) {
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_BP;
+		}
 		break;
 
 	case 0xe3:
@@ -596,11 +643,11 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
 const char *arch_nop_insn(int len)
 {
 	static const char nops[5][5] = {
-		/* 1 */ { 0x90 },
-		/* 2 */ { 0x66, 0x90 },
-		/* 3 */ { 0x0f, 0x1f, 0x00 },
-		/* 4 */ { 0x0f, 0x1f, 0x40, 0x00 },
-		/* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+		{ BYTES_NOP1 },
+		{ BYTES_NOP2 },
+		{ BYTES_NOP3 },
+		{ BYTES_NOP4 },
+		{ BYTES_NOP5 },
 	};
 
 	if (len < 1 || len > 5) {
@@ -611,6 +658,122 @@ const char *arch_nop_insn(int len)
 	return nops[len-1];
 }
 
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV	(1 << 15)
+#define ALT_NOT(feat)		((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+	s32 instr_offset;	/* original instruction */
+	s32 repl_offset;	/* offset to replacement instruction */
+	u16 cpuid;		/* cpuid bit set for replacement */
+	u8  instrlen;		/* length of original instruction */
+	u8  replacementlen;	/* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+			       struct instruction *orig, struct symbol *sym,
+			       int cpuid, u8 orig_len, u8 repl_len)
+{
+	const int size = sizeof(struct alt_instr);
+	struct alt_instr *alt;
+	struct section *sec;
+	Elf_Scn *s;
+
+	sec = find_section_by_name(elf, ".altinstructions");
+	if (!sec) {
+		sec = elf_create_section(elf, ".altinstructions",
+					 SHF_WRITE, size, 0);
+
+		if (!sec) {
+			WARN_ELF("elf_create_section");
+			return -1;
+		}
+	}
+
+	s = elf_getscn(elf->elf, sec->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return -1;
+	}
+
+	sec->data = elf_newdata(s);
+	if (!sec->data) {
+		WARN_ELF("elf_newdata");
+		return -1;
+	}
+
+	sec->data->d_size = size;
+	sec->data->d_align = 1;
+
+	alt = sec->data->d_buf = malloc(size);
+	if (!sec->data->d_buf) {
+		perror("malloc");
+		return -1;
+	}
+	memset(sec->data->d_buf, 0, size);
+
+	if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+				  R_X86_64_PC32, orig->sec, orig->offset)) {
+		WARN("elf_create_reloc: alt_instr::instr_offset");
+		return -1;
+	}
+
+	if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+			  R_X86_64_PC32, sym, 0)) {
+		WARN("elf_create_reloc: alt_instr::repl_offset");
+		return -1;
+	}
+
+	alt->cpuid = cpuid;
+	alt->instrlen = orig_len;
+	alt->replacementlen = repl_len;
+
+	sec->sh.sh_size += size;
+	sec->changed = true;
+
+	return 0;
+}
+
+#define X86_FEATURE_RETPOLINE                ( 7*32+12)
+
+int arch_rewrite_retpolines(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct reloc *reloc;
+	struct symbol *sym;
+	char name[32] = "";
+
+	list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+
+		if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+			continue;
+
+		reloc = insn->reloc;
+
+		sprintf(name, "__x86_indirect_alt_%s_%s",
+			insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+			reloc->sym->name + 21);
+
+		sym = find_symbol_by_name(file->elf, name);
+		if (!sym) {
+			sym = elf_create_undef_symbol(file->elf, name);
+			if (!sym) {
+				WARN("elf_create_undef_symbol");
+				return -1;
+			}
+		}
+
+		if (elf_add_alternative(file->elf, insn, sym,
+					ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+			WARN("elf_add_alternative");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
 {
 	struct cfi_reg *cfa = &insn->cfi.cfa;
@@ -646,3 +809,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
 
 	return 0;
 }
+
+bool arch_is_retpoline(struct symbol *sym)
+{
+	return !strncmp(sym->name, "__x86_indirect_", 15);
+}
diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517efba8..0579d22c433c 100644
--- a/tools/objtool/arch/x86/include/arch/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
@@ -4,13 +4,13 @@
 #define _OBJTOOL_CFI_REGS_H
 
 #define CFI_AX			0
-#define CFI_DX			1
-#define CFI_CX			2
+#define CFI_CX			1
+#define CFI_DX			2
 #define CFI_BX			3
-#define CFI_SI			4
-#define CFI_DI			5
-#define CFI_BP			6
-#define CFI_SP			7
+#define CFI_SP			4
+#define CFI_BP			5
+#define CFI_SI			6
+#define CFI_DI			7
 #define CFI_R8			8
 #define CFI_R9			9
 #define CFI_R10			10
diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index d818b2bffa02..14271cca0c74 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -10,7 +10,7 @@
 #define JUMP_ORIG_OFFSET	0
 #define JUMP_NEW_OFFSET		4
 
-#define ALT_ENTRY_SIZE		13
+#define ALT_ENTRY_SIZE		12
 #define ALT_ORIG_OFFSET		0
 #define ALT_NEW_OFFSET		4
 #define ALT_FEATURE_OFFSET	8
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c3a85d8f6c5c..8b38b5d6fec7 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -15,16 +15,23 @@
 
 #include <subcmd/parse-options.h>
 #include <string.h>
+#include <stdlib.h>
 #include <objtool/builtin.h>
 #include <objtool/objtool.h>
 
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+     validate_dup, vmlinux, mcount, noinstr, backup;
 
 static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
 	NULL,
 };
 
+static const char * const env_usage[] = {
+	"OBJTOOL_ARGS=\"<options>\"",
+	NULL,
+};
+
 const struct option check_options[] = {
 	OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
 	OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
@@ -37,20 +44,44 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
 	OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
 	OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
+	OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
 	OPT_END(),
 };
 
+int cmd_parse_options(int argc, const char **argv, const char * const usage[])
+{
+	const char *envv[16] = { };
+	char *env;
+	int envc;
+
+	env = getenv("OBJTOOL_ARGS");
+	if (env) {
+		envv[0] = "OBJTOOL_ARGS";
+		for (envc = 1; envc < ARRAY_SIZE(envv); ) {
+			envv[envc++] = env;
+			env = strchr(env, ' ');
+			if (!env)
+				break;
+			*env = '\0';
+			env++;
+		}
+
+		parse_options(envc, envv, check_options, env_usage, 0);
+	}
+
+	argc = parse_options(argc, argv, check_options, usage, 0);
+	if (argc != 1)
+		usage_with_options(usage, check_options);
+	return argc;
+}
+
 int cmd_check(int argc, const char **argv)
 {
 	const char *objname;
 	struct objtool_file *file;
 	int ret;
 
-	argc = parse_options(argc, argv, check_options, check_usage, 0);
-
-	if (argc != 1)
-		usage_with_options(check_usage, check_options);
-
+	argc = cmd_parse_options(argc, argv, check_usage);
 	objname = argv[0];
 
 	file = objtool_open_read(objname);
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 8273bbf7cebb..17f8b9307738 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv)
 		struct objtool_file *file;
 		int ret;
 
-		argc = parse_options(argc, argv, check_options, orc_usage, 0);
-		if (argc != 1)
-			usage_with_options(orc_usage, check_options);
-
+		argc = cmd_parse_options(argc, argv, orc_usage);
 		objname = argv[0];
 
 		file = objtool_open_read(objname);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 5e5388a38e2a..9ed1a4cd00dc 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -108,6 +108,18 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
+static bool is_jump_table_jump(struct instruction *insn)
+{
+	struct alt_group *alt_group = insn->alt_group;
+
+	if (insn->jump_table)
+		return true;
+
+	/* Retpoline alternative for a jump table? */
+	return alt_group && alt_group->orig_group &&
+	       alt_group->orig_group->first_insn->jump_table;
+}
+
 static bool is_sibling_call(struct instruction *insn)
 {
 	/*
@@ -120,7 +132,7 @@ static bool is_sibling_call(struct instruction *insn)
 
 	/* An indirect jump is either a sibling call or a jump to a table. */
 	if (insn->type == INSN_JUMP_DYNAMIC)
-		return list_empty(&insn->alts);
+		return !is_jump_table_jump(insn);
 
 	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
 	return (is_static_jump(insn) && insn->call_dest);
@@ -433,8 +445,7 @@ reachable:
 
 static int create_static_call_sections(struct objtool_file *file)
 {
-	struct section *sec, *reloc_sec;
-	struct reloc *reloc;
+	struct section *sec;
 	struct static_call_site *site;
 	struct instruction *insn;
 	struct symbol *key_sym;
@@ -452,7 +463,7 @@ static int create_static_call_sections(struct objtool_file *file)
 		return 0;
 
 	idx = 0;
-	list_for_each_entry(insn, &file->static_call_list, static_call_node)
+	list_for_each_entry(insn, &file->static_call_list, call_node)
 		idx++;
 
 	sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
@@ -460,36 +471,18 @@ static int create_static_call_sections(struct objtool_file *file)
 	if (!sec)
 		return -1;
 
-	reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-	if (!reloc_sec)
-		return -1;
-
 	idx = 0;
-	list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+	list_for_each_entry(insn, &file->static_call_list, call_node) {
 
 		site = (struct static_call_site *)sec->data->d_buf + idx;
 		memset(site, 0, sizeof(struct static_call_site));
 
 		/* populate reloc for 'addr' */
-		reloc = malloc(sizeof(*reloc));
-
-		if (!reloc) {
-			perror("malloc");
-			return -1;
-		}
-		memset(reloc, 0, sizeof(*reloc));
-
-		insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
-		if (!reloc->sym) {
-			WARN_FUNC("static call tramp: missing containing symbol",
-				  insn->sec, insn->offset);
+		if (elf_add_reloc_to_insn(file->elf, sec,
+					  idx * sizeof(struct static_call_site),
+					  R_X86_64_PC32,
+					  insn->sec, insn->offset))
 			return -1;
-		}
-
-		reloc->type = R_X86_64_PC32;
-		reloc->offset = idx * sizeof(struct static_call_site);
-		reloc->sec = reloc_sec;
-		elf_add_reloc(file->elf, reloc);
 
 		/* find key symbol */
 		key_name = strdup(insn->call_dest->name);
@@ -526,32 +519,21 @@ static int create_static_call_sections(struct objtool_file *file)
 		free(key_name);
 
 		/* populate reloc for 'key' */
-		reloc = malloc(sizeof(*reloc));
-		if (!reloc) {
-			perror("malloc");
+		if (elf_add_reloc(file->elf, sec,
+				  idx * sizeof(struct static_call_site) + 4,
+				  R_X86_64_PC32, key_sym,
+				  is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
 			return -1;
-		}
-		memset(reloc, 0, sizeof(*reloc));
-		reloc->sym = key_sym;
-		reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
-		reloc->type = R_X86_64_PC32;
-		reloc->offset = idx * sizeof(struct static_call_site) + 4;
-		reloc->sec = reloc_sec;
-		elf_add_reloc(file->elf, reloc);
 
 		idx++;
 	}
 
-	if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-		return -1;
-
 	return 0;
 }
 
 static int create_mcount_loc_sections(struct objtool_file *file)
 {
-	struct section *sec, *reloc_sec;
-	struct reloc *reloc;
+	struct section *sec;
 	unsigned long *loc;
 	struct instruction *insn;
 	int idx;
@@ -574,49 +556,21 @@ static int create_mcount_loc_sections(struct objtool_file *file)
 	if (!sec)
 		return -1;
 
-	reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-	if (!reloc_sec)
-		return -1;
-
 	idx = 0;
 	list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
 
 		loc = (unsigned long *)sec->data->d_buf + idx;
 		memset(loc, 0, sizeof(unsigned long));
 
-		reloc = malloc(sizeof(*reloc));
-		if (!reloc) {
-			perror("malloc");
+		if (elf_add_reloc_to_insn(file->elf, sec,
+					  idx * sizeof(unsigned long),
+					  R_X86_64_64,
+					  insn->sec, insn->offset))
 			return -1;
-		}
-		memset(reloc, 0, sizeof(*reloc));
-
-		if (insn->sec->sym) {
-			reloc->sym = insn->sec->sym;
-			reloc->addend = insn->offset;
-		} else {
-			reloc->sym = find_symbol_containing(insn->sec, insn->offset);
-
-			if (!reloc->sym) {
-				WARN("missing symbol for insn at offset 0x%lx\n",
-				     insn->offset);
-				return -1;
-			}
-
-			reloc->addend = insn->offset - reloc->sym->offset;
-		}
-
-		reloc->type = R_X86_64_64;
-		reloc->offset = idx * sizeof(unsigned long);
-		reloc->sec = reloc_sec;
-		elf_add_reloc(file->elf, reloc);
 
 		idx++;
 	}
 
-	if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-		return -1;
-
 	return 0;
 }
 
@@ -850,6 +804,30 @@ static int add_ignore_alternatives(struct objtool_file *file)
 	return 0;
 }
 
+__weak bool arch_is_retpoline(struct symbol *sym)
+{
+	return false;
+}
+
+#define NEGATIVE_RELOC	((void *)-1L)
+
+static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+{
+	if (insn->reloc == NEGATIVE_RELOC)
+		return NULL;
+
+	if (!insn->reloc) {
+		insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+						       insn->offset, insn->len);
+		if (!insn->reloc) {
+			insn->reloc = NEGATIVE_RELOC;
+			return NULL;
+		}
+	}
+
+	return insn->reloc;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -864,16 +842,14 @@ static int add_jump_destinations(struct objtool_file *file)
 		if (!is_static_jump(insn))
 			continue;
 
-		reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-						 insn->offset, insn->len);
+		reloc = insn_reloc(file, insn);
 		if (!reloc) {
 			dest_sec = insn->sec;
 			dest_off = arch_jump_destination(insn);
 		} else if (reloc->sym->type == STT_SECTION) {
 			dest_sec = reloc->sym->sec;
 			dest_off = arch_dest_reloc_offset(reloc->addend);
-		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
-			   !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+		} else if (arch_is_retpoline(reloc->sym)) {
 			/*
 			 * Retpoline jumps are really dynamic jumps in
 			 * disguise, so convert them accordingly.
@@ -883,13 +859,16 @@ static int add_jump_destinations(struct objtool_file *file)
 			else
 				insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
 
+			list_add_tail(&insn->call_node,
+				      &file->retpoline_call_list);
+
 			insn->retpoline_safe = true;
 			continue;
 		} else if (insn->func) {
 			/* internal or external sibling call (with reloc) */
 			insn->call_dest = reloc->sym;
 			if (insn->call_dest->static_call_tramp) {
-				list_add_tail(&insn->static_call_node,
+				list_add_tail(&insn->call_node,
 					      &file->static_call_list);
 			}
 			continue;
@@ -951,7 +930,7 @@ static int add_jump_destinations(struct objtool_file *file)
 				/* internal sibling call (without reloc) */
 				insn->call_dest = insn->jump_dest->func;
 				if (insn->call_dest->static_call_tramp) {
-					list_add_tail(&insn->static_call_node,
+					list_add_tail(&insn->call_node,
 						      &file->static_call_list);
 				}
 			}
@@ -995,8 +974,7 @@ static int add_call_destinations(struct objtool_file *file)
 		if (insn->type != INSN_CALL)
 			continue;
 
-		reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-					       insn->offset, insn->len);
+		reloc = insn_reloc(file, insn);
 		if (!reloc) {
 			dest_off = arch_jump_destination(insn);
 			insn->call_dest = find_call_destination(insn->sec, dest_off);
@@ -1026,9 +1004,29 @@ static int add_call_destinations(struct objtool_file *file)
 					  dest_off);
 				return -1;
 			}
+
+		} else if (arch_is_retpoline(reloc->sym)) {
+			/*
+			 * Retpoline calls are really dynamic calls in
+			 * disguise, so convert them accordingly.
+			 */
+			insn->type = INSN_CALL_DYNAMIC;
+			insn->retpoline_safe = true;
+
+			list_add_tail(&insn->call_node,
+				      &file->retpoline_call_list);
+
+			remove_insn_ops(insn);
+			continue;
+
 		} else
 			insn->call_dest = reloc->sym;
 
+		if (insn->call_dest && insn->call_dest->static_call_tramp) {
+			list_add_tail(&insn->call_node,
+				      &file->static_call_list);
+		}
+
 		/*
 		 * Many compilers cannot disable KCOV with a function attribute
 		 * so they need a little help, NOP out any KCOV calls from noinstr
@@ -1175,8 +1173,7 @@ static int handle_group_alt(struct objtool_file *file,
 		 * alternatives code can adjust the relative offsets
 		 * accordingly.
 		 */
-		alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-						   insn->offset, insn->len);
+		alt_reloc = insn_reloc(file, insn);
 		if (alt_reloc &&
 		    !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
 
@@ -1751,6 +1748,11 @@ static void mark_rodata(struct objtool_file *file)
 	file->rodata = found;
 }
 
+__weak int arch_rewrite_retpolines(struct objtool_file *file)
+{
+	return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
 	int ret;
@@ -1772,10 +1774,17 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be before add_{jump,call}_destinations().
+	 */
 	ret = read_static_call_tramps(file);
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be before add_special_section_alts() as that depends on
+	 * jump_dest being set.
+	 */
 	ret = add_jump_destinations(file);
 	if (ret)
 		return ret;
@@ -1784,6 +1793,10 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be before add_call_destinations(); it changes INSN_CALL to
+	 * INSN_JUMP.
+	 */
 	ret = read_intra_function_calls(file);
 	if (ret)
 		return ret;
@@ -1808,6 +1821,15 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be after add_special_section_alts(), since this will emit
+	 * alternatives. Must be after add_{jump,call}_destinations(), since
+	 * those create the call insn lists.
+	 */
+	ret = arch_rewrite_retpolines(file);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -1959,8 +1981,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg)
  *   41 5d			pop    %r13
  *   c3				retq
  */
-static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
-			     struct stack_op *op)
+static int update_cfi_state(struct instruction *insn,
+			    struct instruction *next_insn,
+			    struct cfi_state *cfi, struct stack_op *op)
 {
 	struct cfi_reg *cfa = &cfi->cfa;
 	struct cfi_reg *regs = cfi->regs;
@@ -2019,7 +2042,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 			}
 
 			else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
-				 cfa->base == CFI_BP) {
+				 (cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) {
 
 				/*
 				 * mov %rbp, %rsp
@@ -2161,7 +2184,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 				break;
 			}
 
-			if (op->dest.reg == cfi->cfa.base) {
+			if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) {
 				WARN_FUNC("unsupported stack register modification",
 					  insn->sec, insn->offset);
 				return -1;
@@ -2216,7 +2239,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 				cfa->offset = 0;
 				cfi->drap_offset = -1;
 
-			} else if (regs[op->dest.reg].offset == -cfi->stack_size) {
+			} else if (cfi->stack_size == -regs[op->dest.reg].offset) {
 
 				/* pop %reg */
 				restore_reg(cfi, op->dest.reg);
@@ -2357,26 +2380,6 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 
 		break;
 
-	case OP_DEST_LEAVE:
-		if ((!cfi->drap && cfa->base != CFI_BP) ||
-		    (cfi->drap && cfa->base != cfi->drap_reg)) {
-			WARN_FUNC("leave instruction with modified stack frame",
-				  insn->sec, insn->offset);
-			return -1;
-		}
-
-		/* leave (mov %rbp, %rsp; pop %rbp) */
-
-		cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
-		restore_reg(cfi, CFI_BP);
-
-		if (!cfi->drap) {
-			cfa->base = CFI_SP;
-			cfa->offset -= 8;
-		}
-
-		break;
-
 	case OP_DEST_MEM:
 		if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) {
 			WARN_FUNC("unknown stack-related memory operation",
@@ -2433,13 +2436,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
 	return 0;
 }
 
-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn,
+			   struct instruction *next_insn,
+			   struct insn_state *state)
 {
 	struct stack_op *op;
 
 	list_for_each_entry(op, &insn->stack_ops, list) {
 
-		if (update_cfi_state(insn, &state->cfi, op))
+		if (update_cfi_state(insn, next_insn, &state->cfi, op))
 			return 1;
 
 		if (!insn->alt_group)
@@ -2722,7 +2727,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 				return 0;
 		}
 
-		if (handle_insn_ops(insn, &state))
+		if (handle_insn_ops(insn, next_insn, &state))
 			return 1;
 
 		switch (insn->type) {
@@ -2746,11 +2751,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			if (dead_end_function(file, insn->call_dest))
 				return 0;
 
-			if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
-				list_add_tail(&insn->static_call_node,
-					      &file->static_call_list);
-			}
-
 			break;
 
 		case INSN_JUMP_CONDITIONAL:
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 93fa833a49a5..d08f5f3670f8 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -211,32 +211,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
 	return find_reloc_by_dest_range(elf, sec, offset, 1);
 }
 
-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
-			      struct reloc *reloc)
-{
-	if (sec->sym) {
-		reloc->sym = sec->sym;
-		reloc->addend = offset;
-		return;
-	}
-
-	/*
-	 * The Clang assembler strips section symbols, so we have to reference
-	 * the function symbol instead:
-	 */
-	reloc->sym = find_symbol_containing(sec, offset);
-	if (!reloc->sym) {
-		/*
-		 * Hack alert.  This happens when we need to reference the NOP
-		 * pad insn immediately after the function.
-		 */
-		reloc->sym = find_symbol_containing(sec, offset - 1);
-	}
-
-	if (reloc->sym)
-		reloc->addend = offset - reloc->sym->offset;
-}
-
 static int read_sections(struct elf *elf)
 {
 	Elf_Scn *s = NULL;
@@ -316,12 +290,39 @@ static int read_sections(struct elf *elf)
 	return 0;
 }
 
+static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+{
+	struct list_head *entry;
+	struct rb_node *pnode;
+
+	sym->type = GELF_ST_TYPE(sym->sym.st_info);
+	sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+	sym->offset = sym->sym.st_value;
+	sym->len = sym->sym.st_size;
+
+	rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
+	pnode = rb_prev(&sym->node);
+	if (pnode)
+		entry = &rb_entry(pnode, struct symbol, node)->list;
+	else
+		entry = &sym->sec->symbol_list;
+	list_add(&sym->list, entry);
+	elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+	elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+
+	/*
+	 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
+	 * can exist within a function, confusing the sorting.
+	 */
+	if (!sym->len)
+		rb_erase(&sym->node, &sym->sec->symbol_tree);
+}
+
 static int read_symbols(struct elf *elf)
 {
 	struct section *symtab, *symtab_shndx, *sec;
 	struct symbol *sym, *pfunc;
-	struct list_head *entry;
-	struct rb_node *pnode;
 	int symbols_nr, i;
 	char *coldstr;
 	Elf_Data *shndx_data = NULL;
@@ -366,9 +367,6 @@ static int read_symbols(struct elf *elf)
 			goto err;
 		}
 
-		sym->type = GELF_ST_TYPE(sym->sym.st_info);
-		sym->bind = GELF_ST_BIND(sym->sym.st_info);
-
 		if ((sym->sym.st_shndx > SHN_UNDEF &&
 		     sym->sym.st_shndx < SHN_LORESERVE) ||
 		    (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
@@ -381,32 +379,14 @@ static int read_symbols(struct elf *elf)
 				     sym->name);
 				goto err;
 			}
-			if (sym->type == STT_SECTION) {
+			if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
 				sym->name = sym->sec->name;
 				sym->sec->sym = sym;
 			}
 		} else
 			sym->sec = find_section_by_index(elf, 0);
 
-		sym->offset = sym->sym.st_value;
-		sym->len = sym->sym.st_size;
-
-		rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
-		pnode = rb_prev(&sym->node);
-		if (pnode)
-			entry = &rb_entry(pnode, struct symbol, node)->list;
-		else
-			entry = &sym->sec->symbol_list;
-		list_add(&sym->list, entry);
-		elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-		elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
-
-		/*
-		 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
-		 * can exist within a function, confusing the sorting.
-		 */
-		if (!sym->len)
-			rb_erase(&sym->node, &sym->sec->symbol_tree);
+		elf_add_symbol(elf, sym);
 	}
 
 	if (stats)
@@ -473,12 +453,73 @@ err:
 	return -1;
 }
 
-void elf_add_reloc(struct elf *elf, struct reloc *reloc)
+static struct section *elf_create_reloc_section(struct elf *elf,
+						struct section *base,
+						int reltype);
+
+int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+		  unsigned int type, struct symbol *sym, int addend)
 {
-	struct section *sec = reloc->sec;
+	struct reloc *reloc;
 
-	list_add_tail(&reloc->list, &sec->reloc_list);
+	if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
+		return -1;
+
+	reloc = malloc(sizeof(*reloc));
+	if (!reloc) {
+		perror("malloc");
+		return -1;
+	}
+	memset(reloc, 0, sizeof(*reloc));
+
+	reloc->sec = sec->reloc;
+	reloc->offset = offset;
+	reloc->type = type;
+	reloc->sym = sym;
+	reloc->addend = addend;
+
+	list_add_tail(&reloc->list, &sec->reloc->reloc_list);
 	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
+	sec->reloc->changed = true;
+
+	return 0;
+}
+
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+			  unsigned long offset, unsigned int type,
+			  struct section *insn_sec, unsigned long insn_off)
+{
+	struct symbol *sym;
+	int addend;
+
+	if (insn_sec->sym) {
+		sym = insn_sec->sym;
+		addend = insn_off;
+
+	} else {
+		/*
+		 * The Clang assembler strips section symbols, so we have to
+		 * reference the function symbol instead:
+		 */
+		sym = find_symbol_containing(insn_sec, insn_off);
+		if (!sym) {
+			/*
+			 * Hack alert.  This happens when we need to reference
+			 * the NOP pad insn immediately after the function.
+			 */
+			sym = find_symbol_containing(insn_sec, insn_off - 1);
+		}
+
+		if (!sym) {
+			WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
+			return -1;
+		}
+
+		addend = insn_off - sym->offset;
+	}
+
+	return elf_add_reloc(elf, sec, offset, type, sym, addend);
 }
 
 static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
@@ -558,7 +599,9 @@ static int read_relocs(struct elf *elf)
 				return -1;
 			}
 
-			elf_add_reloc(elf, reloc);
+			list_add_tail(&reloc->list, &sec->reloc_list);
+			elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
 			nr_reloc++;
 		}
 		max_reloc = max(max_reloc, nr_reloc);
@@ -636,13 +679,108 @@ err:
 	return NULL;
 }
 
+static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+{
+	Elf_Data *data;
+	Elf_Scn *s;
+	int len;
+
+	if (!strtab)
+		strtab = find_section_by_name(elf, ".strtab");
+	if (!strtab) {
+		WARN("can't find .strtab section");
+		return -1;
+	}
+
+	s = elf_getscn(elf->elf, strtab->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return -1;
+	}
+
+	data = elf_newdata(s);
+	if (!data) {
+		WARN_ELF("elf_newdata");
+		return -1;
+	}
+
+	data->d_buf = str;
+	data->d_size = strlen(str) + 1;
+	data->d_align = 1;
+
+	len = strtab->len;
+	strtab->len += data->d_size;
+	strtab->changed = true;
+
+	return len;
+}
+
+/*
+ * Add an undefined global STT_NOTYPE symbol @name to .symtab; NULL on error.
+ */
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+{
+	struct section *symtab;
+	struct symbol *sym;
+	Elf_Data *data;
+	Elf_Scn *s;
+
+	sym = malloc(sizeof(*sym));
+	if (!sym) {
+		perror("malloc");
+		return NULL;
+	}
+	memset(sym, 0, sizeof(*sym));
+
+	sym->name = strdup(name);
+	if (!sym->name) {
+		perror("strdup");
+		return NULL;
+	}
+
+	sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+	if (sym->sym.st_name == -1)
+		return NULL;
+
+	/* st_other, st_shndx, st_value and st_size stay zero */
+	sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+
+	symtab = find_section_by_name(elf, ".symtab");
+	if (!symtab) {
+		WARN("can't find .symtab");
+		return NULL;
+	}
+
+	s = elf_getscn(elf->elf, symtab->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return NULL;
+	}
+
+	data = elf_newdata(s);
+	if (!data) {
+		WARN_ELF("elf_newdata");
+		return NULL;
+	}
+	data->d_buf = &sym->sym;
+	data->d_size = sizeof(sym->sym);
+	data->d_align = 1;
+
+	sym->idx = symtab->len / sizeof(sym->sym);
+	symtab->len += data->d_size;
+	symtab->changed = true;
+
+	sym->sec = find_section_by_index(elf, 0);
+	elf_add_symbol(elf, sym);
+	return sym;
+}
+
 struct section *elf_create_section(struct elf *elf, const char *name,
 				   unsigned int sh_flags, size_t entsize, int nr)
 {
 	struct section *sec, *shstrtab;
 	size_t size = entsize * nr;
 	Elf_Scn *s;
-	Elf_Data *data;
 
 	sec = malloc(sizeof(*sec));
 	if (!sec) {
@@ -699,7 +837,6 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 	sec->sh.sh_addralign = 1;
 	sec->sh.sh_flags = SHF_ALLOC | sh_flags;
 
-
 	/* Add section name to .shstrtab (or .strtab for Clang) */
 	shstrtab = find_section_by_name(elf, ".shstrtab");
 	if (!shstrtab)
@@ -708,27 +845,9 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 		WARN("can't find .shstrtab or .strtab section");
 		return NULL;
 	}
-
-	s = elf_getscn(elf->elf, shstrtab->idx);
-	if (!s) {
-		WARN_ELF("elf_getscn");
-		return NULL;
-	}
-
-	data = elf_newdata(s);
-	if (!data) {
-		WARN_ELF("elf_newdata");
+	sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+	if (sec->sh.sh_name == -1)
 		return NULL;
-	}
-
-	data->d_buf = sec->name;
-	data->d_size = strlen(name) + 1;
-	data->d_align = 1;
-
-	sec->sh.sh_name = shstrtab->len;
-
-	shstrtab->len += strlen(name) + 1;
-	shstrtab->changed = true;
 
 	list_add_tail(&sec->list, &elf->sections);
 	elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
@@ -799,7 +918,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
 	return sec;
 }
 
-struct section *elf_create_reloc_section(struct elf *elf,
+static struct section *elf_create_reloc_section(struct elf *elf,
 					 struct section *base,
 					 int reltype)
 {
@@ -873,14 +992,11 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
 	return 0;
 }
 
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
 {
 	struct reloc *reloc;
 	int nr;
 
-	sec->changed = true;
-	elf->changed = true;
-
 	nr = 0;
 	list_for_each_entry(reloc, &sec->reloc_list, list)
 		nr++;
@@ -944,9 +1060,15 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;
 
-	/* Update section headers for changed sections: */
+	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
+			if (sec->base &&
+			    elf_rebuild_reloc_section(elf, sec)) {
+				WARN("elf_rebuild_reloc_section");
+				return -1;
+			}
+
 			s = elf_getscn(elf->elf, sec->idx);
 			if (!s) {
 				WARN_ELF("elf_getscn");
@@ -958,6 +1080,7 @@ int elf_write(struct elf *elf)
 			}
 
 			sec->changed = false;
+			elf->changed = true;
 		}
 	}
 
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 6ff0685f5cc5..062bb6e9b865 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -35,7 +35,6 @@ enum op_dest_type {
 	OP_DEST_MEM,
 	OP_DEST_PUSH,
 	OP_DEST_PUSHF,
-	OP_DEST_LEAVE,
 };
 
 struct op_dest {
@@ -86,4 +85,8 @@ const char *arch_nop_insn(int len);
 
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
 
+bool arch_is_retpoline(struct symbol *sym);
+
+int arch_rewrite_retpolines(struct objtool_file *file);
+
 #endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 2502bb27de17..15ac0b7d3d6a 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,7 +8,10 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+            validate_dup, vmlinux, mcount, noinstr, backup;
+
+extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index f5be798107bc..56d50bc50c10 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -39,7 +39,7 @@ struct alt_group {
 struct instruction {
 	struct list_head list;
 	struct hlist_node hash;
-	struct list_head static_call_node;
+	struct list_head call_node;
 	struct list_head mcount_loc_node;
 	struct section *sec;
 	unsigned long offset;
@@ -56,6 +56,7 @@ struct instruction {
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
 	struct reloc *jump_table;
+	struct reloc *reloc;
 	struct list_head alts;
 	struct symbol *func;
 	struct list_head stack_ops;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index e6890cc70a25..45e5ede363b0 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -122,12 +122,18 @@ static inline u32 reloc_hash(struct reloc *reloc)
 
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
-struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
-void elf_add_reloc(struct elf *elf, struct reloc *reloc);
+
+int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+		  unsigned int type, struct symbol *sym, int addend);
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+			  unsigned long offset, unsigned int type,
+			  struct section *insn_sec, unsigned long insn_off);
+
 int elf_write_insn(struct elf *elf, struct section *sec,
 		   unsigned long offset, unsigned int len,
 		   const char *insn);
 int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
 int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 
@@ -140,9 +146,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
 struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
 				     unsigned long offset, unsigned int len);
 struct symbol *find_func_containing(struct section *sec, unsigned long offset);
-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
-			      struct reloc *reloc);
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
 
 #define for_each_sec(file, sec)						\
 	list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index e68e37476c15..e4084afb2304 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -18,6 +18,7 @@ struct objtool_file {
 	struct elf *elf;
 	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 20);
+	struct list_head retpoline_call_list;
 	struct list_head static_call_list;
 	struct list_head mcount_loc_list;
 	bool ignore_unreachables, c_file, hints, rodata;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 7b97ce499405..e21db8bce493 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -17,6 +17,7 @@
 #include <stdbool.h>
 #include <string.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <subcmd/exec-cmd.h>
 #include <subcmd/pager.h>
 #include <linux/kernel.h>
@@ -44,6 +45,64 @@ bool help;
 const char *objname;
 static struct objtool_file file;
 
+/*
+ * Copy @_objname to "<_objname>.orig".  Returns false, after perror(), on
+ * failure; every exit path frees the buffers and closes the descriptors.
+ */
+static bool objtool_create_backup(const char *_objname)
+{
+	int len = strlen(_objname), s = -1, d = -1, l, t;
+	char *buf = NULL, *base, *name = malloc(len+6);
+	bool ok = false;
+
+	if (!name) {
+		perror("failed backup name malloc");
+		return false;
+	}
+	strcpy(name, _objname);
+	strcpy(name + len, ".orig");
+
+	d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+	if (d < 0) {
+		perror("failed to create backup file");
+		goto out;
+	}
+
+	s = open(_objname, O_RDONLY);
+	if (s < 0) {
+		perror("failed to open orig file");
+		goto out;
+	}
+
+	buf = malloc(4096);
+	if (!buf) {
+		perror("failed backup data malloc");
+		goto out;
+	}
+
+	while ((l = read(s, buf, 4096)) > 0) {
+		for (base = buf; l; base += t, l -= t) {
+			t = write(d, base, l);
+			if (t < 0) {
+				perror("failed backup write");
+				goto out;
+			}
+		}
+	}
+	if (l < 0)
+		perror("failed backup read");
+	else
+		ok = true;
+out:
+	free(buf);
+	free(name);
+	if (s >= 0)
+		close(s);
+	if (d >= 0)
+		close(d);
+	return ok;
+}
+
 struct objtool_file *objtool_open_read(const char *_objname)
 {
 	if (objname) {
@@ -59,8 +118,14 @@ struct objtool_file *objtool_open_read(const char *_objname)
 	if (!file.elf)
 		return NULL;
 
+	if (backup && !objtool_create_backup(objname)) {
+		WARN("can't create backup file");
+		return NULL;
+	}
+
 	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
+	INIT_LIST_HEAD(&file.retpoline_call_list);
 	INIT_LIST_HEAD(&file.static_call_list);
 	INIT_LIST_HEAD(&file.mcount_loc_list);
 	file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 738aa5021bc4..dc9b7dd314b0 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -82,12 +82,11 @@ static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
 }
 
 static int write_orc_entry(struct elf *elf, struct section *orc_sec,
-			   struct section *ip_rsec, unsigned int idx,
+			   struct section *ip_sec, unsigned int idx,
 			   struct section *insn_sec, unsigned long insn_off,
 			   struct orc_entry *o)
 {
 	struct orc_entry *orc;
-	struct reloc *reloc;
 
 	/* populate ORC data */
 	orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
@@ -96,25 +95,9 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec,
 	orc->bp_offset = bswap_if_needed(orc->bp_offset);
 
 	/* populate reloc for ip */
-	reloc = malloc(sizeof(*reloc));
-	if (!reloc) {
-		perror("malloc");
+	if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32,
+				  insn_sec, insn_off))
 		return -1;
-	}
-	memset(reloc, 0, sizeof(*reloc));
-
-	insn_to_reloc_sym_addend(insn_sec, insn_off, reloc);
-	if (!reloc->sym) {
-		WARN("missing symbol for insn at offset 0x%lx",
-		     insn_off);
-		return -1;
-	}
-
-	reloc->type = R_X86_64_PC32;
-	reloc->offset = idx * sizeof(int);
-	reloc->sec = ip_rsec;
-
-	elf_add_reloc(elf, reloc);
 
 	return 0;
 }
@@ -153,7 +136,7 @@ static unsigned long alt_group_len(struct alt_group *alt_group)
 
 int orc_create(struct objtool_file *file)
 {
-	struct section *sec, *ip_rsec, *orc_sec;
+	struct section *sec, *orc_sec;
 	unsigned int nr = 0, idx = 0;
 	struct orc_list_entry *entry;
 	struct list_head orc_list;
@@ -242,20 +225,14 @@ int orc_create(struct objtool_file *file)
 	sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
 	if (!sec)
 		return -1;
-	ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-	if (!ip_rsec)
-		return -1;
 
 	/* Write ORC entries to sections: */
 	list_for_each_entry(entry, &orc_list, list) {
-		if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
+		if (write_orc_entry(file->elf, orc_sec, sec, idx++,
 				    entry->insn_sec, entry->insn_off,
 				    &entry->orc))
 			return -1;
 	}
 
-	if (elf_rebuild_reloc_section(file->elf, ip_rsec))
-		return -1;
-
 	return 0;
 }
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 2c7fbda7b055..07b21cfabf5c 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 			return -1;
 		}
 
+		/*
+		 * Skip retpoline .altinstr_replacement... we already rewrite the
+		 * instructions for retpolines anyway, see arch_is_retpoline()
+		 * usage in add_{call,jump}_destinations().
+		 */
+		if (arch_is_retpoline(new_reloc->sym))
+			return 1;
+
 		alt->new_sec = new_reloc->sym->sec;
 		alt->new_off = (unsigned int)new_reloc->addend;
 
@@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
 			memset(alt, 0, sizeof(*alt));
 
 			ret = get_alt_entry(elf, entry, sec, idx, alt);
-			if (ret)
+			if (ret > 0)
+				continue;
+			if (ret < 0)
 				return ret;
 
 			list_add_tail(&alt->list, alts);
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 606a4b5e929f..105a291ff8e7 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -10,17 +10,21 @@ FILES="include/linux/objtool.h"
 
 if [ "$SRCARCH" = "x86" ]; then
 FILES="$FILES
+arch/x86/include/asm/nops.h
 arch/x86/include/asm/inat_types.h
 arch/x86/include/asm/orc_types.h
 arch/x86/include/asm/emulate_prefix.h
 arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
 include/linux/static_call_types.h
-arch/x86/include/asm/inat.h     -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
-arch/x86/include/asm/insn.h     -I '^#include [\"<]\(asm/\)*inat.h[\">]'
-arch/x86/lib/inat.c             -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
-arch/x86/lib/insn.c             -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
 "
+
+SYNC_CHECK_FILES='
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/insn.h
+arch/x86/lib/inat.c
+arch/x86/lib/insn.c
+'
 fi
 
 check_2 () {
@@ -63,3 +67,9 @@ while read -r file_entry; do
 done <<EOF
 $FILES
 EOF
+
+if [ "$SRCARCH" = "x86" ]; then
+	for i in $SYNC_CHECK_FILES; do
+		check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+	done
+fi
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index 4f75ae990140..0262b0d8ccf5 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -96,13 +96,12 @@ static int get_branch(const char *branch_str)
 static int test_data_item(struct test_data *dat, int x86_64)
 {
 	struct intel_pt_insn intel_pt_insn;
+	int op, branch, ret;
 	struct insn insn;
-	int op, branch;
 
-	insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64);
-	insn_get_length(&insn);
-
-	if (!insn_complete(&insn)) {
+	ret = insn_decode(&insn, dat->data, MAX_INSN_SIZE,
+			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+	if (ret < 0) {
 		pr_debug("Failed to decode: %s\n", dat->asm_rep);
 		return -1;
 	}
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 34d600c51044..546feda08428 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -11,7 +11,7 @@ void arch_fetch_insn(struct perf_sample *sample,
 		     struct machine *machine)
 {
 	struct insn insn;
-	int len;
+	int len, ret;
 	bool is64bit = false;
 
 	if (!sample->ip)
@@ -19,8 +19,9 @@ void arch_fetch_insn(struct perf_sample *sample,
 	len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit);
 	if (len <= 0)
 		return;
-	insn_init(&insn, sample->insn, len, is64bit);
-	insn_get_length(&insn);
-	if (insn_complete(&insn) && insn.length <= len)
+
+	ret = insn_decode(&insn, sample->insn, len,
+			  is64bit ? INSN_MODE_64 : INSN_MODE_32);
+	if (ret >= 0 && insn.length <= len)
 		sample->insn_len = insn.length;
 }
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a6420c647959..6df0dc00d73a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -776,6 +776,12 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		}
 	}
 
+	if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) {
+		u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4;
+
+		intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
+	}
+
 	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
 			     "tsc", &tsc_bit);
 
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d49448a1060c..87cb11a7a3ee 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -289,7 +289,7 @@ static int set_tracing_pid(struct perf_ftrace *ftrace)
 
 	for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) {
 		scnprintf(buf, sizeof(buf), "%d",
-			  ftrace->evlist->core.threads->map[i]);
+			  perf_thread_map__pid(ftrace->evlist->core.threads, i));
 		if (append_tracing_file("set_ftrace_pid", buf) < 0)
 			return -1;
 	}
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index dded93a2bc89..07857dfb4d91 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h
 include/uapi/asm-generic/unistd.h
 '
 
+SYNC_CHECK_FILES='
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/insn.h
+arch/x86/lib/inat.c
+arch/x86/lib/insn.c
+'
+
 # These copies are under tools/perf/trace/beauty/ as they are not used to in
 # building object files only by scripts in tools/perf/trace/beauty/ to generate
 # tables that then gets included in .c files for things like id->string syscall
@@ -129,6 +136,10 @@ for i in $FILES; do
   check $i -B
 done
 
+for i in $SYNC_CHECK_FILES; do
+  check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+done
+
 # diff with extra ignore lines
 check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
 check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
@@ -137,10 +148,6 @@ check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
 check include/linux/build_bug.h       '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
 check include/linux/ctype.h	      '-I "isdigit("'
 check lib/ctype.c		      '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
-check arch/x86/include/asm/inat.h     '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h     '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c	      '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
 
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
index 27ee1ea1fe94..9b0614a87831 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h
 
 printf "static const char *x86_MSRs[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \
+egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSC_DEADLINE|UCODE_REV)|IDT_FCR4)' | \
 	sed -r "s/$regex/\2 \1/g" | sort -n | \
 	xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 5b6ccb90b397..1b4091a3b508 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -634,7 +634,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 		break;
 	}
 
-	if (itr)
+	if (itr && itr->parse_snapshot_options)
 		return itr->parse_snapshot_options(itr, opts, str);
 
 	pr_err("No AUX area tracing to snapshot\n");
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index f29af4fc3d09..8fca4779ae6a 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -35,7 +35,7 @@ void perf_data__close_dir(struct perf_data *data)
 int perf_data__create_dir(struct perf_data *data, int nr)
 {
 	struct perf_data_file *files = NULL;
-	int i, ret = -1;
+	int i, ret;
 
 	if (WARN_ON(!data->is_dir))
 		return -EINVAL;
@@ -51,7 +51,8 @@ int perf_data__create_dir(struct perf_data *data, int nr)
 	for (i = 0; i < nr; i++) {
 		struct perf_data_file *file = &files[i];
 
-		if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0)
+		ret = asprintf(&file->path, "%s/data.%d", data->path, i);
+		if (ret < 0)
 			goto out_err;
 
 		ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 2f6cc7eea251..593f20e9774c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -169,11 +169,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
 		      struct intel_pt_insn *intel_pt_insn)
 {
 	struct insn insn;
+	int ret;
 
-	insn_init(&insn, buf, len, x86_64);
-	insn_get_length(&insn);
-	if (!insn_complete(&insn) || insn.length > len)
+	ret = insn_decode(&insn, buf, len,
+			  x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+	if (ret < 0 || insn.length > len)
 		return -1;
+
 	intel_pt_insn_decoder(&insn, intel_pt_insn);
 	if (insn.length < INTEL_PT_INSN_BUF_SZ)
 		memcpy(intel_pt_insn->buf, buf, insn.length);
@@ -194,12 +196,13 @@ const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
 		      u8 *inbuf, int inlen, int *lenp)
 {
 	struct insn insn;
-	int n, i;
+	int n, i, ret;
 	int left;
 
-	insn_init(&insn, inbuf, inlen, x->is64bit);
-	insn_get_length(&insn);
-	if (!insn_complete(&insn) || insn.length > inlen)
+	ret = insn_decode(&insn, inbuf, inlen,
+			  x->is64bit ? INSN_MODE_64 : INSN_MODE_32);
+
+	if (ret < 0 || insn.length > inlen)
 		return "<bad>";
 	if (lenp)
 		*lenp = insn.length;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index fbc40a2c17d4..8af693d9678c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -840,15 +840,18 @@ out:
 int maps__clone(struct thread *thread, struct maps *parent)
 {
 	struct maps *maps = thread->maps;
-	int err = -ENOMEM;
+	int err;
 	struct map *map;
 
 	down_read(&parent->lock);
 
 	maps__for_each_entry(parent, map) {
 		struct map *new = map__clone(map);
-		if (new == NULL)
+
+		if (new == NULL) {
+			err = -ENOMEM;
 			goto out_unlock;
+		}
 
 		err = unwind__prepare_access(maps, new, NULL);
 		if (err)
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 9ea2c0aeb86c..185b8c588e1d 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
 /******************************************************************************
  *
- * Module Name: cfsize - Common get file size function
+ * Module Name: cmfsize - Common get file size function
  *
  * Copyright (C) 2000 - 2021, Intel Corp.
  *
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 81f4b8abbdf7..ffd50953a024 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -6819,7 +6819,7 @@ if __name__ == '__main__':
 			sysvals.outdir = val
 			sysvals.notestrun = True
 			if(os.path.isdir(val) == False):
-				doError('%s is not accesible' % val)
+				doError('%s is not accessible' % val)
 		elif(arg == '-filter'):
 			try:
 				val = next(args)
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 582feb88eca3..ab940c508ef0 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.8";
+static const char *version_str = "v1.9";
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
@@ -381,6 +381,18 @@ static void set_cpu_online_offline(int cpu, int state)
 	close(fd);
 }
 
+static void force_all_cpus_online(void)
+{
+	int i;
+
+	fprintf(stderr, "Forcing all CPUs online\n");
+
+	for (i = 0; i < topo_max_cpus; ++i)
+		set_cpu_online_offline(i, 1);
+
+	unlink("/var/run/isst_cpu_topology.dat");
+}
+
 #define MAX_PACKAGE_COUNT 8
 #define MAX_DIE_PER_PACKAGE 2
 static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
@@ -959,6 +971,10 @@ static void isst_print_extended_platform_info(void)
 		fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n");
 
 	ret = isst_read_pm_config(i, &cp_state, &cp_cap);
+	if (ret) {
+		fprintf(outf, "Intel(R) SST-CP (feature core-power) status is unknown\n");
+		return;
+	}
 	if (cp_cap)
 		fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n");
 	else
@@ -2763,6 +2779,7 @@ static void usage(void)
 	printf("\t[-f|--format] : output format [json|text]. Default: text\n");
 	printf("\t[-h|--help] : Print help\n");
 	printf("\t[-i|--info] : Print platform information\n");
+	printf("\t[-a|--all-cpus-online] : Force online every CPU in the system\n");
 	printf("\t[-o|--out] : Output file\n");
 	printf("\t\t\tDefault : stderr\n");
 	printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
@@ -2791,7 +2808,6 @@ static void usage(void)
 static void print_version(void)
 {
 	fprintf(outf, "Version %s\n", version_str);
-	fprintf(outf, "Build date %s time %s\n", __DATE__, __TIME__);
 	exit(0);
 }
 
@@ -2800,11 +2816,12 @@ static void cmdline(int argc, char **argv)
 	const char *pathname = "/dev/isst_interface";
 	char *ptr;
 	FILE *fp;
-	int opt;
+	int opt, force_cpus_online = 0;
 	int option_index = 0;
 	int ret;
 
 	static struct option long_options[] = {
+		{ "all-cpus-online", no_argument, 0, 'a' },
 		{ "cpu", required_argument, 0, 'c' },
 		{ "debug", no_argument, 0, 'd' },
 		{ "format", required_argument, 0, 'f' },
@@ -2840,9 +2857,12 @@ static void cmdline(int argc, char **argv)
 	}
 
 	progname = argv[0];
-	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:v", long_options,
+	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options,
 				       &option_index)) != -1) {
 		switch (opt) {
+		case 'a':
+			force_cpus_online = 1;
+			break;
 		case 'c':
 			parse_cpu_command(optarg);
 			break;
@@ -2892,6 +2912,8 @@ static void cmdline(int argc, char **argv)
 		exit(0);
 	}
 	set_max_cpu_num();
+	if (force_cpus_online)
+		force_all_cpus_online();
 	store_cpu_topology();
 	set_cpu_present_cpu_mask();
 	set_cpu_target_cpu_mask();
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 8e54ce47648e..3bf1820c0da1 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -25,10 +25,14 @@ static void printcpulist(int str_len, char *str, int mask_size,
 			index = snprintf(&str[curr_index],
 					 str_len - curr_index, ",");
 			curr_index += index;
+			if (curr_index >= str_len)
+				break;
 		}
 		index = snprintf(&str[curr_index], str_len - curr_index, "%d",
 				 i);
 		curr_index += index;
+		if (curr_index >= str_len)
+			break;
 		first = 0;
 	}
 }
@@ -64,10 +68,14 @@ static void printcpumask(int str_len, char *str, int mask_size,
 		index = snprintf(&str[curr_index], str_len - curr_index, "%08x",
 				 mask[i]);
 		curr_index += index;
+		if (curr_index >= str_len)
+			break;
 		if (i) {
 			strncat(&str[curr_index], ",", str_len - curr_index);
 			curr_index++;
 		}
+		if (curr_index >= str_len)
+			break;
 	}
 
 	free(mask);
@@ -185,7 +193,7 @@ static void _isst_pbf_display_information(int cpu, FILE *outf, int level,
 					  int disp_level)
 {
 	char header[256];
-	char value[256];
+	char value[512];
 
 	snprintf(header, sizeof(header), "speed-select-base-freq-properties");
 	format_and_print(outf, disp_level, header, NULL);
@@ -349,7 +357,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 				   struct isst_pkg_ctdp *pkg_dev)
 {
 	char header[256];
-	char value[256];
+	char value[512];
 	static int level;
 	int i;
 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index a7c4f0772e53..5939615265f1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2449,7 +2449,7 @@ dump_knl_turbo_ratio_limits(void)
 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
 		base_cpu, msr);
 
-	/**
+	/*
 	 * Turbo encoding in KNL is as follows:
 	 * [0] -- Reserved
 	 * [7:1] -- Base value of number of active cores of bucket 1.
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 91130648d8e6..25adfec2cb39 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -94,7 +94,8 @@ endif
 # in newer systems.
 # Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
 #
-# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
+# See https://lore.kernel.org/lkml/9a8748490611281710g78402fbeh8ff7fcc162dbcbca@mail.gmail.com/
+# and https://gcc.gnu.org/gcc-4.8/changes.html,
 # that takes into account Linus's comments (search for Wshadow) for the reasoning about
 # -Wshadow not being interesting before gcc 4.8.
 
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index ada881afb489..0aa6dbd31fb8 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -25,11 +25,12 @@ include $(srctree)/tools/build/Makefile.include
 #
 # We need the following to be outside of kernel tree
 #
-$(OUTPUT)include/linux/spi/spidev.h: ../../include/uapi/linux/spi/spidev.h
+$(OUTPUT)include/linux/spi: ../../include/uapi/linux/spi
 	mkdir -p $(OUTPUT)include/linux/spi 2>&1 || true
 	ln -sf $(CURDIR)/../../include/uapi/linux/spi/spidev.h $@
+	ln -sf $(CURDIR)/../../include/uapi/linux/spi/spi.h $@
 
-prepare: $(OUTPUT)include/linux/spi/spidev.h
+prepare: $(OUTPUT)include/linux/spi
 
 #
 # spidev_test
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index d5144fcb03ac..5da8fb3762f9 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -184,7 +184,9 @@ def add_common_opts(parser) -> None:
 			    help='Run all KUnit tests through allyesconfig',
 			    action='store_true')
 	parser.add_argument('--kunitconfig',
-			     help='Path to Kconfig fragment that enables KUnit tests',
+			     help='Path to Kconfig fragment that enables KUnit tests.'
+			     ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" '
+			     'will get automatically appended.',
 			     metavar='kunitconfig')
 
 def add_build_opts(parser) -> None:
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index f309a33256cd..89a7d4024e87 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -132,6 +132,8 @@ class LinuxSourceTree(object):
 			return
 
 		if kunitconfig_path:
+			if os.path.isdir(kunitconfig_path):
+				kunitconfig_path = os.path.join(kunitconfig_path, KUNITCONFIG_PATH)
 			if not os.path.exists(kunitconfig_path):
 				raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist')
 		else:
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 1ad3049e9069..2e809dd956a7 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -251,6 +251,12 @@ class LinuxSourceTreeTest(unittest.TestCase):
 		with tempfile.NamedTemporaryFile('wt') as kunitconfig:
 			tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name)
 
+	def test_dir_kunitconfig(self):
+		with tempfile.TemporaryDirectory('') as tmpdir:
+			# Create an empty .kunitconfig inside the directory, then pass the directory itself.
+			open(os.path.join(tmpdir, '.kunitconfig'), 'w').close()
+			kunit_kernel.LinuxSourceTree('', kunitconfig_path=tmpdir)
+
 	# TODO: add more test cases.
 
 
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 2c9d012797a7..ced910fb4019 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal pauth fp mte
+ARM64_SUBTARGETS ?= tags signal pauth fp mte bti
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
new file mode 100644
index 000000000000..73e013c082a6
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := btitest nobtitest
+
+PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
+
+# These tests are built as freestanding binaries since otherwise BTI
+# support in ld.so is required which is not currently widespread; when
+# it is available it will still be useful to test this separately as the
+# cases for statically linked and dynamically linked binaries are
+# slightly different.
+
+CFLAGS_NOBTI = -DBTI=0
+CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
+
+CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
+
+BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
+NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
+
+%-bti.o: %.c
+	$(BTI_CC_COMMAND)
+
+%-bti.o: %.S
+	$(BTI_CC_COMMAND)
+
+%-nobti.o: %.c
+	$(NOBTI_CC_COMMAND)
+
+%-nobti.o: %.S
+	$(NOBTI_CC_COMMAND)
+
+BTI_OBJS =                                      \
+	test-bti.o                           \
+	signal-bti.o                            \
+	start-bti.o                             \
+	syscall-bti.o                           \
+	system-bti.o                            \
+	teststubs-bti.o                         \
+	trampoline-bti.o
+gen/btitest: $(BTI_OBJS)
+	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+
+NOBTI_OBJS =                                    \
+	test-nobti.o                         \
+	signal-nobti.o                          \
+	start-nobti.o                           \
+	syscall-nobti.o                         \
+	system-nobti.o                          \
+	teststubs-nobti.o                       \
+	trampoline-nobti.o
+gen/nobtitest: $(NOBTI_OBJS)
+	$(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(PROGS)
+	cp $(PROGS) $(OUTPUT)/
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
new file mode 100644
index 000000000000..04e7b72880ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#define NT_GNU_PROPERTY_TYPE_0	5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI	(1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC	(1U << 1)
+
+
+.macro startfn name:req
+	.globl \name
+\name:
+	.macro endfn
+		.size \name, . - \name
+		.type \name, @function
+		.purgem endfn
+	.endm
+.endm
+
+.macro emit_aarch64_feature_1_and
+	.pushsection .note.gnu.property, "a"
+	.align	3
+	.long	2f - 1f
+	.long	6f - 3f
+	.long	NT_GNU_PROPERTY_TYPE_0
+1:	.string	"GNU"
+2:
+	.align	3
+3:	.long	GNU_PROPERTY_AARCH64_FEATURE_1_AND
+	.long	5f - 4f
+4:
+#if BTI
+	.long	GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \
+		GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+	.long	0
+#endif
+5:
+	.align	3
+6:
+	.popsection
+.endm
+
+.macro paciasp
+	hint	0x19
+.endm
+
+.macro autiasp
+	hint	0x1d
+.endm
+
+.macro __bti_
+	hint	0x20
+.endm
+
+.macro __bti_c
+	hint	0x22
+.endm
+
+.macro __bti_j
+	hint	0x24
+.endm
+
+.macro __bti_jc
+	hint	0x26
+.endm
+
+.macro bti what=
+	__bti_\what
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h
new file mode 100644
index 000000000000..2aff9b10336e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/btitest.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef BTITEST_H
+#define BTITEST_H
+
+/* Trampolines for calling the test stubs: */
+void call_using_br_x0(void (*)(void));
+void call_using_br_x16(void (*)(void));
+void call_using_blr(void (*)(void));
+
+/* Test stubs: */
+void nohint_func(void);
+void bti_none_func(void);
+void bti_c_func(void);
+void bti_j_func(void);
+void bti_jc_func(void);
+void paciasp_func(void);
+
+#endif /* !BTITEST_H */
diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h
new file mode 100644
index 000000000000..ebb6204f447a
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/compiler.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#define __always_unused __attribute__((__unused__))
+#define __noreturn __attribute__((__noreturn__))
+#define __unreachable() __builtin_unreachable()
+
+/* curse(e) has value e, but the compiler cannot assume so */
+#define curse(e) ({				\
+	__typeof__(e) __curse_e = (e);		\
+	asm ("" : "+r" (__curse_e));		\
+	__curse_e;				\
+})
+
+#endif /* ! COMPILER_H */
diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/gen/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c
new file mode 100644
index 000000000000..f3fd29b91141
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+#include "signal.h"
+
+int sigemptyset(sigset_t *s)
+{
+	unsigned int word = _NSIG_WORDS;
+
+	while (word--)		/* clear every word of the mask */
+		s->sig[word] = 0;
+
+	return 0;
+}
+
+int sigaddset(sigset_t *s, int n)
+{
+	if (n < 1 || n > _NSIG)	/* only signal numbers 1.._NSIG are accepted */
+		return -EINVAL;
+	/* Signal n is tracked by bit (n - 1): word index, then bit offset within the word. */
+	s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW;
+	return 0;
+}
+
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old)
+{	/* NOTE(review): const is on "old", not "sa"; looks swapped vs. POSIX - confirm */
+	return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask));	/* last arg: mask size */
+}
+
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old)
+{
+	return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask));	/* last arg: mask size */
+}
diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h
new file mode 100644
index 000000000000..103457dc880e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SIGNAL_H
+#define SIGNAL_H
+
+#include <linux/signal.h>
+
+#include "system.h"
+
+typedef __sighandler_t sighandler_t;
+
+int sigemptyset(sigset_t *s);
+int sigaddset(sigset_t *s, int n);
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old);
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old);
+
+#endif /* ! SIGNAL_H */
diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S
new file mode 100644
index 000000000000..831f952e0572
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/start.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn _start
+	mov	x0, sp
+	b	start
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S
new file mode 100644
index 000000000000..8dde8b6f3db1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/syscall.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn syscall
+	bti	c		// BTI landing pad for indirect calls
+	mov	w8, w0		// first C argument (nr) becomes the syscall number in w8
+	mov	x0, x1		// shift the remaining arguments down by one register
+	mov	x1, x2
+	mov	x2, x3
+	mov	x3, x4
+	mov	x4, x5
+	mov	x5, x6
+	mov	x6, x7
+	svc	#0		// enter the kernel
+	ret			// x0 is returned exactly as the kernel left it
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c
new file mode 100644
index 000000000000..6385d8d4973b
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <asm/unistd.h>
+
+#include "compiler.h"
+
+void __noreturn exit(int n)
+{
+	syscall(__NR_exit, n);
+	__unreachable();
+}
+
+ssize_t write(int fd, const void *buf, size_t size)
+{
+	return syscall(__NR_write, fd, buf, size);
+}
diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h
new file mode 100644
index 000000000000..aca118589705
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+typedef __kernel_size_t size_t;
+typedef __kernel_ssize_t ssize_t;
+
+#include <linux/errno.h>
+#include <asm/hwcap.h>
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+
+#include "compiler.h"
+
+long syscall(int nr, ...);
+
+void __noreturn exit(int n);
+ssize_t write(int fd, const void *buf, size_t size);
+
+#endif /* ! SYSTEM_H */
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
new file mode 100644
index 000000000000..656b04976ccc
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019,2021  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <linux/errno.h>
+#include <linux/auxvec.h>
+#include <linux/signal.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+
+typedef struct ucontext ucontext_t;
+
+#include "btitest.h"
+#include "compiler.h"
+#include "signal.h"
+
+#define EXPECTED_TESTS 18
+
+static volatile unsigned int test_num = 1;
+static unsigned int test_passed;
+static unsigned int test_failed;
+static unsigned int test_skipped;
+
+static void fdputs(int fd, const char *str)
+{
+	const char *end = str;
+
+	/* Hand-rolled strlen(): this binary is built -ffreestanding -nostdlib. */
+	while (*end)
+		end++;
+
+	write(fd, str, end - str);
+}
+
+static void putstr(const char *str)
+{
+	fdputs(1, str);
+}
+
+static void putnum(unsigned int num)
+{
+	char c;
+
+	if (num / 10)		/* more than one digit: emit the leading digits first */
+		putnum(num / 10);
+
+	c = '0' + (num % 10);	/* then the least significant decimal digit */
+	write(1, &c, 1);
+}
+
+#define puttestname(test_name, trampoline_name) do {	\
+	putstr(test_name);				\
+	putstr("/");					\
+	putstr(trampoline_name);			\
+} while (0)
+
+void print_summary(void)
+{
+	putstr("# Totals: pass:");
+	putnum(test_passed);
+	putstr(" fail:");
+	putnum(test_failed);
+	putstr(" xfail:0 xpass:0 skip:");
+	putnum(test_skipped);
+	putstr(" error:0\n");
+}
+
+static const char *volatile current_test_name;
+static const char *volatile current_trampoline_name;
+static volatile int sigill_expected, sigill_received;
+
+static void handler(int n, siginfo_t *si __always_unused,
+		    void *uc_)
+{
+	ucontext_t *uc = uc_;
+
+	putstr("# \t[SIGILL in ");
+	puttestname(current_test_name, current_trampoline_name);
+	putstr(", BTYPE=");	/* followed by BTYPE as two binary digits */
+	write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK)
+			      >> PSR_BTYPE_SHIFT) * 2], 2);
+	if (!sigill_expected) {
+		putstr("]\nnot ok ");
+		putnum(test_num);
+		putstr(" ");
+		puttestname(current_test_name, current_trampoline_name);
+		putstr("(unexpected SIGILL)\n");
+		test_failed++;	/* keep the totals consistent with the "not ok" above */
+		print_summary();
+		exit(128 + n);
+	}
+
+	putstr(" (expected)]\n");
+	sigill_received = 1;
+	/* zap BTYPE so that resuming the faulting code will work */
+	uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK;
+}
+
+static int skip_all;
+
+static void __do_test(void (*trampoline)(void (*)(void)),
+		      void (*fn)(void),
+		      const char *trampoline_name,
+		      const char *name,
+		      int expect_sigill)
+{
+	if (skip_all) {
+		test_skipped++;
+		putstr("ok ");
+		putnum(test_num++);	/* skipped tests consume a test number too */
+		putstr(" ");
+		puttestname(name, trampoline_name);
+		putstr(" # SKIP\n");
+
+		return;
+	}
+
+	/* Branch Target exceptions should only happen in BTI binaries: */
+	if (!BTI)
+		expect_sigill = 0;
+
+	sigill_expected = expect_sigill;	/* published for handler() */
+	sigill_received = 0;
+	current_test_name = name;
+	current_trampoline_name = trampoline_name;
+
+	trampoline(fn);	/* may raise SIGILL; handler() records it and resumes */
+
+	if (expect_sigill && !sigill_received) {
+		putstr("not ok ");
+		test_failed++;
+	} else {
+		putstr("ok ");
+		test_passed++;
+	}
+	putnum(test_num++);
+	putstr(" ");
+	puttestname(name, trampoline_name);
+	putstr("\n");
+}
+
+#define do_test(expect_sigill_br_x0,					\
+		expect_sigill_br_x16,					\
+		expect_sigill_blr,					\
+		name)							\
+do {									\
+	__do_test(call_using_br_x0, name, "call_using_br_x0", #name,	\
+		  expect_sigill_br_x0);					\
+	__do_test(call_using_br_x16, name, "call_using_br_x16", #name,	\
+		  expect_sigill_br_x16);				\
+	__do_test(call_using_blr, name, "call_using_blr", #name,	\
+		  expect_sigill_blr);					\
+} while (0)
+
+void start(int *argcp)	/* entered from _start, which passes the initial stack pointer */
+{
+	struct sigaction sa;
+	void *const *p;
+	const struct auxv_entry {
+		unsigned long type;
+		unsigned long val;
+	} *auxv;
+	unsigned long hwcap = 0, hwcap2 = 0;
+
+	putstr("TAP version 13\n");
+	putstr("1..");
+	putnum(EXPECTED_TESTS);
+	putstr("\n");
+
+	/* Gross hack for finding AT_HWCAP and AT_HWCAP2 on the initial process stack: */
+	p = (void *const *)argcp + 1 + *argcp + 1; /* skip argc, argv[] and its NULL: start of environment */
+	/* step over environment */
+	while (*p++)
+		;
+	for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) {
+		switch (auxv->type) {
+		case AT_HWCAP:
+			hwcap = auxv->val;
+			break;
+		case AT_HWCAP2:
+			hwcap2 = auxv->val;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (hwcap & HWCAP_PACA)
+		putstr("# HWCAP_PACA present\n");
+	else
+		putstr("# HWCAP_PACA not present\n");
+
+	if (hwcap2 & HWCAP2_BTI) {
+		putstr("# HWCAP2_BTI present\n");
+		if (!(hwcap & HWCAP_PACA))
+			putstr("# Bad hardware?  Expect problems.\n");
+	} else {
+		putstr("# HWCAP2_BTI not present\n");
+		skip_all = 1;	/* __do_test() then reports every case as SKIP */
+	}
+
+	putstr("# Test binary");
+	if (!BTI)
+		putstr(" not");
+	putstr(" built for BTI\n");
+
+	sa.sa_handler = (sighandler_t)(void *)handler;	/* 3-argument handler, hence SA_SIGINFO */
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	sigaction(SIGILL, &sa, NULL);
+	sigaddset(&sa.sa_mask, SIGILL);	/* reuse the mask to make sure SIGILL is unblocked */
+	sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL);
+
+	do_test(1, 1, 1, nohint_func);	/* flags: SIGILL expected via br x0, br x16, blr */
+	do_test(1, 1, 1, bti_none_func);
+	do_test(1, 0, 0, bti_c_func);
+	do_test(0, 0, 1, bti_j_func);
+	do_test(0, 0, 0, bti_jc_func);
+	do_test(1, 0, 0, paciasp_func);
+
+	print_summary();
+
+	if (test_num - 1 != EXPECTED_TESTS)	/* test_num is the next number to be used */
+		putstr("# WARNING - EXPECTED TEST COUNT WRONG\n");
+
+	if (test_failed)
+		exit(1);
+	else
+		exit(0);
+}
diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S
new file mode 100644
index 000000000000..b62c8c35f67e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/teststubs.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn bti_none_func
+	bti
+	ret
+endfn
+
+startfn bti_c_func
+	bti	c
+	ret
+endfn
+
+startfn bti_j_func
+	bti	j
+	ret
+endfn
+
+startfn bti_jc_func
+	bti	jc
+	ret
+endfn
+
+startfn paciasp_func
+	paciasp
+	autiasp
+	ret
+endfn
+
+startfn nohint_func
+	ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S
new file mode 100644
index 000000000000..09beb3f361f1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/trampoline.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn call_using_br_x0
+	bti	c
+	br	x0		// x30 is untouched, so the stub's ret goes back to our caller
+endfn
+
+startfn call_using_br_x16
+	bti	c
+	mov	x16, x0
+	br	x16		// same as above, but branching via x16
+endfn
+
+startfn call_using_blr
+	paciasp
+	stp	x29, x30, [sp, #-16]!	// blr overwrites x30, so save the return address
+	blr	x0		// indirect call; the stub returns to the next instruction
+	ldp	x29, x30, [sp], #16
+	autiasp
+	ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index 0b3af552632a..409e3e53d00a 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2020 ARM Limited
 
-CFLAGS += -std=gnu99 -I. -lpthread
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -std=gnu99 -I. -pthread
+LDFLAGS += -pthread
 SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
 PROGS := $(patsubst %.c,%,$(SRCS))
 
 #Add mte compiler option
-ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
 CFLAGS += -march=armv8.5-a+memtag
-endif
 
 #check if the compiler works well
 mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
@@ -19,11 +23,14 @@ TEST_GEN_PROGS := $(PROGS)
 
 # Get Kernel headers installed and use them.
 KSFT_KHDR_INSTALL := 1
+else
+    $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.)
+    $(warning test program "mte" will not be created.)
 endif
 
 # Include KSFT lib.mk.
 include ../../lib.mk
 
 ifeq ($(mte_cc_support),1)
-$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S
 endif
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 3b23c4d61d38..88c74bc46d4f 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str)
 		ksft_print_msg("ERR: missing %s\n", str);
 		return 0;
 	}
-	fscanf(f, "%lu", &val);
+	if (fscanf(f, "%lu", &val) != 1) {
+		ksft_print_msg("ERR: parsing %s\n", str);
+		val = 0;
+	}
 	fclose(f);
 	return val;
 }
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 4bfa80f2a8c3..1de7a0abd0ae 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -33,7 +33,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
 	if (fd == -1)
 		return KSFT_FAIL;
 	for (i = 0; i < len; i++)
-		write(fd, &val, sizeof(val));
+		if (write(fd, &val, sizeof(val)) != sizeof(val))
+			return KSFT_FAIL;
 	lseek(fd, 0, 0);
 	ptr = mte_allocate_memory(len, mem_type, mapping, true);
 	if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 39f8908988ea..f50ac31920d1 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -181,10 +181,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags
 	}
 	/* Initialize the file for mappable size */
 	lseek(fd, 0, SEEK_SET);
-	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
-		write(fd, buffer, INIT_BUFFER_SIZE);
+	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) {
+		if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+			perror("initialising buffer");
+			return NULL;
+		}
+	}
 	index -= INIT_BUFFER_SIZE;
-	write(fd, buffer, size - index);
+	if (write(fd, buffer, size - index) != size - index) {
+		perror("initialising buffer");
+		return NULL;
+	}
 	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
 }
 
@@ -202,9 +209,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
 	/* Initialize the file for mappable size */
 	lseek(fd, 0, SEEK_SET);
 	for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
-		write(fd, buffer, INIT_BUFFER_SIZE);
+		if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+			perror("initialising buffer");
+			return NULL;
+		}
 	index -= INIT_BUFFER_SIZE;
-	write(fd, buffer, map_size - index);
+	if (write(fd, buffer, map_size - index) != map_size - index) {
+		perror("initialising buffer");
+		return NULL;
+	}
 	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
 					   range_after, true, fd);
 }
@@ -271,29 +284,20 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
 
 	en |= (incl_mask << PR_MTE_TAG_SHIFT);
 	/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
-	if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+	if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
 		ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
 		return -EINVAL;
 	}
 	return 0;
 }
 
-#define ID_AA64PFR1_MTE_SHIFT		8
-#define ID_AA64PFR1_MTE			2
-
 int mte_default_setup(void)
 {
-	unsigned long hwcaps = getauxval(AT_HWCAP);
+	unsigned long hwcaps2 = getauxval(AT_HWCAP2);
 	unsigned long en = 0;
 	int ret;
 
-	if (!(hwcaps & HWCAP_CPUID)) {
-		ksft_print_msg("FAIL: CPUID registers unavailable\n");
-		return KSFT_FAIL;
-	}
-	/* Read ID_AA64PFR1_EL1 register */
-	asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
-	if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+	if (!(hwcaps2 & HWCAP2_MTE)) {
 		ksft_print_msg("FAIL: MTE features unavailable\n");
 		return KSFT_SKIP;
 	}
@@ -333,6 +337,7 @@ int create_temp_file(void)
 	/* Create a file in the tmpfs filesystem */
 	fd = mkstemp(&filename[0]);
 	if (fd == -1) {
+		perror(filename);
 		ksft_print_msg("FAIL: Unable to open temporary file\n");
 		return 0;
 	}
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 5ebc1aec7923..0e393cb5f42d 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -95,7 +95,7 @@ static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_
 		}
 		if (block_fw_in_parent_ns)
 			umount("/lib/firmware");
-		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+		return WEXITSTATUS(status) == EXIT_SUCCESS;
 	}
 
 	if (unshare(CLONE_NEWNS) != 0) {
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 9a41d8bb9ff1..0af84ad48aa7 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -78,7 +78,8 @@ ifdef building_out_of_srctree
 		rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
 	fi
 	@if [ "X$(TEST_PROGS)" != "X" ]; then \
-		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
+				  $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \
 	else \
 		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \
 	fi
diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore
index f26212605b6b..d4b0be857deb 100644
--- a/tools/testing/selftests/lkdtm/.gitignore
+++ b/tools/testing/selftests/lkdtm/.gitignore
@@ -1,2 +1,3 @@
 *.sh
 !run.sh
+!stack-entropy.sh
diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile
index 1bcc9ee990eb..c71109ceeb2d 100644
--- a/tools/testing/selftests/lkdtm/Makefile
+++ b/tools/testing/selftests/lkdtm/Makefile
@@ -5,6 +5,7 @@ include ../lib.mk
 
 # NOTE: $(OUTPUT) won't get default value if used before lib.mk
 TEST_FILES := tests.txt
+TEST_PROGS := stack-entropy.sh
 TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//'))
 all: $(TEST_GEN_PROGS)
 
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
new file mode 100755
index 000000000000..b1b8a5097cbb
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
+set -e
+samples="${1:-1000}"
+
+# Capture dmesg continuously since it may fill up depending on sample size.
+log=$(mktemp -t stack-entropy-XXXXXX)
+dmesg --follow >"$log" & pid=$!
+report=-1
+for i in $(seq 1 "$samples"); do
+	echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT
+	if [ -t 1 ]; then
+		percent=$(( 100 * $i / $samples ))
+		if [ "$percent" -ne "$report" ]; then
+			/bin/echo -en "$percent%\r"
+			report="$percent"
+		fi
+	fi
+done
+kill "$pid"
+
+# Count unique offsets since last run.
+seen=$(tac "$log" | grep -m1 -B"$samples"0 'Starting stack offset' | \
+	grep 'Stack offset' | awk '{print $NF}' | sort | uniq -c | wc -l)
+bits=$(echo "obase=2; $seen" | bc | wc -L)
+echo "Bits of stack entropy: $bits"
+rm -f "$log"
+
+# We would expect any functional stack randomization to be at least 5 bits.
+if [ "$bits" -lt 5 ]; then
+	exit 1
+else
+	exit 0
+fi
diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore
new file mode 100644
index 000000000000..790c47001e77
--- /dev/null
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sigtrap_threads
+remove_on_exec
diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile
new file mode 100644
index 000000000000..fcafa5f0d34c
--- /dev/null
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include
+LDFLAGS += -lpthread
+
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec
+include ../lib.mk
diff --git a/tools/testing/selftests/perf_events/config b/tools/testing/selftests/perf_events/config
new file mode 100644
index 000000000000..ba58ff2203e4
--- /dev/null
+++ b/tools/testing/selftests/perf_events/config
@@ -0,0 +1 @@
+CONFIG_PERF_EVENTS=y
diff --git a/tools/testing/selftests/perf_events/remove_on_exec.c b/tools/testing/selftests/perf_events/remove_on_exec.c
new file mode 100644
index 000000000000..5814611a1dc7
--- /dev/null
+++ b/tools/testing/selftests/perf_events/remove_on_exec.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for remove_on_exec.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+/* We need the latest siginfo from the kernel repo. */
+#include <sys/types.h>
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+#define __siginfo_t_defined
+#define __sigval_t_defined
+#define __sigevent_t_defined
+#define _BITS_SIGINFO_CONSTS_H 1
+#define _BITS_SIGEVENT_CONSTS_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/perf_event.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+static volatile int signal_count;
+
+static struct perf_event_attr make_event_attr(void)
+{
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_HARDWARE,
+		.size		= sizeof(attr),
+		.config		= PERF_COUNT_HW_INSTRUCTIONS,
+		.sample_period	= 1000,
+		.exclude_kernel = 1,
+		.exclude_hv	= 1,
+		.disabled	= 1,
+		.inherit	= 1,
+		/*
+		 * Children normally retain their inherited event on exec; with
+		 * remove_on_exec, we'll remove their event, but the parent and
+		 * any other non-exec'd children will keep their events.
+		 */
+		.remove_on_exec = 1,
+		.sigtrap	= 1,
+	};
+	return attr;
+}
+
+static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
+{
+	if (info->si_code != TRAP_PERF) {
+		fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
+		return;
+	}
+
+	signal_count++;
+}
+
+FIXTURE(remove_on_exec)
+{
+	struct sigaction oldact;
+	int fd;
+};
+
+FIXTURE_SETUP(remove_on_exec)
+{
+	struct perf_event_attr attr = make_event_attr();
+	struct sigaction action = {};
+
+	signal_count = 0;
+
+	/* Initialize sigtrap handler. */
+	action.sa_flags = SA_SIGINFO | SA_NODEFER;
+	action.sa_sigaction = sigtrap_handler;
+	sigemptyset(&action.sa_mask);
+	ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
+
+	/* Initialize perf event. */
+	self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+	ASSERT_NE(self->fd, -1);
+}
+
+FIXTURE_TEARDOWN(remove_on_exec)
+{
+	close(self->fd);
+	sigaction(SIGTRAP, &self->oldact, NULL);
+}
+
+/* Verify event propagates to fork'd child. */
+TEST_F(remove_on_exec, fork_only)
+{
+	int status;
+	pid_t pid = fork();
+
+	if (pid == 0) {
+		ASSERT_EQ(signal_count, 0);
+		ASSERT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+		while (!signal_count);
+		_exit(42);
+	}
+
+	while (!signal_count); /* Child enables event. */
+	EXPECT_EQ(waitpid(pid, &status, 0), pid);
+	EXPECT_EQ(WEXITSTATUS(status), 42);
+}
+
+/*
+ * Verify that event does _not_ propagate to fork+exec'd child; event enabled
+ * after fork+exec.
+ */
+TEST_F(remove_on_exec, fork_exec_then_enable)
+{
+	pid_t pid_exec, pid_only_fork;
+	int pipefd[2];
+	int tmp;
+
+	/*
+	 * Non-exec child, to ensure exec does not affect inherited events of
+	 * other children.
+	 */
+	pid_only_fork = fork();
+	if (pid_only_fork == 0) {
+		/* Block until parent enables event. */
+		while (!signal_count);
+		_exit(42);
+	}
+
+	ASSERT_NE(pipe(pipefd), -1);
+	pid_exec = fork();
+	if (pid_exec == 0) {
+		ASSERT_NE(dup2(pipefd[1], STDOUT_FILENO), -1);
+		close(pipefd[0]);
+		execl("/proc/self/exe", "exec_child", NULL);
+		_exit((perror("exec failed"), 1));
+	}
+	close(pipefd[1]);
+
+	ASSERT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Child is running. */
+	/* Wait for exec'd child to start spinning. */
+	EXPECT_EQ(read(pipefd[0], &tmp, sizeof(int)), sizeof(int));
+	EXPECT_EQ(tmp, 42);
+	close(pipefd[0]);
+	/* Now we can enable the event, knowing the child is doing work. */
+	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+	/* If the event propagated to the exec'd child, it will exit normally... */
+	usleep(100000); /* ... give time for event to trigger (in case of bug). */
+	EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
+	EXPECT_EQ(kill(pid_exec, SIGKILL), 0);
+
+	/* Verify removal from child did not affect this task's event. */
+	tmp = signal_count;
+	while (signal_count == tmp); /* Should not hang! */
+	/* Nor should it have affected the first child. */
+	EXPECT_EQ(waitpid(pid_only_fork, &tmp, 0), pid_only_fork);
+	EXPECT_EQ(WEXITSTATUS(tmp), 42);
+}
+
+/*
+ * Verify that event does _not_ propagate to fork+exec'd child; event enabled
+ * before fork+exec.
+ */
+TEST_F(remove_on_exec, enable_then_fork_exec)
+{
+	pid_t pid_exec;
+	int tmp;
+
+	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+
+	pid_exec = fork();
+	if (pid_exec == 0) {
+		execl("/proc/self/exe", "exec_child", NULL);
+		_exit((perror("exec failed"), 1));
+	}
+
+	/*
+	 * The child may exit abnormally at any time if the event propagated and
+	 * a SIGTRAP is sent before the handler was set up.
+	 */
+	usleep(100000); /* ... give time for event to trigger (in case of bug). */
+	EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
+	EXPECT_EQ(kill(pid_exec, SIGKILL), 0);
+
+	/* Verify removal from child did not affect this task's event. */
+	tmp = signal_count;
+	while (signal_count == tmp); /* Should not hang! */
+}
+
+TEST_F(remove_on_exec, exec_stress)
+{
+	pid_t pids[30];
+	int i, tmp;
+
+	for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
+		pids[i] = fork();
+		if (pids[i] == 0) {
+			execl("/proc/self/exe", "exec_child", NULL);
+			_exit((perror("exec failed"), 1));
+		}
+
+		/* Some forked with event disabled, rest with enabled. */
+		if (i > 10)
+			EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+	}
+
+	usleep(100000); /* ... give time for event to trigger (in case of bug). */
+
+	for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
+		/* All children should still be running. */
+		EXPECT_EQ(waitpid(pids[i], &tmp, WNOHANG), 0);
+		EXPECT_EQ(kill(pids[i], SIGKILL), 0);
+	}
+
+	/* Verify event is still alive. */
+	tmp = signal_count;
+	while (signal_count == tmp);
+}
+
+/* For exec'd child. */
+static void exec_child(void)
+{
+	struct sigaction action = {};
+	const int val = 42;
+
+	/* Set up sigtrap handler in case we erroneously receive a trap. */
+	action.sa_flags = SA_SIGINFO | SA_NODEFER;
+	action.sa_sigaction = sigtrap_handler;
+	sigemptyset(&action.sa_mask);
+	if (sigaction(SIGTRAP, &action, NULL))
+		_exit((perror("sigaction failed"), 1));
+
+	/* Signal parent that we're starting to spin. */
+	if (write(STDOUT_FILENO, &val, sizeof(int)) == -1)
+		_exit((perror("write failed"), 1));
+
+	/* Should hang here until killed. */
+	while (!signal_count);
+}
+
+#define main test_main
+TEST_HARNESS_MAIN
+#undef main
+int main(int argc, char *argv[])
+{
+	/* The tests re-exec this binary with argv[0] set to "exec_child". */
+	if (strcmp(argv[0], "exec_child") != 0)
+		return test_main(argc, argv);
+
+	exec_child();
+	return 1;
+}
diff --git a/tools/testing/selftests/perf_events/settings b/tools/testing/selftests/perf_events/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/perf_events/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
new file mode 100644
index 000000000000..78ddf5e11625
--- /dev/null
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for perf events with SIGTRAP across all threads.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+/* We need the latest siginfo from the kernel repo. */
+#include <sys/types.h>
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+#define __siginfo_t_defined
+#define __sigval_t_defined
+#define __sigevent_t_defined
+#define _BITS_SIGINFO_CONSTS_H 1
+#define _BITS_SIGEVENT_CONSTS_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_THREADS 5
+
+/* Data shared between test body, threads, and signal handler. */
+static struct {
+	int tids_want_signal;		/* Which threads still want a signal. */
+	int signal_count;		/* Sanity check number of signals received. */
+	volatile int iterate_on;	/* Variable to set breakpoint on. */
+	siginfo_t first_siginfo;	/* First observed siginfo_t. */
+} ctx;
+
+/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */
+#define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
+
+static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
+{
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_BREAKPOINT,
+		.size		= sizeof(attr),
+		.sample_period	= 1,
+		.disabled	= !enabled,
+		.bp_addr	= (unsigned long)addr,
+		.bp_type	= HW_BREAKPOINT_RW,
+		.bp_len		= HW_BREAKPOINT_LEN_1,
+		.inherit	= 1, /* Children inherit events ... */
+		.inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
+		.remove_on_exec = 1, /* Required by sigtrap. */
+		.sigtrap	= 1, /* Request synchronous SIGTRAP on event. */
+		.sig_data	= TEST_SIG_DATA(addr),
+	};
+	return attr;
+}
+
+static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
+{
+	if (info->si_code != TRAP_PERF) {
+		fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
+		return;
+	}
+
+	/*
+	 * The data in siginfo_t we're interested in should all be the same
+	 * across threads.
+	 */
+	if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
+		ctx.first_siginfo = *info;
+	__atomic_fetch_sub(&ctx.tids_want_signal, syscall(__NR_gettid), __ATOMIC_RELAXED);
+}
+
+static void *test_thread(void *arg)
+{
+	pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
+	pid_t tid = syscall(__NR_gettid);
+	int iter;
+	int i;
+
+	pthread_barrier_wait(barrier);
+
+	__atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+	iter = ctx.iterate_on; /* read */
+	for (i = 0; i < iter - 1; i++) {
+		__atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+		ctx.iterate_on = iter; /* idempotent write */
+	}
+
+	return NULL;
+}
+
+FIXTURE(sigtrap_threads)
+{
+	struct sigaction oldact;
+	pthread_t threads[NUM_THREADS];
+	pthread_barrier_t barrier;
+	int fd;
+};
+
+FIXTURE_SETUP(sigtrap_threads)
+{
+	struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on);
+	struct sigaction action = {};
+	int i;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	/* Initialize sigtrap handler. */
+	action.sa_flags = SA_SIGINFO | SA_NODEFER;
+	action.sa_sigaction = sigtrap_handler;
+	sigemptyset(&action.sa_mask);
+	ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
+
+	/* Initialize perf event. */
+	self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+	ASSERT_NE(self->fd, -1);
+
+	/* Spawn threads inheriting perf event. */
+	pthread_barrier_init(&self->barrier, NULL, NUM_THREADS + 1);
+	for (i = 0; i < NUM_THREADS; i++)
+		ASSERT_EQ(pthread_create(&self->threads[i], NULL, test_thread, &self->barrier), 0);
+}
+
+FIXTURE_TEARDOWN(sigtrap_threads)
+{
+	pthread_barrier_destroy(&self->barrier);
+	close(self->fd);
+	sigaction(SIGTRAP, &self->oldact, NULL);
+}
+
+static void run_test_threads(struct __test_metadata *_metadata,
+			     FIXTURE_DATA(sigtrap_threads) *self)
+{
+	int i;
+
+	pthread_barrier_wait(&self->barrier);
+	for (i = 0; i < NUM_THREADS; i++)
+		ASSERT_EQ(pthread_join(self->threads[i], NULL), 0);
+}
+
+TEST_F(sigtrap_threads, remain_disabled)
+{
+	run_test_threads(_metadata, self);
+	EXPECT_EQ(ctx.signal_count, 0);
+	EXPECT_NE(ctx.tids_want_signal, 0);
+}
+
+TEST_F(sigtrap_threads, enable_event)
+{
+	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+	run_test_threads(_metadata, self);
+
+	EXPECT_EQ(ctx.signal_count, NUM_THREADS);
+	EXPECT_EQ(ctx.tids_want_signal, 0);
+	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+
+	/* Check enabled for parent. */
+	ctx.iterate_on = 0;
+	EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1);
+}
+
+/* Test that modification propagates to all inherited events. */
+TEST_F(sigtrap_threads, modify_and_enable_event)
+{
+	struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on);
+
+	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0);
+	run_test_threads(_metadata, self);
+
+	EXPECT_EQ(ctx.signal_count, NUM_THREADS);
+	EXPECT_EQ(ctx.tids_want_signal, 0);
+	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+
+	/* Check enabled for parent. */
+	ctx.iterate_on = 0;
+	EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1);
+}
+
+/* Stress test event + signal handling. */
+TEST_F(sigtrap_threads, signal_stress)
+{
+	ctx.iterate_on = 3000;
+
+	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+	run_test_threads(_metadata, self);
+	EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+	EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+	EXPECT_EQ(ctx.tids_want_signal, 0);
+	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index 1dbfb62567d2..6bb993001680 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -21,7 +21,6 @@ then
 		awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
 else
 	# No mpstat command, so use all available CPUs.
-	echo The mpstat command is not available, so greedily using all CPUs.
 	idlecpus=$ncpus
 fi
 awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 188b864bc4bf..15d937ba96ca 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -5,10 +5,11 @@
 # of this script is to inflict random OS jitter on a concurrently running
 # test.
 #
-# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+# Usage: jitter.sh me jittering-path duration [ sleepmax [ spinmax ] ]
 #
 # me: Random-number-generator seed salt.
 # duration: Time to run in seconds.
+# jittering-path: Path to file whose removal will stop this script.
 # sleepmax: Maximum microseconds to sleep, defaults to one second.
 # spinmax: Maximum microseconds to spin, defaults to one millisecond.
 #
@@ -17,9 +18,10 @@
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
 me=$(($1 * 1000))
-duration=$2
-sleepmax=${3-1000000}
-spinmax=${4-1000}
+jittering=$2
+duration=$3
+sleepmax=${4-1000000}
+spinmax=${5-1000}
 
 n=1
 
@@ -47,7 +49,7 @@ do
 	fi
 
 	# Check for stop request.
-	if test -f "$TORTURE_STOPFILE"
+	if ! test -f "$jittering"
 	then
 		exit 1;
 	fi
@@ -67,10 +69,10 @@ do
 		srand(n + me + systime());
 		ncpus = split(cpus, ca);
 		curcpu = ca[int(rand() * ncpus + 1)];
-		mask = lshift(1, curcpu);
-		if (mask + 0 <= 0)
-			mask = 1;
-		printf("%#x\n", mask);
+		z = "";
+		for (i = 1; 4 * i <= curcpu; i++)
+			z = z "0";
+		print "0x" 2 ^ (curcpu % 4) z;
 	}' < /dev/null`
 	n=$(($n+1))
 	if ! taskset -p $cpumask $$ > /dev/null 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/jitterstart.sh b/tools/testing/selftests/rcutorture/bin/jitterstart.sh
new file mode 100644
index 000000000000..3d710ad291c3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitterstart.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Start up the specified number of jitter.sh scripts in the background.
+#
+# Usage: . jitterstart.sh n jittering-dir duration [ sleepmax [ spinmax ] ]
+#
+# n: Number of jitter.sh scripts to start up.
+# jittering-dir: Directory in which to put "jittering" file.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+jitter_n=$1
+if test -z "$jitter_n"
+then
+	echo jitterstart.sh: Missing count of jitter.sh scripts to start.
+	exit 33
+fi
+jittering_dir=$2
+if test -z "$jittering_dir"
+then
+	echo jitterstart.sh: Missing directory in which to place jittering file.
+	exit 34
+fi
+shift
+shift
+
+touch "${jittering_dir}/jittering"
+for ((jitter_i = 1; jitter_i <= $jitter_n; jitter_i++))
+do
+	jitter.sh $jitter_i "${jittering_dir}/jittering" "$@" &
+done
diff --git a/tools/testing/selftests/rcutorture/bin/jitterstop.sh b/tools/testing/selftests/rcutorture/bin/jitterstop.sh
new file mode 100644
index 000000000000..576a4cf4b79a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitterstop.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Remove the "jittering" file, signaling the jitter.sh scripts to stop,
+# then wait for them to terminate.
+#
+# Usage: . jitterstop.sh jittering-dir
+#
+# jittering-dir: Directory containing "jittering" file.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+jittering_dir=$1
+if test -z "$jittering_dir"
+then
+	echo jitterstop.sh: Missing directory containing jittering file.
+	exit 34
+fi
+
+rm -f "${jittering_dir}/jittering"
+wait
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
new file mode 100755
index 000000000000..46e47a00a7db
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Rerun a series of tests under KVM.
+#
+# Usage: kvm-again.sh /path/to/old/run [ options ]
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+scriptname=$0
+args="$*"
+
+T=${TMPDIR-/tmp}/kvm-again.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+if ! test -d tools/testing/selftests/rcutorture/bin
+then
+	echo $scriptname must be run from top-level directory of kernel source tree.
+	exit 1
+fi
+
+oldrun=$1
+shift
+if ! test -d "$oldrun"
+then
+	echo "Usage: $scriptname /path/to/old/run [ options ]"
+	exit 1
+fi
+if ! cp "$oldrun/batches" $T/batches.oldrun
+then
+	# Later on, can reconstitute this from console.log files.
+	echo Prior run batches file does not exist: $oldrun/batches
+	exit 1
+fi
+
+if test -f "$oldrun/torture_suite"
+then
+	torture_suite="`cat $oldrun/torture_suite`"
+elif test -f "$oldrun/TORTURE_SUITE"
+then
+	torture_suite="`cat $oldrun/TORTURE_SUITE`"
+else
+	echo "Prior run torture_suite file does not exist: $oldrun/{torture_suite,TORTURE_SUITE}"
+	exit 1
+fi
+
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+. functions.sh
+
+dryrun=
+dur=
+default_link="cp -R"
+rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`"
+
+startdate="`date`"
+starttime="`get_starttime`"
+
+usage () {
+	echo "Usage: $scriptname $oldrun [ arguments ]:"
+	echo "       --dryrun"
+	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
+	echo "       --link hard|soft|copy"
+	echo "       --remote"
+	echo "       --rundir /new/res/path"
+	exit 1
+}
+
+while test $# -gt 0
+do
+	case "$1" in
+	--dryrun)
+		dryrun=1
+		;;
+	--duration)
+		checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error'
+		mult=60
+		if echo "$2" | grep -q 's$'
+		then
+			mult=1
+		elif echo "$2" | grep -q 'h$'
+		then
+			mult=3600
+		elif echo "$2" | grep -q 'd$'
+		then
+			mult=86400
+		fi
+		ts=`echo $2 | sed -e 's/[smhd]$//'`
+		dur=$(($ts*mult))
+		shift
+		;;
+	--link)
+		checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--'
+		case "$2" in
+		copy)
+			arg_link="cp -R"
+			;;
+		hard)
+			arg_link="cp -Rl"
+			;;
+		soft)
+			arg_link="cp -Rs"
+			;;
+		esac
+		shift
+		;;
+	--remote)
+		arg_remote=1
+		default_link="cp -as"
+		;;
+	--rundir)
+		checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error'
+		rundir=$2
+		if test -e "$rundir"
+		then
+			echo "--rundir $2: Already exists."
+			usage
+		fi
+		shift
+		;;
+	*)
+		echo Unknown argument $1
+		usage
+		;;
+	esac
+	shift
+done
+if test -z "$arg_link"
+then
+	arg_link="$default_link"
+fi
+
+echo ---- Re-run results directory: $rundir
+
+# Copy old run directory tree over and adjust.
+mkdir -p "`dirname "$rundir"`"
+if ! $arg_link "$oldrun" "$rundir"
+then
+	echo "Cannot copy from $oldrun to $rundir."
+	usage
+fi
+rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+echo $oldrun > "$rundir/re-run"
+if ! test -d "$rundir/../../bin"
+then
+	$arg_link "$oldrun/../../bin" "$rundir/../.."
+fi
+for i in $rundir/*/qemu-cmd
+do
+	cp "$i" $T
+	qemu_cmd_dir="`dirname "$i"`"
+	kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
+	jitter_dir="`dirname "$kernel_dir"`"
+	kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i
+	if test -n "$arg_remote"
+	then
+		echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
+	fi
+done
+
+# Extract settings from the last qemu-cmd file transformed above.
+grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+grep -v '^#' $T/batches.oldrun | awk '
+BEGIN {
+	oldbatch = 1;
+}
+
+{
+	if (oldbatch != $1) {
+		print "kvm-test-1-run-batch.sh" curbatch;
+		curbatch = "";
+		oldbatch = $1;
+	}
+	curbatch = curbatch " " $2;
+}
+
+END {
+	print "kvm-test-1-run-batch.sh" curbatch
+}' > $T/runbatches.sh
+
+if test -n "$dryrun"
+then
+	echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
+else
+	( cd "$rundir"; sh $T/runbatches.sh )
+	kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
+	echo | tee -a "$rundir/log"
+	echo ---- Results directory: $rundir | tee -a "$rundir/log"
+	kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
+	ret=$?
+	cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
+	echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
+	exit $ret
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 47cf4db10896..e01b31b87044 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -30,7 +30,7 @@ do
 			resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
 			head -1 $resdir/log
 		fi
-		TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+		TORTURE_SUITE="`cat $i/../torture_suite`"
 		configfile=`echo $i | sed -e 's,^.*/,,'`
 		rm -f $i/console.log.*.diags
 		kvm-recheck-${TORTURE_SUITE}.sh $i
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
new file mode 100755
index 000000000000..7ea0809e229e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Carry out a kvm-based run for the specified batch of scenarios, which
+# might have been built by --build-only kvm.sh run.
+#
+# Usage: kvm-test-1-run-batch.sh SCENARIO [ SCENARIO ... ]
+#
+# Each SCENARIO is the name of a directory in the current directory
+#	containing a ready-to-run qemu-cmd file.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+echo ---- Running batch $*
+# Check arguments
+runfiles=
+for i in "$@"
+do
+	if ! echo $i | grep -q '^[^/.a-z]\+\(\.[0-9]\+\)\?$'
+	then
+		echo Bad scenario name: \"$i\" 1>&2
+		exit 1
+	fi
+	if ! test -d "$i"
+	then
+		echo Scenario name not a directory: \"$i\" 1>&2
+		exit 2
+	fi
+	if ! test -f "$i/qemu-cmd"
+	then
+		echo Scenario lacks a command file: \"$i/qemu-cmd\" 1>&2
+		exit 3
+	fi
+	rm -f $i/build.*
+	touch $i/build.run
+	runfiles="$runfiles $i/build.run"
+done
+
+# Extract settings from the qemu-cmd file.
+grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+# Start up jitter, start each scenario, wait, end jitter.
+echo ---- System running test: `uname -a`
+echo ---- Starting kernels. `date` | tee -a log
+$TORTURE_JITTER_START
+for i in "$@"
+do
+	echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
+	echo >> $i/kvm-test-1-run-qemu.sh.out
+	kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
+done
+for i in $runfiles
+do
+	while ls $i > /dev/null 2>&1
+	do
+		:
+	done
+done
+echo ---- All kernel runs complete. `date` | tee -a log
+$TORTURE_JITTER_STOP
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
new file mode 100755
index 000000000000..5b1aa2a4f3f6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Carry out a kvm-based run for the specified qemu-cmd file, which might
+# have been generated by --build-only kvm.sh run.
+#
+# Usage: kvm-test-1-run-qemu.sh qemu-cmd-dir
+#
+# qemu-cmd-dir provides the directory containing qemu-cmd file.
+#	This is assumed to be of the form prefix/ds/scenario, where
+#	"ds" is the top-level date-stamped directory and "scenario"
+#	is the scenario name.  Any required adjustments to this file
+#	must have been made by the caller.  The shell-command comments
+#	at the end of the qemu-cmd file are not optional.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+resdir="$1"
+if ! test -d "$resdir"
+then
+	echo $0: Nonexistent directory: $resdir
+	exit 1
+fi
+if ! test -f "$resdir/qemu-cmd"
+then
+	echo $0: Nonexistent qemu-cmd file: $resdir/qemu-cmd
+	exit 1
+fi
+
+echo ' ---' `date`: Starting kernel, PID $$
+
+# Obtain settings from the qemu-cmd file.
+grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+# Decorate qemu-cmd with redirection, backgrounding, and PID capture
+sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+
+# In case qemu refuses to run...
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
+
+# Attempt to run qemu
+kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
+( . $T/qemu-cmd; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+commandcompleted=0
+if test -z "$TORTURE_KCONFIG_GDB_ARG"
+then
+	sleep 10 # Give qemu's pid a chance to reach the file
+	if test -s "$resdir/qemu_pid"
+	then
+		qemu_pid=`cat "$resdir/qemu_pid"`
+		echo Monitoring qemu job at pid $qemu_pid
+	else
+		qemu_pid=""
+		echo Monitoring qemu job at yet-as-unknown pid
+	fi
+fi
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+	base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+	if ! test -f $base_resdir/vmlinux
+	then
+		base_resdir="`cat re-run`/$resdir"
+		if ! test -f $base_resdir/vmlinux
+		then
+			base_resdir=/path/to
+		fi
+	fi
+	echo Waiting for you to attach a debug session, for example: > /dev/tty
+	echo "    gdb $base_resdir/vmlinux" > /dev/tty
+	echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
+	echo "    target remote :1234" > /dev/tty
+	echo "    continue" > /dev/tty
+	kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
+fi
+while :
+do
+	if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+	then
+		qemu_pid=`cat "$resdir/qemu_pid"`
+	fi
+	kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
+	then
+		if test -n "$TORTURE_KCONFIG_GDB_ARG"
+		then
+			:
+		elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1"
+		then
+			break;
+		fi
+		sleep 1
+	else
+		commandcompleted=1
+		if test $kruntime -lt $seconds
+		then
+			echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
+			grep -h "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1
+			killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
+			if test -n "$killpid"
+			then
+				echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
+				ps -fp $killpid >> $resdir/Warnings 2>&1
+			fi
+		else
+			echo ' ---' `date`: "Kernel done"
+		fi
+		break
+	fi
+done
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+then
+	if ! test -f "$resdir/../STOP.1"
+	then
+		echo Grace period for qemu job at pid $qemu_pid
+	fi
+	oldline="`tail $resdir/console.log`"
+	while :
+	do
+		if test -f "$resdir/../STOP.1"
+		then
+			echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
+			kill -KILL $qemu_pid
+			break
+		fi
+		kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+		if kill -0 $qemu_pid > /dev/null 2>&1
+		then
+			:
+		else
+			break
+		fi
+		must_continue=no
+		newline="`tail $resdir/console.log`"
+		if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
+		then
+			must_continue=yes
+		fi
+		last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
+		if test -z "$last_ts"
+		then
+			last_ts=0
+		fi
+		if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+		then
+			must_continue=yes
+		fi
+		if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+		then
+			echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+			kill -KILL $qemu_pid
+			break
+		fi
+		oldline=$newline
+		sleep 10
+	done
+elif test -z "$qemu_pid"
+then
+	echo Unknown PID, cannot kill qemu command
+fi
+
+# Tell the script that this run is done.
+rm -f $resdir/build.run
+
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 536d103ef166..420ed5ce9d32 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -7,15 +7,15 @@
 # Execute this in the source tree.  Do not run it as a background task
 # because qemu does not seem to like that much.
 #
-# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
+# Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in
 #
 # qemu-args defaults to "-enable-kvm -nographic", along with arguments
 #			specifying the number of CPUs and other options
 #			generated from the underlying CPU architecture.
-# boot_args defaults to value returned by the per_version_boot_params
+# boot_args_in defaults to value returned by the per_version_boot_params
 #			shell function.
 #
-# Anything you specify for either qemu-args or boot_args is appended to
+# Anything you specify for either qemu-args or boot_args_in is appended to
 # the default values.  The "-smp" value is deduced from the contents of
 # the config fragment.
 #
@@ -35,14 +35,13 @@ mkdir $T
 config_template=${1}
 config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
 title=`echo $config_template | sed -e 's/^.*\///'`
-builddir=${2}
-resdir=${3}
+resdir=${2}
 if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
 then
 	echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
 	exit 1
 fi
-echo ' ---' `date`: Starting build
+echo ' ---' `date`: Starting build, PID $$
 echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
 touch $resdir/ConfigFragment.input
 
@@ -73,7 +72,7 @@ config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
 cp $T/KcList $resdir/ConfigFragment
 
 base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
-if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
+if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux
 then
 	# Rerunning previous test, so use that test's kernel.
 	QEMU="`identify_qemu $base_resdir/vmlinux`"
@@ -83,6 +82,17 @@ then
 	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
 	# Arch-independent indicator
 	touch $resdir/builtkernel
+elif test "$base_resdir" != "$resdir"
+then
+	# Rerunning previous test for which build failed
+	ln -s $base_resdir/Make*.out $resdir  # for kvm-recheck.sh
+	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
+	echo Initial build failed, not running KVM, see $resdir.
+	if test -f $resdir/build.wait
+	then
+		mv $resdir/build.wait $resdir/build.ready
+	fi
+	exit 1
 elif kvm-build.sh $T/KcList $resdir
 then
 	# Had to build a kernel for this test.
@@ -107,23 +117,23 @@ else
 	# Build failed.
 	cp .config $resdir || :
 	echo Build failed, not running KVM, see $resdir.
-	if test -f $builddir.wait
+	if test -f $resdir/build.wait
 	then
-		mv $builddir.wait $builddir.ready
+		mv $resdir/build.wait $resdir/build.ready
 	fi
 	exit 1
 fi
-if test -f $builddir.wait
+if test -f $resdir/build.wait
 then
-	mv $builddir.wait $builddir.ready
+	mv $resdir/build.wait $resdir/build.ready
 fi
-while test -f $builddir.ready
+while test -f $resdir/build.ready
 do
 	sleep 1
 done
-seconds=$4
-qemu_args=$5
-boot_args=$6
+seconds=$3
+qemu_args=$4
+boot_args_in=$5
 
 if test -z "$TORTURE_BUILDONLY"
 then
@@ -133,7 +143,7 @@ fi
 # Generate -smp qemu argument.
 qemu_args="-enable-kvm -nographic $qemu_args"
 cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
-cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
+cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"`
 if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"
 then
 	echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings
@@ -149,16 +159,52 @@ qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
 qemu_append="`identify_qemu_append "$QEMU"`"
 
 # Pull in Kconfig-fragment boot parameters
-boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
+boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`"
 # Generate kernel-version-specific boot parameters
 boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
 if test -n "$TORTURE_BOOT_GDB_ARG"
 then
 	boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
 fi
+
+# Give bare-metal advice
+modprobe_args="`echo $boot_args | tr -s ' ' '\012' | grep "^$TORTURE_MOD\." | sed -e "s/$TORTURE_MOD\.//g"`"
+kboot_args="`echo $boot_args | tr -s ' ' '\012' | grep -v "^$TORTURE_MOD\."`"
+testid_txt="`dirname $resdir`/testid.txt"
+touch $resdir/bare-metal
+echo To run this scenario on bare metal: >> $resdir/bare-metal
+echo >> $resdir/bare-metal
+echo " 1." Set your bare-metal build tree to the state shown in this file: >> $resdir/bare-metal
+echo "   " $testid_txt >> $resdir/bare-metal
+echo " 2." Update your bare-metal build tree"'"s .config based on this file: >> $resdir/bare-metal
+echo "   " $resdir/ConfigFragment >> $resdir/bare-metal
+echo " 3." Make the bare-metal kernel"'"s build system aware of your .config updates: >> $resdir/bare-metal
+echo "   " $ 'yes "" | make oldconfig' >> $resdir/bare-metal
+echo " 4." Build your bare-metal kernel. >> $resdir/bare-metal
+echo " 5." Boot your bare-metal kernel with the following parameters: >> $resdir/bare-metal
+echo "   " $kboot_args >> $resdir/bare-metal
+echo " 6." Start the test with the following command: >> $resdir/bare-metal
+echo "   " $ modprobe $TORTURE_MOD $modprobe_args >> $resdir/bare-metal
+echo " 7." After some time, end the test with the following command: >> $resdir/bare-metal
+echo "   " $ rmmod $TORTURE_MOD >> $resdir/bare-metal
+echo " 8." Copy your bare-metal kernel"'"s .config file, overwriting this file: >> $resdir/bare-metal
+echo "   " $resdir/.config >> $resdir/bare-metal
+echo " 9." Copy the console output from just before the modprobe to just after >> $resdir/bare-metal
+echo "   " the rmmod into this file: >> $resdir/bare-metal
+echo "   " $resdir/console.log >> $resdir/bare-metal
+echo "10." Check for runtime errors using the following command: >> $resdir/bare-metal
+echo "   " $ tools/testing/selftests/rcutorture/bin/kvm-recheck.sh `dirname $resdir` >> $resdir/bare-metal
+echo >> $resdir/bare-metal
+echo Some of the above steps may be skipped if you build your bare-metal >> $resdir/bare-metal
+echo kernel here: `head -n 1 $testid_txt | sed -e 's/^Build directory: //'`  >> $resdir/bare-metal
+
 echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd
 echo "# TORTURE_SHUTDOWN_GRACE=$TORTURE_SHUTDOWN_GRACE" >> $resdir/qemu-cmd
 echo "# seconds=$seconds" >> $resdir/qemu-cmd
+echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cmd
+echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
+echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
+echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
 
 if test -n "$TORTURE_BUILDONLY"
 then
@@ -167,140 +213,4 @@ then
 	exit 0
 fi
 
-# Decorate qemu-cmd with redirection, backgrounding, and PID capture
-sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
-echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
-
-# In case qemu refuses to run...
-echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-
-# Attempt to run qemu
-kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-( . $T/qemu-cmd; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
-commandcompleted=0
-if test -z "$TORTURE_KCONFIG_GDB_ARG"
-then
-	sleep 10 # Give qemu's pid a chance to reach the file
-	if test -s "$resdir/qemu_pid"
-	then
-		qemu_pid=`cat "$resdir/qemu_pid"`
-		echo Monitoring qemu job at pid $qemu_pid
-	else
-		qemu_pid=""
-		echo Monitoring qemu job at yet-as-unknown pid
-	fi
-fi
-if test -n "$TORTURE_KCONFIG_GDB_ARG"
-then
-	echo Waiting for you to attach a debug session, for example: > /dev/tty
-	echo "    gdb $base_resdir/vmlinux" > /dev/tty
-	echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
-	echo "    target remote :1234" > /dev/tty
-	echo "    continue" > /dev/tty
-	kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-fi
-while :
-do
-	if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
-	then
-		qemu_pid=`cat "$resdir/qemu_pid"`
-	fi
-	kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
-	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
-	then
-		if test -n "$TORTURE_KCONFIG_GDB_ARG"
-		then
-			:
-		elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1"
-		then
-			break;
-		fi
-		sleep 1
-	else
-		commandcompleted=1
-		if test $kruntime -lt $seconds
-		then
-			echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
-			grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
-			killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
-			if test -n "$killpid"
-			then
-				echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
-				ps -fp $killpid >> $resdir/Warnings 2>&1
-			fi
-			# Reduce probability of PID reuse by allowing a one-minute buffer
-			if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids"
-			then
-				awk < "$resdir/../jitter_pids" '
-				NF > 0 {
-					pidlist = pidlist " " $1;
-					n++;
-				}
-				END {
-					if (n > 0) {
-						print "kill " pidlist;
-					}
-				}' | sh
-			fi
-		else
-			echo ' ---' `date`: "Kernel done"
-		fi
-		break
-	fi
-done
-if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
-then
-	qemu_pid=`cat "$resdir/qemu_pid"`
-fi
-if test $commandcompleted -eq 0 -a -n "$qemu_pid"
-then
-	if ! test -f "$resdir/../STOP.1"
-	then
-		echo Grace period for qemu job at pid $qemu_pid
-	fi
-	oldline="`tail $resdir/console.log`"
-	while :
-	do
-		if test -f "$resdir/../STOP.1"
-		then
-			echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
-			kill -KILL $qemu_pid
-			break
-		fi
-		kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
-		if kill -0 $qemu_pid > /dev/null 2>&1
-		then
-			:
-		else
-			break
-		fi
-		must_continue=no
-		newline="`tail $resdir/console.log`"
-		if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
-		then
-			must_continue=yes
-		fi
-		last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
-		if test -z "$last_ts"
-		then
-			last_ts=0
-		fi
-		if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
-		then
-			must_continue=yes
-		fi
-		if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
-		then
-			echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
-			kill -KILL $qemu_pid
-			break
-		fi
-		oldline=$newline
-		sleep 10
-	done
-elif test -z "$qemu_pid"
-then
-	echo Unknown PID, cannot kill qemu command
-fi
-
-parse-console.sh $resdir/console.log $title
+kvm-test-1-run-qemu.sh $resdir
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
index c45a953ef393..d40b4e60a50c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
@@ -3,7 +3,7 @@
 #
 # Transform a qemu-cmd file to allow reuse.
 #
-# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out
+# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out
 #
 #	bzImage: Kernel and initrd from the same prior kvm.sh run.
 #	console.log: File into which to place console output.
@@ -29,20 +29,62 @@ then
 	echo "Need console log file name."
 	exit 1
 fi
+jitter_dir="$3"
+if test -z "$jitter_dir" || ! test -d "$jitter_dir"
+then
+	echo "Need valid jitter directory: '$jitter_dir'"
+	exit 1
+fi
+seconds="$4"
+if test -n "$seconds" && echo $seconds | grep -q '[^0-9]'
+then
+	echo "Invalid duration, should be numeric in seconds: '$seconds'"
+	exit 1
+fi
+
+awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
+    -v seconds="$seconds" '
+/^# seconds=/ {
+	if (seconds == "")
+		print $0;
+	else
+		print "# seconds=" seconds;
+	next;
+}
+
+/^# TORTURE_JITTER_START=/ {
+	print "# TORTURE_JITTER_START=\". jitterstart.sh " $4 " " jitter_dir " " $6 " " $7;
+	next;
+}
+
+/^# TORTURE_JITTER_STOP=/ {
+	print "# TORTURE_JITTER_STOP=\". jitterstop.sh " " " jitter_dir " " $5;
+	next;
+}
+
+/^#/ {
+	print $0;
+	next;
+}
 
-awk -v image="$image" -v consolelog="$consolelog" '
 {
 	line = "";
 	for (i = 1; i <= NF; i++) {
-		if (line == "")
+		if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) {
+			sub(/[0-9]*$/, seconds, $i);
+			if (line == "")
+				line = $i;
+			else
+				line = line " " $i;
+		} else if (line == "") {
 			line = $i;
-		else
+		} else {
 			line = line " " $i;
+		}
 		if ($i == "-serial") {
 			i++;
 			line = line " file:" consolelog;
-		}
-		if ($i == "-kernel") {
+		} else if ($i == "-kernel") {
 			i++;
 			line = line " " image;
 		}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 8d3c99b35e06..6bf00a003d3d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -29,17 +29,21 @@ PATH=${KVM}/bin:$PATH; export PATH
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
 TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
+TORTURE_BUILDONLY=
 TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
 TORTURE_KCONFIG_GDB_ARG=""
 TORTURE_BOOT_GDB_ARG=""
 TORTURE_QEMU_GDB_ARG=""
+TORTURE_JITTER_START=""
+TORTURE_JITTER_STOP=""
 TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
 TORTURE_QEMU_MEM=512
 TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
+TORTURE_MOD=rcutorture
 TORTURE_TRUST_MAKE=""
 resdir=""
 configs=""
@@ -100,7 +104,7 @@ do
 		TORTURE_BUILDONLY=1
 		;;
 	--configs|--config)
-		checkarg --configs "(list of config files)" "$#" "$2" '^[^/]\+$' '^--'
+		checkarg --configs "(list of config files)" "$#" "$2" '^[^/.a-z]\+$' '^--'
 		configs="$configs $2"
 		shift
 		;;
@@ -116,7 +120,7 @@ do
 		shift
 		;;
 	--datestamp)
-		checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._-/]*$' '^--'
+		checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
 		ds=$2
 		shift
 		;;
@@ -215,6 +219,7 @@ do
 	--torture)
 		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
 		TORTURE_SUITE=$2
+		TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
 		shift
 		if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale
 		then
@@ -381,6 +386,7 @@ TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG
 TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
 TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
 TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
+TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD
 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
 TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
@@ -399,12 +405,17 @@ echo Results directory: $resdir/$ds
 echo $scriptname $args
 touch $resdir/$ds/log
 echo $scriptname $args >> $resdir/$ds/log
-echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE
-pwd > $resdir/$ds/testid.txt
+echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite
+echo Build directory: `pwd` > $resdir/$ds/testid.txt
 if test -d .git
 then
+	echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt
+	echo >> $resdir/$ds/testid.txt
+	echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt
 	git status >> $resdir/$ds/testid.txt
-	git rev-parse HEAD >> $resdir/$ds/testid.txt
+	echo >> $resdir/$ds/testid.txt
+	echo >> $resdir/$ds/testid.txt
+	echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt
 	git diff HEAD >> $resdir/$ds/testid.txt
 fi
 ___EOF___
@@ -434,8 +445,17 @@ function dump(first, pastlast, batchnum)
 	print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
 	print "needqemurun="
 	jn=1
+	njitter = 0;
+	split(jitter, ja);
+	if (ja[1] == -1 && ncpus == 0)
+		njitter = 1;
+	else if (ja[1] == -1)
+		njitter = ncpus;
+	else
+		njitter = ja[1];
+	print "TORTURE_JITTER_START=\". jitterstart.sh " njitter " " rd " " dur " " ja[2] " " ja[3] "\"; export TORTURE_JITTER_START";
+	print "TORTURE_JITTER_STOP=\". jitterstop.sh " rd " \"; export TORTURE_JITTER_STOP"
 	for (j = first; j < pastlast; j++) {
-		builddir=KVM "/b" j - first + 1
 		cpusr[jn] = cpus[j];
 		if (cfrep[cf[j]] == "") {
 			cfr[jn] = cf[j];
@@ -444,15 +464,15 @@ function dump(first, pastlast, batchnum)
 			cfrep[cf[j]]++;
 			cfr[jn] = cf[j] "." cfrep[cf[j]];
 		}
+		builddir=rd cfr[jn] "/build";
 		if (cpusr[jn] > ncpus && ncpus != 0)
 			ovf = "-ovf";
 		else
 			ovf = "";
 		print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
-		print "rm -f " builddir ".*";
-		print "touch " builddir ".wait";
 		print "mkdir " rd cfr[jn] " || :";
-		print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn]  "/kvm-test-1-run.sh.out 2>&1 &"
+		print "touch " builddir ".wait";
+		print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn]  "/kvm-test-1-run.sh.out 2>&1 &"
 		print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
 		print "while test -f " builddir ".wait"
 		print "do"
@@ -461,23 +481,21 @@ function dump(first, pastlast, batchnum)
 		print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log";
 		jn++;
 	}
+	print "runfiles="
 	for (j = 1; j < jn; j++) {
-		builddir=KVM "/b" j
-		print "rm -f " builddir ".ready"
+		builddir=rd cfr[j] "/build";
+		if (TORTURE_BUILDONLY)
+			print "rm -f " builddir ".ready"
+		else
+			print "mv " builddir ".ready " builddir ".run"
+		# NOTE(review): runs on both paths, as before; "fi" is not awk syntax.
+		print "runfiles=\"$runfiles " builddir ".run\""
 		print "if test -f \"" rd cfr[j] "/builtkernel\""
 		print "then"
 		print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log";
 		print "\tneedqemurun=1"
 		print "fi"
 	}
-	njitter = 0;
-	split(jitter, ja);
-	if (ja[1] == -1 && ncpus == 0)
-		njitter = 1;
-	else if (ja[1] == -1)
-		njitter = ncpus;
-	else
-		njitter = ja[1];
 	if (TORTURE_BUILDONLY && njitter != 0) {
 		njitter = 0;
 		print "echo Build-only run, so suppressing jitter | tee -a " rd "log"
@@ -488,19 +506,18 @@ function dump(first, pastlast, batchnum)
 	print "if test -n \"$needqemurun\""
 	print "then"
 	print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
-	print "\techo > " rd "jitter_pids"
-	for (j = 0; j < njitter; j++) {
-		print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
-		print "\techo $! >> " rd "jitter_pids"
-	}
-	print "\twait"
+	print "\t$TORTURE_JITTER_START";
+	print "\twhile ls $runfiles > /dev/null 2>&1"
+	print "\tdo"
+	print "\t\t:"
+	print "\tdone"
+	print "\t$TORTURE_JITTER_STOP";
 	print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
 	print "else"
 	print "\twait"
 	print "\techo ---- No kernel runs. `date` | tee -a " rd "log";
 	print "fi"
 	for (j = 1; j < jn; j++) {
-		builddir=KVM "/b" j
 		print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log";
 		print "cat " rd cfr[j]  "/kvm-test-1-run.sh.out | tee -a " rd "log";
 	}
@@ -548,6 +565,18 @@ echo 'ret=$?' >> $T/script
 echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script
 echo 'exit $ret' >> $T/script
 
+# Extract the tests and their batches from the script.
+egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
+	sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
+	awk '
+	/^----Start/ {
+		batchno = $3;
+		next;
+	}
+	{
+		print batchno, $1, $2
+	}' > $T/batches
+
 if test "$dryrun" = script
 then
 	cat $T/script
@@ -566,21 +595,14 @@ then
 	exit 0
 elif test "$dryrun" = batches
 then
-	# Extract the tests and their batches from the script.
-	egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
-		sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
-		awk '
-		/^----Start/ {
-			batchno = $3;
-			next;
-		}
-		{
-			print batchno, $1, $2
-		}'
+	cat $T/batches
+	exit 0
 else
-	# Not a dryrun, so run the script.
+	# Not a dryrun.  Record the batches and the number of CPUs, then run the script.
 	bash $T/script
 	ret=$?
+	cp $T/batches $resdir/$ds/batches
+	echo '#' cpus=$cpus >> $resdir/$ds/batches
 	echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log
 	exit $ret
 fi
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index ad7525b7ac29..56e2e1a42569 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -374,7 +374,7 @@ done
 if test "$do_kvfree" = "yes"
 then
 	torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
-	torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+	torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
 fi
 
 echo " --- " $scriptname $args
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index f2b20db9e296..98b6175e5aa0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -7,8 +7,8 @@ TREE07
 TREE09
 SRCU-N
 SRCU-P
-SRCU-t
-SRCU-u
+SRCU-T
+SRCU-U
 TINY01
 TINY02
 TASKS01
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
index d6557c38dfe4..d6557c38dfe4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot
index 238bfe3bd0cc..238bfe3bd0cc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
index 6bc24e99862f..6bc24e99862f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot
index ce48c7b82673..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 1c218944b1e9..64f864f1f361 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -4,3 +4,4 @@ rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
 rcutree.kthread_prio=2
 threadirqs
+tree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index 5adc6756792a..a8d94caf7d2f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutree.rcu_fanout_leaf=4 nohz_full=1-7
+rcutree.rcu_fanout_leaf=4 nohz_full=1-N
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 22478fd3a865..94d38445d393 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,3 +1,3 @@
 rcupdate.rcu_self_test=1
 rcutree.rcu_fanout_exact=1
-rcu_nocbs=0-7
+rcu_nocbs=all
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index 0333e9b18522..ffbe15109f0d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -12,5 +12,5 @@
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
 	echo $1 rcuscale.shutdown=1 \
-		rcuscale.verbose=1
+		rcuscale.verbose=0
 }
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
index 321e82641287..f81fa2c541a6 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
@@ -12,5 +12,5 @@
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
 	echo $1 refscale.shutdown=1 \
-		refscale.verbose=1
+		refscale.verbose=0
 }
diff --git a/tools/testing/selftests/resctrl/.gitignore b/tools/testing/selftests/resctrl/.gitignore
new file mode 100644
index 000000000000..ab68442b6bc8
--- /dev/null
+++ b/tools/testing/selftests/resctrl/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+resctrl_tests
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index d585cc1948cc..6bcee2ec91a9 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,5 +1,5 @@
 CC = $(CROSS_COMPILE)gcc
-CFLAGS = -g -Wall
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
 SRCS=$(wildcard *.c)
 OBJS=$(SRCS:.c=.o)
 
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
index 6e5a0ffa18e8..4b36b25b6ac0 100644
--- a/tools/testing/selftests/resctrl/README
+++ b/tools/testing/selftests/resctrl/README
@@ -46,8 +46,8 @@ ARGUMENTS
 Parameter '-h' shows usage information.
 
 usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
-        -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf
-        -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat
+        -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT; default benchmark is builtin fill_buf
+        -t test list: run tests specified in the test list, e.g. -t mbm, mba, cmt, cat
         -n no_of_bits: run cache tests using specified no of bits in cache bit mask
         -p cpu_no: specify CPU number to run the test. 1 is default
         -h: help
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 38dbf4962e33..68ff856d36f0 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -111,7 +111,7 @@ static int get_llc_perf(unsigned long *llc_perf_miss)
 
 /*
  * Get LLC Occupancy as reported by RESCTRL FS
- * For CQM,
+ * For CMT,
  * 1. If con_mon grp and mon grp given, then read from mon grp in
  * con_mon grp
  * 2. If only con_mon grp given, then read from con_mon grp
@@ -182,7 +182,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
 	/*
 	 * Measure cache miss from perf.
 	 */
-	if (!strcmp(param->resctrl_val, "cat")) {
+	if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) {
 		ret = get_llc_perf(&llc_perf_miss);
 		if (ret < 0)
 			return ret;
@@ -192,7 +192,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
 	/*
 	 * Measure llc occupancy from resctrl.
 	 */
-	if (!strcmp(param->resctrl_val, "cqm")) {
+	if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
 		ret = get_llc_occu_resctrl(&llc_occu_resc);
 		if (ret < 0)
 			return ret;
@@ -234,7 +234,7 @@ int cat_val(struct resctrl_val_param *param)
 	if (ret)
 		return ret;
 
-	if ((strcmp(resctrl_val, "cat") == 0)) {
+	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
 		ret = initialize_llc_perf();
 		if (ret)
 			return ret;
@@ -242,7 +242,7 @@ int cat_val(struct resctrl_val_param *param)
 
 	/* Test runs until the callback setup() tells the test to stop. */
 	while (1) {
-		if (strcmp(resctrl_val, "cat") == 0) {
+		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
 			ret = param->setup(1, param);
 			if (ret) {
 				ret = 0;
@@ -270,3 +270,45 @@ int cat_val(struct resctrl_val_param *param)
 
 	return ret;
 }
+
+/*
+ * show_cache_info:	show cache test result information
+ * @sum_llc_val:	sum of LLC cache result data
+ * @no_of_bits:		number of bits
+ * @cache_span:		cache span in bytes for CMT or in lines for CAT
+ * @max_diff:		max difference
+ * @max_diff_percent:	max difference percentage
+ * @num_of_runs:	number of runs
+ * @platform:		show test information on this platform
+ * @cmt:		CMT test or CAT test
+ *
+ * Return:		0 on success. non-zero on failure.
+ */
+int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
+		    unsigned long cache_span, unsigned long max_diff,
+		    unsigned long max_diff_percent, unsigned long num_of_runs,
+		    bool platform, bool cmt)
+{
+	unsigned long avg_llc_val;
+	float diff_percent;
+	long avg_diff;
+	int ret;
+
+	/* The divisor is num_of_runs - 1, so callers must pass at least 2. */
+	avg_llc_val = sum_llc_val / (num_of_runs - 1);
+	/* abs() takes int and would truncate this difference; use labs(). */
+	avg_diff = labs((long)cache_span - (long)avg_llc_val);
+	diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
+
+	/* Failures count only on @platform; CMT must also exceed @max_diff. */
+	ret = platform && abs((int)diff_percent) > max_diff_percent &&
+	      (cmt ? ((unsigned long)avg_diff > max_diff) : true);
+	ksft_print_msg("%s Check cache miss rate within %lu%%\n",
+		       ret ? "Fail:" : "Pass:", max_diff_percent);
+	ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
+	ksft_print_msg("Number of bits: %d\n", no_of_bits);
+	ksft_print_msg("Average LLC val: %lu\n", avg_llc_val);
+	ksft_print_msg("Cache span (%s): %lu\n", cmt ? "bytes" : "lines",
+		       cache_span);
+	return ret;
+}
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 5da43767b973..cd4f68388e0f 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -17,10 +17,10 @@
 #define MAX_DIFF_PERCENT	4
 #define MAX_DIFF		1000000
 
-int count_of_bits;
-char cbm_mask[256];
-unsigned long long_mask;
-unsigned long cache_size;
+static int count_of_bits;
+static char cbm_mask[256];
+static unsigned long long_mask;
+static unsigned long cache_size;
 
 /*
  * Change schemata. Write schemata to specified
@@ -52,27 +52,6 @@ static int cat_setup(int num, ...)
 	return ret;
 }
 
-static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits,
-			    unsigned long span)
-{
-	unsigned long allocated_cache_lines = span / 64;
-	unsigned long avg_llc_perf_miss = 0;
-	float diff_percent;
-
-	avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1);
-	diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) /
-				allocated_cache_lines * 100;
-
-	printf("%sok CAT: cache miss rate within %d%%\n",
-	       !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ?
-	       "not " : "", MAX_DIFF_PERCENT);
-	tests_run++;
-	printf("# Percent diff=%d\n", abs((int)diff_percent));
-	printf("# Number of bits: %d\n", no_of_bits);
-	printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss);
-	printf("# Allocated cache lines: %lu\n", allocated_cache_lines);
-}
-
 static int check_results(struct resctrl_val_param *param)
 {
 	char *token_array[8], temp[512];
@@ -80,7 +59,7 @@ static int check_results(struct resctrl_val_param *param)
 	int runs = 0, no_of_bits = 0;
 	FILE *fp;
 
-	printf("# Checking for pass/fail\n");
+	ksft_print_msg("Checking for pass/fail\n");
 	fp = fopen(param->filename, "r");
 	if (!fp) {
 		perror("# Cannot open file");
@@ -108,9 +87,9 @@ static int check_results(struct resctrl_val_param *param)
 	fclose(fp);
 	no_of_bits = count_bits(param->mask);
 
-	show_cache_info(sum_llc_perf_miss, no_of_bits, param->span);
-
-	return 0;
+	return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
+			       MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+			       !is_amd, false);
 }
 
 void cat_test_cleanup(void)
@@ -132,11 +111,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 	if (ret)
 		return ret;
 
-	if (!validate_resctrl_feature_request("cat"))
-		return -1;
-
 	/* Get default cbm mask for L3/L2 cache */
-	ret = get_cbm_mask(cache_type);
+	ret = get_cbm_mask(cache_type, cbm_mask);
 	if (ret)
 		return ret;
 
@@ -146,15 +122,18 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 	ret = get_cache_size(cpu_no, cache_type, &cache_size);
 	if (ret)
 		return ret;
-	printf("cache size :%lu\n", cache_size);
+	ksft_print_msg("Cache size :%lu\n", cache_size);
 
 	/* Get max number of bits from default-cabm mask */
 	count_of_bits = count_bits(long_mask);
 
-	if (n < 1 || n > count_of_bits - 1) {
-		printf("Invalid input value for no_of_bits n!\n");
-		printf("Please Enter value in range 1 to %d\n",
-		       count_of_bits - 1);
+	if (!n)
+		n = count_of_bits / 2;
+
+	if (n > count_of_bits - 1) {
+		ksft_print_msg("Invalid input value for no_of_bits n!\n");
+		ksft_print_msg("Please enter value in range 1 to %d\n",
+			       count_of_bits - 1);
 		return -1;
 	}
 
@@ -164,7 +143,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 		return -1;
 
 	struct resctrl_val_param param = {
-		.resctrl_val	= "cat",
+		.resctrl_val	= CAT_STR,
 		.cpu_no		= cpu_no,
 		.mum_resctrlfs	= 0,
 		.setup		= cat_setup,
diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index c8756152bd61..8968e36db99d 100644
--- a/tools/testing/selftests/resctrl/cqm_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Cache Monitoring Technology (CQM) test
+ * Cache Monitoring Technology (CMT) test
  *
  * Copyright (C) 2018 Intel Corporation
  *
@@ -11,17 +11,17 @@
 #include "resctrl.h"
 #include <unistd.h>
 
-#define RESULT_FILE_NAME	"result_cqm"
+#define RESULT_FILE_NAME	"result_cmt"
 #define NUM_OF_RUNS		5
 #define MAX_DIFF		2000000
 #define MAX_DIFF_PERCENT	15
 
-int count_of_bits;
-char cbm_mask[256];
-unsigned long long_mask;
-unsigned long cache_size;
+static int count_of_bits;
+static char cbm_mask[256];
+static unsigned long long_mask;
+static unsigned long cache_size;
 
-static int cqm_setup(int num, ...)
+static int cmt_setup(int num, ...)
 {
 	struct resctrl_val_param *p;
 	va_list param;
@@ -39,38 +39,6 @@ static int cqm_setup(int num, ...)
 	return 0;
 }
 
-static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits,
-			    unsigned long span)
-{
-	unsigned long avg_llc_occu_resc = 0;
-	float diff_percent;
-	long avg_diff = 0;
-	bool res;
-
-	avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1);
-	avg_diff = (long)abs(span - avg_llc_occu_resc);
-
-	diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100;
-
-	if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) ||
-	    (abs(avg_diff) <= MAX_DIFF))
-		res = true;
-	else
-		res = false;
-
-	printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not",
-	       MAX_DIFF, (int)MAX_DIFF_PERCENT);
-
-	printf("# diff: %ld\n", avg_diff);
-	printf("# percent diff=%d\n", abs((int)diff_percent));
-	printf("# Results are displayed in (Bytes)\n");
-	printf("# Number of bits: %d\n", no_of_bits);
-	printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc);
-	printf("# llc_occu_exp (span): %lu\n", span);
-
-	tests_run++;
-}
-
 static int check_results(struct resctrl_val_param *param, int no_of_bits)
 {
 	char *token_array[8], temp[512];
@@ -78,7 +46,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
 	int runs = 0;
 	FILE *fp;
 
-	printf("# checking for pass/fail\n");
+	ksft_print_msg("Checking for pass/fail\n");
 	fp = fopen(param->filename, "r");
 	if (!fp) {
 		perror("# Error in opening file\n");
@@ -86,7 +54,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
 		return errno;
 	}
 
-	while (fgets(temp, 1024, fp)) {
+	while (fgets(temp, sizeof(temp), fp)) {
 		char *token = strtok(temp, ":\t");
 		int fields = 0;
 
@@ -101,17 +69,18 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
 		runs++;
 	}
 	fclose(fp);
-	show_cache_info(sum_llc_occu_resc, no_of_bits, param->span);
 
-	return 0;
+	return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span,
+			       MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+			       true, true);
 }
 
-void cqm_test_cleanup(void)
+void cmt_test_cleanup(void)
 {
 	remove(RESULT_FILE_NAME);
 }
 
-int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
+int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 {
 	int ret, mum_resctrlfs;
 
@@ -122,10 +91,10 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 	if (ret)
 		return ret;
 
-	if (!validate_resctrl_feature_request("cqm"))
+	if (!validate_resctrl_feature_request(CMT_STR))
 		return -1;
 
-	ret = get_cbm_mask("L3");
+	ret = get_cbm_mask("L3", cbm_mask);
 	if (ret)
 		return ret;
 
@@ -134,18 +103,18 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 	ret = get_cache_size(cpu_no, "L3", &cache_size);
 	if (ret)
 		return ret;
-	printf("cache size :%lu\n", cache_size);
+	ksft_print_msg("Cache size :%lu\n", cache_size);
 
 	count_of_bits = count_bits(long_mask);
 
 	if (n < 1 || n > count_of_bits) {
-		printf("Invalid input value for numbr_of_bits n!\n");
-		printf("Please Enter value in range 1 to %d\n", count_of_bits);
+		ksft_print_msg("Invalid input value for numbr_of_bits n!\n");
+		ksft_print_msg("Please enter value in range 1 to %d\n", count_of_bits);
 		return -1;
 	}
 
 	struct resctrl_val_param param = {
-		.resctrl_val	= "cqm",
+		.resctrl_val	= CMT_STR,
 		.ctrlgrp	= "c1",
 		.mongrp		= "m1",
 		.cpu_no		= cpu_no,
@@ -154,7 +123,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 		.mask		= ~(long_mask << n) & long_mask,
 		.span		= cache_size * n / count_of_bits,
 		.num_of_runs	= 0,
-		.setup		= cqm_setup,
+		.setup		= cmt_setup,
 	};
 
 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
@@ -170,7 +139,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 	if (ret)
 		return ret;
 
-	cqm_test_cleanup();
+	cmt_test_cleanup();
 
 	return 0;
 }
diff --git a/tools/testing/selftests/resctrl/config b/tools/testing/selftests/resctrl/config
new file mode 100644
index 000000000000..8d9f2deb56ed
--- /dev/null
+++ b/tools/testing/selftests/resctrl/config
@@ -0,0 +1,2 @@
+CONFIG_X86_CPU_RESCTRL=y
+CONFIG_PROC_CPU_RESCTRL=y
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 79c611c99a3d..51e5cf22632f 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -115,7 +115,7 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
 
 	while (1) {
 		ret = fill_one_span_read(start_ptr, end_ptr);
-		if (!strcmp(resctrl_val, "cat"))
+		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
 			break;
 	}
 
@@ -134,7 +134,7 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
 {
 	while (1) {
 		fill_one_span_write(start_ptr, end_ptr);
-		if (!strcmp(resctrl_val, "cat"))
+		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
 			break;
 	}
 
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 7bf8eaa6204b..1a1bdb6180cf 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -12,7 +12,7 @@
 
 #define RESULT_FILE_NAME	"result_mba"
 #define NUM_OF_RUNS		5
-#define MAX_DIFF		300
+#define MAX_DIFF_PERCENT	5
 #define ALLOCATION_MAX		100
 #define ALLOCATION_MIN		10
 #define ALLOCATION_STEP		10
@@ -56,13 +56,14 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
 	int allocation, runs;
 	bool failed = false;
 
-	printf("# Results are displayed in (MB)\n");
+	ksft_print_msg("Results are displayed in (MB)\n");
 	/* Memory bandwidth from 100% down to 10% */
 	for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
 	     allocation++) {
 		unsigned long avg_bw_imc, avg_bw_resc;
 		unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
-		unsigned long avg_diff;
+		int avg_diff_per;
+		float avg_diff;
 
 		/*
 		 * The first run is discarded due to inaccurate value from
@@ -76,23 +77,26 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
 
 		avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1);
 		avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1);
-		avg_diff = labs((long)(avg_bw_resc - avg_bw_imc));
-
-		printf("%sok MBA schemata percentage %u smaller than %d %%\n",
-		       avg_diff > MAX_DIFF ? "not " : "",
-		       ALLOCATION_MAX - ALLOCATION_STEP * allocation,
-		       MAX_DIFF);
-		tests_run++;
-		printf("# avg_diff: %lu\n", avg_diff);
-		printf("# avg_bw_imc: %lu\n", avg_bw_imc);
-		printf("# avg_bw_resc: %lu\n", avg_bw_resc);
-		if (avg_diff > MAX_DIFF)
+		avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
+		avg_diff_per = (int)(avg_diff * 100);
+
+		ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n",
+			       avg_diff_per > MAX_DIFF_PERCENT ?
+			       "Fail:" : "Pass:",
+			       MAX_DIFF_PERCENT,
+			       ALLOCATION_MAX - ALLOCATION_STEP * allocation);
+
+		ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
+		ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
+		ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
+		if (avg_diff_per > MAX_DIFF_PERCENT)
 			failed = true;
 	}
 
-	printf("%sok schemata change using MBA%s\n", failed ? "not " : "",
-	       failed ? " # at least one test failed" : "");
-	tests_run++;
+	ksft_print_msg("%s Check schemata change using MBA\n",
+		       failed ? "Fail:" : "Pass:");
+	if (failed)
+		ksft_print_msg("At least one test failed\n");
 }
 
 static int check_results(void)
@@ -141,7 +145,7 @@ void mba_test_cleanup(void)
 int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
 {
 	struct resctrl_val_param param = {
-		.resctrl_val	= "mba",
+		.resctrl_val	= MBA_STR,
 		.ctrlgrp	= "c1",
 		.mongrp		= "m1",
 		.cpu_no		= cpu_no,
@@ -154,9 +158,6 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
 
 	remove(RESULT_FILE_NAME);
 
-	if (!validate_resctrl_feature_request("mba"))
-		return -1;
-
 	ret = resctrl_val(benchmark_cmd, &param);
 	if (ret)
 		return ret;
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 4700f7453f81..8392e5c55ed0 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -11,16 +11,16 @@
 #include "resctrl.h"
 
 #define RESULT_FILE_NAME	"result_mbm"
-#define MAX_DIFF		300
+#define MAX_DIFF_PERCENT	5
 #define NUM_OF_RUNS		5
 
-static void
+static int
 show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
 {
 	unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
 	unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
-	long avg_diff = 0;
-	int runs;
+	int runs, ret, avg_diff_per;
+	float avg_diff = 0;
 
 	/*
 	 * Discard the first value which is inaccurate due to monitoring setup
@@ -33,15 +33,18 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
 
 	avg_bw_imc = sum_bw_imc / 4;
 	avg_bw_resc = sum_bw_resc / 4;
-	avg_diff = avg_bw_resc - avg_bw_imc;
-
-	printf("%sok MBM: diff within %d%%\n",
-	       labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF);
-	tests_run++;
-	printf("# avg_diff: %lu\n", labs(avg_diff));
-	printf("# Span (MB): %d\n", span);
-	printf("# avg_bw_imc: %lu\n", avg_bw_imc);
-	printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+	avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
+	avg_diff_per = (int)(avg_diff * 100);
+
+	ret = avg_diff_per > MAX_DIFF_PERCENT;
+	ksft_print_msg("%s Check MBM diff within %d%%\n",
+		       ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT);
+	ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
+	ksft_print_msg("Span (MB): %d\n", span);
+	ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
+	ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
+
+	return ret;
 }
 
 static int check_results(int span)
@@ -49,10 +52,10 @@ static int check_results(int span)
 	unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS];
 	char temp[1024], *token_array[8];
 	char output[] = RESULT_FILE_NAME;
-	int runs;
+	int runs, ret;
 	FILE *fp;
 
-	printf("# Checking for pass/fail\n");
+	ksft_print_msg("Checking for pass/fail\n");
 
 	fp = fopen(output, "r");
 	if (!fp) {
@@ -76,11 +79,11 @@ static int check_results(int span)
 		runs++;
 	}
 
-	show_bw_info(bw_imc, bw_resc, span);
+	ret = show_bw_info(bw_imc, bw_resc, span);
 
 	fclose(fp);
 
-	return 0;
+	return ret;
 }
 
 static int mbm_setup(int num, ...)
@@ -114,7 +117,7 @@ void mbm_test_cleanup(void)
 int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
 {
 	struct resctrl_val_param param = {
-		.resctrl_val	= "mbm",
+		.resctrl_val	= MBM_STR,
 		.ctrlgrp	= "c1",
 		.mongrp		= "m1",
 		.span		= span,
@@ -128,9 +131,6 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
 
 	remove(RESULT_FILE_NAME);
 
-	if (!validate_resctrl_feature_request("mbm"))
-		return -1;
-
 	ret = resctrl_val(benchmark_cmd, &param);
 	if (ret)
 		return ret;
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 39bf59c6b9c5..1ad10c47e31d 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -23,11 +23,16 @@
 #include <sys/eventfd.h>
 #include <asm/unistd.h>
 #include <linux/perf_event.h>
+#include "../kselftest.h"
 
 #define MB			(1024 * 1024)
 #define RESCTRL_PATH		"/sys/fs/resctrl"
 #define PHYS_ID_PATH		"/sys/devices/system/cpu/cpu"
 #define CBM_MASK_PATH		"/sys/fs/resctrl/info"
+#define L3_PATH			"/sys/fs/resctrl/info/L3"
+#define MB_PATH			"/sys/fs/resctrl/info/MB"
+#define L3_MON_PATH		"/sys/fs/resctrl/info/L3_MON"
+#define L3_MON_FEATURES_PATH	"/sys/fs/resctrl/info/L3_MON/mon_features"
 
 #define PARENT_EXIT(err_msg)			\
 	do {					\
@@ -62,11 +67,15 @@ struct resctrl_val_param {
 	int		(*setup)(int num, ...);
 };
 
-pid_t bm_pid, ppid;
-int tests_run;
+#define MBM_STR			"mbm"
+#define MBA_STR			"mba"
+#define CMT_STR			"cmt"
+#define CAT_STR			"cat"
 
-char llc_occup_path[1024];
-bool is_amd;
+extern pid_t bm_pid, ppid;
+
+extern char llc_occup_path[1024];
+extern bool is_amd;
 
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
@@ -74,7 +83,7 @@ int remount_resctrlfs(bool mum_resctrlfs);
 int get_resource_id(int cpu_no, int *resource_id);
 int umount_resctrlfs(void);
 int validate_bw_report_request(char *bw_report);
-bool validate_resctrl_feature_request(char *resctrl_val);
+bool validate_resctrl_feature_request(const char *resctrl_val);
 char *fgrep(FILE *inf, const char *str);
 int taskset_benchmark(pid_t bm_pid, int cpu_no);
 void run_benchmark(int signum, siginfo_t *info, void *ucontext);
@@ -92,16 +101,20 @@ void tests_cleanup(void);
 void mbm_test_cleanup(void);
 int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
 void mba_test_cleanup(void);
-int get_cbm_mask(char *cache_type);
+int get_cbm_mask(char *cache_type, char *cbm_mask);
 int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
 void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
 int cat_val(struct resctrl_val_param *param);
 void cat_test_cleanup(void);
 int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
-int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
+int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
 unsigned int count_bits(unsigned long n);
-void cqm_test_cleanup(void);
+void cmt_test_cleanup(void);
 int get_core_sibling(int cpu_no);
 int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
+int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
+		    unsigned long cache_span, unsigned long max_diff,
+		    unsigned long max_diff_percent, unsigned long num_of_runs,
+		    bool platform, bool cmt);
 
 #endif /* RESCTRL_H */
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 425cc85ac883..f51b5fc066a3 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -37,10 +37,10 @@ void detect_amd(void)
 static void cmd_help(void)
 {
 	printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
-	printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM");
-	printf("\t default benchmark is builtin fill_buf\n");
+	printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n");
+	printf("\t   default benchmark is builtin fill_buf\n");
 	printf("\t-t test list: run tests specified in the test list, ");
-	printf("e.g. -t mbm, mba, cqm, cat\n");
+	printf("e.g. -t mbm, mba, cmt, cat\n");
 	printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
 	printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
 	printf("\t-h: help\n");
@@ -50,17 +50,88 @@ void tests_cleanup(void)
 {
 	mbm_test_cleanup();
 	mba_test_cleanup();
-	cqm_test_cleanup();
+	cmt_test_cleanup();
+	cat_test_cleanup();
+}
+
+static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
+			 int cpu_no, char *bw_report)
+{
+	int res;
+
+	ksft_print_msg("Starting MBM BW change ...\n");
+
+	if (!validate_resctrl_feature_request(MBM_STR)) {
+		ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
+		return;
+	}
+
+	if (!has_ben)
+		sprintf(benchmark_cmd[5], "%s", MBA_STR);
+	res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
+	ksft_test_result(!res, "MBM: bw change\n");
+	mbm_test_cleanup();
+}
+
+static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
+			 int cpu_no, char *bw_report)
+{
+	int res;
+
+	ksft_print_msg("Starting MBA Schemata change ...\n");
+
+	if (!validate_resctrl_feature_request(MBA_STR)) {
+		ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
+		return;
+	}
+
+	if (!has_ben)
+		sprintf(benchmark_cmd[1], "%d", span);
+	res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
+	ksft_test_result(!res, "MBA: schemata change\n");
+	mba_test_cleanup();
+}
+
+static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
+{
+	int res;
+
+	ksft_print_msg("Starting CMT test ...\n");
+	if (!validate_resctrl_feature_request(CMT_STR)) {
+		ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
+		return;
+	}
+
+	if (!has_ben)
+		sprintf(benchmark_cmd[5], "%s", CMT_STR);
+	res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
+	ksft_test_result(!res, "CMT: test\n");
+	cmt_test_cleanup();
+}
+
+static void run_cat_test(int cpu_no, int no_of_bits)
+{
+	int res;
+
+	ksft_print_msg("Starting CAT test ...\n");
+
+	if (!validate_resctrl_feature_request(CAT_STR)) {
+		ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
+		return;
+	}
+
+	res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
+	ksft_test_result(!res, "CAT: test\n");
 	cat_test_cleanup();
 }
 
 int main(int argc, char **argv)
 {
-	bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true;
-	int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5;
+	bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true;
+	int c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 0;
 	char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
 	char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
-	int ben_ind, ben_count;
+	int ben_ind, ben_count, tests = 0;
 	bool cat_test = true;
 
 	for (i = 0; i < argc; i++) {
@@ -73,7 +144,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt(argc_new, argv, "ht:b:")) != -1) {
+	while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) {
 		char *token;
 
 		switch (c) {
@@ -82,17 +153,21 @@ int main(int argc, char **argv)
 
 			mbm_test = false;
 			mba_test = false;
-			cqm_test = false;
+			cmt_test = false;
 			cat_test = false;
 			while (token) {
-				if (!strcmp(token, "mbm")) {
+				if (!strncmp(token, MBM_STR, sizeof(MBM_STR))) {
 					mbm_test = true;
-				} else if (!strcmp(token, "mba")) {
+					tests++;
+				} else if (!strncmp(token, MBA_STR, sizeof(MBA_STR))) {
 					mba_test = true;
-				} else if (!strcmp(token, "cqm")) {
-					cqm_test = true;
-				} else if (!strcmp(token, "cat")) {
+					tests++;
+				} else if (!strncmp(token, CMT_STR, sizeof(CMT_STR))) {
+					cmt_test = true;
+					tests++;
+				} else if (!strncmp(token, CAT_STR, sizeof(CAT_STR))) {
 					cat_test = true;
+					tests++;
 				} else {
 					printf("invalid argument\n");
 
@@ -106,6 +181,10 @@ int main(int argc, char **argv)
 			break;
 		case 'n':
 			no_of_bits = atoi(optarg);
+			if (no_of_bits <= 0) {
+				printf("Bail out! invalid argument for no_of_bits\n");
+				return -1;
+			}
 			break;
 		case 'h':
 			cmd_help();
@@ -118,7 +197,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	printf("TAP version 13\n");
+	ksft_print_header();
 
 	/*
 	 * Typically we need root privileges, because:
@@ -126,7 +205,7 @@ int main(int argc, char **argv)
 	 * 2. We execute perf commands
 	 */
 	if (geteuid() != 0)
-		printf("# WARNING: not running as root, tests may fail.\n");
+		return ksft_exit_fail_msg("Not running as root, abort testing.\n");
 
 	/* Detect AMD vendor */
 	detect_amd();
@@ -155,48 +234,26 @@ int main(int argc, char **argv)
 	sprintf(bw_report, "reads");
 	sprintf(bm_type, "fill_buf");
 
-	check_resctrlfs_support();
+	if (!check_resctrlfs_support())
+		return ksft_exit_fail_msg("resctrl FS does not exist\n");
+
 	filter_dmesg();
 
-	if (!is_amd && mbm_test) {
-		printf("# Starting MBM BW change ...\n");
-		if (!has_ben)
-			sprintf(benchmark_cmd[5], "%s", "mba");
-		res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
-		printf("%sok MBM: bw change\n", res ? "not " : "");
-		mbm_test_cleanup();
-		tests_run++;
-	}
+	ksft_set_plan(tests ? : 4);
 
-	if (!is_amd && mba_test) {
-		printf("# Starting MBA Schemata change ...\n");
-		if (!has_ben)
-			sprintf(benchmark_cmd[1], "%d", span);
-		res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
-		printf("%sok MBA: schemata change\n", res ? "not " : "");
-		mba_test_cleanup();
-		tests_run++;
-	}
+	if (!is_amd && mbm_test)
+		run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
 
-	if (cqm_test) {
-		printf("# Starting CQM test ...\n");
-		if (!has_ben)
-			sprintf(benchmark_cmd[5], "%s", "cqm");
-		res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd);
-		printf("%sok CQM: test\n", res ? "not " : "");
-		cqm_test_cleanup();
-		tests_run++;
-	}
+	if (!is_amd && mba_test)
+		run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
 
-	if (cat_test) {
-		printf("# Starting CAT test ...\n");
-		res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
-		printf("%sok CAT: test\n", res ? "not " : "");
-		tests_run++;
-		cat_test_cleanup();
-	}
+	if (cmt_test)
+		run_cmt_test(has_ben, benchmark_cmd, cpu_no);
+
+	if (cat_test)
+		run_cat_test(cpu_no, no_of_bits);
 
-	printf("1..%d\n", tests_run);
+	umount_resctrlfs();
 
-	return 0;
+	return ksft_exit_pass();
 }
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 520fea3606d1..95224345c78e 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -221,8 +221,8 @@ static int read_from_imc_dir(char *imc_dir, int count)
  */
 static int num_of_imcs(void)
 {
+	char imc_dir[512], *temp;
 	unsigned int count = 0;
-	char imc_dir[512];
 	struct dirent *ep;
 	int ret;
 	DIR *dp;
@@ -230,7 +230,25 @@ static int num_of_imcs(void)
 	dp = opendir(DYN_PMU_PATH);
 	if (dp) {
 		while ((ep = readdir(dp))) {
-			if (strstr(ep->d_name, UNCORE_IMC)) {
+			temp = strstr(ep->d_name, UNCORE_IMC);
+			if (!temp)
+				continue;
+
+			/*
+			 * iMC counters are named "uncore_imc_<n>", so advance
+			 * the pointer to <n>. sizeof(UNCORE_IMC) also counts
+			 * the terminating null character, which accounts for
+			 * the trailing underscore in "uncore_imc_", so the
+			 * underscore need not be added separately.
+			 */
+			temp = temp + sizeof(UNCORE_IMC);
+
+			/*
+			 * Some directories under "DYN_PMU_PATH" could have
+			 * names like "uncore_imc_free_running", hence, check if
+			 * first character is a numerical digit or not.
+			 */
+			if (temp[0] >= '0' && temp[0] <= '9') {
 				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
 					ep->d_name);
 				ret = read_from_imc_dir(imc_dir, count);
@@ -282,9 +300,9 @@ static int initialize_mem_bw_imc(void)
  * Memory B/W utilized by a process on a socket can be calculated using
  * iMC counters. Perf events are used to read these counters.
  *
- * Return: >= 0 on success. < 0 on failure.
+ * Return: 0 on success, < 0 on failure.
  */
-static float get_mem_bw_imc(int cpu_no, char *bw_report)
+static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
 {
 	float reads, writes, of_mul_read, of_mul_write;
 	int imc, j, ret;
@@ -355,13 +373,18 @@ static float get_mem_bw_imc(int cpu_no, char *bw_report)
 		close(imc_counters_config[imc][WRITE].fd);
 	}
 
-	if (strcmp(bw_report, "reads") == 0)
-		return reads;
+	if (strcmp(bw_report, "reads") == 0) {
+		*bw_imc = reads;
+		return 0;
+	}
 
-	if (strcmp(bw_report, "writes") == 0)
-		return writes;
+	if (strcmp(bw_report, "writes") == 0) {
+		*bw_imc = writes;
+		return 0;
+	}
 
-	return (reads + writes);
+	*bw_imc = reads + writes;
+	return 0;
 }
 
 void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
@@ -397,10 +420,10 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
 		return;
 	}
 
-	if (strcmp(resctrl_val, "mbm") == 0)
+	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
 		set_mbm_path(ctrlgrp, mongrp, resource_id);
 
-	if ((strcmp(resctrl_val, "mba") == 0)) {
+	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
 		if (ctrlgrp)
 			sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
 				RESCTRL_PATH, ctrlgrp, resource_id);
@@ -420,9 +443,8 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
  * 1. If con_mon grp is given, then read from it
  * 2. If con_mon grp is not given, then read from root con_mon grp
  */
-static unsigned long get_mem_bw_resctrl(void)
+static int get_mem_bw_resctrl(unsigned long *mbm_total)
 {
-	unsigned long mbm_total = 0;
 	FILE *fp;
 
 	fp = fopen(mbm_total_path, "r");
@@ -431,7 +453,7 @@ static unsigned long get_mem_bw_resctrl(void)
 
 		return -1;
 	}
-	if (fscanf(fp, "%lu", &mbm_total) <= 0) {
+	if (fscanf(fp, "%lu", mbm_total) <= 0) {
 		perror("Could not get mbm local bytes");
 		fclose(fp);
 
@@ -439,7 +461,7 @@ static unsigned long get_mem_bw_resctrl(void)
 	}
 	fclose(fp);
 
-	return mbm_total;
+	return 0;
 }
 
 pid_t bm_pid, ppid;
@@ -449,7 +471,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
 	kill(bm_pid, SIGKILL);
 	umount_resctrlfs();
 	tests_cleanup();
-	printf("Ending\n\n");
+	ksft_print_msg("Ending\n\n");
 
 	exit(EXIT_SUCCESS);
 }
@@ -492,7 +514,7 @@ static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
 	return 0;
 }
 
-static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
+static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
 {
 	if (strlen(ctrlgrp) && strlen(mongrp))
 		sprintf(llc_occup_path,	CON_MON_LCC_OCCUP_PATH,	RESCTRL_PATH,
@@ -512,7 +534,7 @@ static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
  * @ctrlgrp:			Name of the control monitor group (con_mon grp)
  * @mongrp:			Name of the monitor group (mon grp)
  * @cpu_no:			CPU number that the benchmark PID is binded to
- * @resctrl_val:		Resctrl feature (Eg: cat, cqm.. etc)
+ * @resctrl_val:		Resctrl feature (Eg: cat, cmt.. etc)
  */
 static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
 					int cpu_no, char *resctrl_val)
@@ -524,14 +546,15 @@ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
 		return;
 	}
 
-	if (strcmp(resctrl_val, "cqm") == 0)
-		set_cqm_path(ctrlgrp, mongrp, resource_id);
+	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
+		set_cmt_path(ctrlgrp, mongrp, resource_id);
 }
 
 static int
 measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
 {
-	unsigned long bw_imc, bw_resc, bw_resc_end;
+	unsigned long bw_resc, bw_resc_end;
+	float bw_imc;
 	int ret;
 
 	/*
@@ -541,13 +564,13 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
 	 * Compare the two values to validate resctrl value.
 	 * It takes 1sec to measure the data.
 	 */
-	bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report);
-	if (bw_imc <= 0)
-		return bw_imc;
+	ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc);
+	if (ret < 0)
+		return ret;
 
-	bw_resc_end = get_mem_bw_resctrl();
-	if (bw_resc_end <= 0)
-		return bw_resc_end;
+	ret = get_mem_bw_resctrl(&bw_resc_end);
+	if (ret < 0)
+		return ret;
 
 	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
 	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
@@ -579,8 +602,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 	if (strcmp(param->filename, "") == 0)
 		sprintf(param->filename, "stdio");
 
-	if ((strcmp(resctrl_val, "mba")) == 0 ||
-	    (strcmp(resctrl_val, "mbm")) == 0) {
+	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
+	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
 		ret = validate_bw_report_request(param->bw_report);
 		if (ret)
 			return ret;
@@ -645,7 +668,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 		PARENT_EXIT("Child is done");
 	}
 
-	printf("# benchmark PID: %d\n", bm_pid);
+	ksft_print_msg("Benchmark PID: %d\n", bm_pid);
 
 	/*
 	 * Register CTRL-C handler for parent, as it has to kill benchmark
@@ -674,15 +697,15 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 	if (ret)
 		goto out;
 
-	if ((strcmp(resctrl_val, "mbm") == 0) ||
-	    (strcmp(resctrl_val, "mba") == 0)) {
+	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+	    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
 		ret = initialize_mem_bw_imc();
 		if (ret)
 			goto out;
 
 		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
 					  param->cpu_no, resctrl_val);
-	} else if (strcmp(resctrl_val, "cqm") == 0)
+	} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
 		initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
 					    param->cpu_no, resctrl_val);
 
@@ -710,8 +733,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 
 	/* Test runs until the callback setup() tells the test to stop. */
 	while (1) {
-		if ((strcmp(resctrl_val, "mbm") == 0) ||
-		    (strcmp(resctrl_val, "mba") == 0)) {
+		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
 			ret = param->setup(1, param);
 			if (ret) {
 				ret = 0;
@@ -721,7 +744,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 			ret = measure_vals(param, &bw_resc_start);
 			if (ret)
 				break;
-		} else if (strcmp(resctrl_val, "cqm") == 0) {
+		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
 			ret = param->setup(1, param);
 			if (ret) {
 				ret = 0;
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 19c0ec4045a4..5f5a166ade60 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -10,8 +10,6 @@
  */
 #include "resctrl.h"
 
-int tests_run;
-
 static int find_resctrl_mount(char *buffer)
 {
 	FILE *mounts;
@@ -49,8 +47,6 @@ static int find_resctrl_mount(char *buffer)
 	return -ENOENT;
 }
 
-char cbm_mask[256];
-
 /*
  * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
  * @mum_resctrlfs:	Should the resctrl FS be remounted?
@@ -70,28 +66,25 @@ int remount_resctrlfs(bool mum_resctrlfs)
 	if (ret)
 		strcpy(mountpoint, RESCTRL_PATH);
 
-	if (!ret && mum_resctrlfs && umount(mountpoint)) {
-		printf("not ok unmounting \"%s\"\n", mountpoint);
-		perror("# umount");
-		tests_run++;
-	}
+	if (!ret && mum_resctrlfs && umount(mountpoint))
+		ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint);
 
 	if (!ret && !mum_resctrlfs)
 		return 0;
 
+	ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
 	ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
-	printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "",
-	       RESCTRL_PATH);
 	if (ret)
 		perror("# mount");
 
-	tests_run++;
-
 	return ret;
 }
 
 int umount_resctrlfs(void)
 {
+	if (find_resctrl_mount(NULL))
+		return 0;
+
 	if (umount(RESCTRL_PATH)) {
 		perror("# Unable to umount resctrl");
 
@@ -205,16 +198,18 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
 /*
  * get_cbm_mask - Get cbm mask for given cache
  * @cache_type:	Cache level L2/L3
- *
- * Mask is stored in cbm_mask which is global variable.
+ * @cbm_mask:	cbm_mask returned as a string
  *
  * Return: = 0 on success, < 0 on failure.
  */
-int get_cbm_mask(char *cache_type)
+int get_cbm_mask(char *cache_type, char *cbm_mask)
 {
 	char cbm_mask_path[1024];
 	FILE *fp;
 
+	if (!cbm_mask)
+		return -1;
+
 	sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type);
 
 	fp = fopen(cbm_mask_path, "r");
@@ -268,7 +263,7 @@ int get_core_sibling(int cpu_no)
 	while (token) {
 		sibling_cpu_no = atoi(token);
 		/* Skipping core 0 as we don't want to run test on core 0 */
-		if (sibling_cpu_no != 0)
+		if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no)
 			break;
 		token = strtok(NULL, "-,");
 	}
@@ -334,7 +329,7 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 		operation = atoi(benchmark_cmd[4]);
 		sprintf(resctrl_val, "%s", benchmark_cmd[5]);
 
-		if (strcmp(resctrl_val, "cqm") != 0)
+		if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
 			buffer_span = span * MB;
 		else
 			buffer_span = span;
@@ -458,9 +453,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
 	if (ret)
 		goto out;
 
-	/* Create mon grp and write pid into it for "mbm" and "cqm" test */
-	if ((strcmp(resctrl_val, "cqm") == 0) ||
-	    (strcmp(resctrl_val, "mbm") == 0)) {
+	/* Create mon grp and write pid into it for "mbm" and "cmt" test */
+	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) ||
+	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
 		if (strlen(mongrp)) {
 			sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
 			sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
@@ -477,13 +472,10 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
 	}
 
 out:
-	printf("%sok writing benchmark parameters to resctrl FS\n",
-	       ret ? "not " : "");
+	ksft_print_msg("Writing benchmark parameters to resctrl FS\n");
 	if (ret)
 		perror("# writing to resctrlfs");
 
-	tests_run++;
-
 	return ret;
 }
 
@@ -505,13 +497,13 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
 	int resource_id, ret = 0;
 	FILE *fp;
 
-	if ((strcmp(resctrl_val, "mba") != 0) &&
-	    (strcmp(resctrl_val, "cat") != 0) &&
-	    (strcmp(resctrl_val, "cqm") != 0))
+	if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
+	    strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) &&
+	    strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
 		return -ENOENT;
 
 	if (!schemata) {
-		printf("# Skipping empty schemata update\n");
+		ksft_print_msg("Skipping empty schemata update\n");
 
 		return -1;
 	}
@@ -528,9 +520,10 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
 	else
 		sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
 
-	if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm"))
+	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
+	    !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
 		sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
-	if (strcmp(resctrl_val, "mba") == 0)
+	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)))
 		sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
 
 	fp = fopen(controlgroup, "w");
@@ -551,10 +544,9 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
 	fclose(fp);
 
 out:
-	printf("%sok Write schema \"%s\" to resctrl FS%s%s\n",
-	       ret ? "not " : "", schema, ret ? " # " : "",
-	       ret ? reason : "");
-	tests_run++;
+	ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
+		       schema, ret ? " # " : "",
+		       ret ? reason : "");
 
 	return ret;
 }
@@ -578,18 +570,20 @@ bool check_resctrlfs_support(void)
 
 	fclose(inf);
 
-	printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not ");
-	tests_run++;
+	ksft_print_msg("%s Check kernel supports resctrl filesystem\n",
+		       ret ? "Pass:" : "Fail:");
+
+	if (!ret)
+		return ret;
 
 	dp = opendir(RESCTRL_PATH);
-	printf("%sok resctrl mountpoint \"%s\" exists\n",
-	       dp ? "" : "not ", RESCTRL_PATH);
+	ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n",
+		       dp ? "Pass:" : "Fail:", RESCTRL_PATH);
 	if (dp)
 		closedir(dp);
-	tests_run++;
 
-	printf("# resctrl filesystem %s mounted\n",
-	       find_resctrl_mount(NULL) ? "not" : "is");
+	ksft_print_msg("resctrl filesystem %s mounted\n",
+		       find_resctrl_mount(NULL) ? "not" : "is");
 
 	return ret;
 }
@@ -615,26 +609,56 @@ char *fgrep(FILE *inf, const char *str)
  * validate_resctrl_feature_request - Check if requested feature is valid.
  * @resctrl_val:	Requested feature
  *
- * Return: 0 on success, non-zero on failure
+ * Return: True if the feature is supported, else false
  */
-bool validate_resctrl_feature_request(char *resctrl_val)
+bool validate_resctrl_feature_request(const char *resctrl_val)
 {
-	FILE *inf = fopen("/proc/cpuinfo", "r");
+	struct stat statbuf;
 	bool found = false;
 	char *res;
+	FILE *inf;
 
-	if (!inf)
+	if (!resctrl_val)
 		return false;
 
-	res = fgrep(inf, "flags");
-
-	if (res) {
-		char *s = strchr(res, ':');
+	if (remount_resctrlfs(false))
+		return false;
 
-		found = s && !strstr(s, resctrl_val);
-		free(res);
+	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
+		if (!stat(L3_PATH, &statbuf))
+			return true;
+	} else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
+		if (!stat(MB_PATH, &statbuf))
+			return true;
+	} else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+		   !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
+		if (!stat(L3_MON_PATH, &statbuf)) {
+			inf = fopen(L3_MON_FEATURES_PATH, "r");
+			if (!inf)
+				return false;
+
+			if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
+				res = fgrep(inf, "llc_occupancy");
+				if (res) {
+					found = true;
+					free(res);
+				}
+			}
+
+			if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
+				res = fgrep(inf, "mbm_total_bytes");
+				if (res) {
+					free(res);
+					res = fgrep(inf, "mbm_local_bytes");
+					if (res) {
+						found = true;
+						free(res);
+					}
+				}
+			}
+			fclose(inf);
+		}
 	}
-	fclose(inf);
 
 	return found;
 }
@@ -671,9 +695,9 @@ int filter_dmesg(void)
 
 	while (fgets(line, 1024, fp)) {
 		if (strstr(line, "intel_rdt:"))
-			printf("# dmesg: %s", line);
+			ksft_print_msg("dmesg: %s", line);
 		if (strstr(line, "resctrl:"))
-			printf("# dmesg: %s", line);
+			ksft_print_msg("dmesg: %s", line);
 	}
 	fclose(fp);
 	waitpid(pid, NULL, 0);
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
index 592c1ccf4576..0bd73428d2f3 100644
--- a/tools/testing/selftests/sgx/defines.h
+++ b/tools/testing/selftests/sgx/defines.h
@@ -14,7 +14,7 @@
 #define __aligned(x) __attribute__((__aligned__(x)))
 #define __packed __attribute__((packed))
 
-#include "../../../../arch/x86/kernel/cpu/sgx/arch.h"
+#include "../../../../arch/x86/include/asm/sgx.h"
 #include "../../../../arch/x86/include/asm/enclu.h"
 #include "../../../../arch/x86/include/uapi/asm/sgx.h"
 
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9d43b75aaa55..f441ac34b4d4 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -45,19 +45,19 @@ static bool encl_map_bin(const char *path, struct encl *encl)
 
 	fd = open(path, O_RDONLY);
 	if (fd == -1)  {
-		perror("open()");
+		perror("enclave executable open()");
 		return false;
 	}
 
 	ret = stat(path, &sb);
 	if (ret) {
-		perror("stat()");
+		perror("enclave executable stat()");
 		goto err;
 	}
 
 	bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (bin == MAP_FAILED) {
-		perror("mmap()");
+		perror("enclave executable mmap()");
 		goto err;
 	}
 
@@ -90,8 +90,7 @@ static bool encl_ioc_create(struct encl *encl)
 	ioc.src = (unsigned long)secs;
 	rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc);
 	if (rc) {
-		fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n",
-			errno);
+		perror("SGX_IOC_ENCLAVE_CREATE failed");
 		munmap((void *)secs->base, encl->encl_size);
 		return false;
 	}
@@ -116,31 +115,72 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
 
 	rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
 	if (rc < 0) {
-		fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n",
-			errno);
+		perror("SGX_IOC_ENCLAVE_ADD_PAGES failed");
 		return false;
 	}
 
 	return true;
 }
 
+
+
 bool encl_load(const char *path, struct encl *encl)
 {
+	const char device_path[] = "/dev/sgx_enclave";
 	Elf64_Phdr *phdr_tbl;
 	off_t src_offset;
 	Elf64_Ehdr *ehdr;
+	struct stat sb;
+	void *ptr;
 	int i, j;
 	int ret;
+	int fd = -1;
 
 	memset(encl, 0, sizeof(*encl));
 
-	ret = open("/dev/sgx_enclave", O_RDWR);
-	if (ret < 0) {
-		fprintf(stderr, "Unable to open /dev/sgx_enclave\n");
+	fd = open(device_path, O_RDWR);
+	if (fd < 0) {
+		perror("Unable to open /dev/sgx_enclave");
+		goto err;
+	}
+
+	ret = stat(device_path, &sb);
+	if (ret) {
+		perror("device file stat()");
+		goto err;
+	}
+
+	/*
+	 * This just checks if the /dev file has these permission
+	 * bits set.  It does not check that the current user is
+	 * the owner or in the owning group.
+	 */
+	if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+		fprintf(stderr, "no execute permissions on device file %s\n", device_path);
+		goto err;
+	}
+
+	ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+	if (ptr == (void *)-1) {
+		perror("mmap for read");
+		goto err;
+	}
+	munmap(ptr, PAGE_SIZE);
+
+#define ERR_MSG \
+"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \
+" Check that current user has execute permissions on %s and \n" \
+" that /dev does not have noexec set: mount | grep \"/dev .*noexec\"\n" \
+" If so, remount it executable: mount -o remount,exec /dev\n\n"
+
+	ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0);
+	if (ptr == (void *)-1) {
+		fprintf(stderr, ERR_MSG, device_path);
 		goto err;
 	}
+	munmap(ptr, PAGE_SIZE);
 
-	encl->fd = ret;
+	encl->fd = fd;
 
 	if (!encl_map_bin(path, encl))
 		goto err;
@@ -217,6 +257,8 @@ bool encl_load(const char *path, struct encl *encl)
 	return true;
 
 err:
+	if (fd != -1)
+		close(fd);
 	encl_delete(encl);
 	return false;
 }
@@ -229,7 +271,7 @@ static bool encl_map_area(struct encl *encl)
 	area = mmap(NULL, encl_size * 2, PROT_NONE,
 		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	if (area == MAP_FAILED) {
-		perror("mmap");
+		perror("reservation mmap()");
 		return false;
 	}
 
@@ -268,8 +310,7 @@ bool encl_build(struct encl *encl)
 	ioc.sigstruct = (uint64_t)&encl->sigstruct;
 	ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc);
 	if (ret) {
-		fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n",
-			errno);
+		perror("SGX_IOC_ENCLAVE_INIT failed");
 		return false;
 	}
 
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 724cec700926..d304a4044eb9 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -15,6 +15,7 @@
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#include <sys/auxv.h>
 #include "defines.h"
 #include "main.h"
 #include "../kselftest.h"
@@ -28,24 +29,6 @@ struct vdso_symtab {
 	Elf64_Word *elf_hashtab;
 };
 
-static void *vdso_get_base_addr(char *envp[])
-{
-	Elf64_auxv_t *auxv;
-	int i;
-
-	for (i = 0; envp[i]; i++)
-		;
-
-	auxv = (Elf64_auxv_t *)&envp[i + 1];
-
-	for (i = 0; auxv[i].a_type != AT_NULL; i++) {
-		if (auxv[i].a_type == AT_SYSINFO_EHDR)
-			return (void *)auxv[i].a_un.a_val;
-	}
-
-	return NULL;
-}
-
 static Elf64_Dyn *vdso_get_dyntab(void *addr)
 {
 	Elf64_Ehdr *ehdr = addr;
@@ -162,7 +145,7 @@ static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r
 	return 0;
 }
 
-int main(int argc, char *argv[], char *envp[])
+int main(int argc, char *argv[])
 {
 	struct sgx_enclave_run run;
 	struct vdso_symtab symtab;
@@ -195,7 +178,7 @@ int main(int argc, char *argv[], char *envp[])
 		addr = mmap((void *)encl.encl_base + seg->offset, seg->size,
 			    seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0);
 		if (addr == MAP_FAILED) {
-			fprintf(stderr, "mmap() failed, errno=%d.\n", errno);
+			perror("mmap() segment failed");
 			exit(KSFT_FAIL);
 		}
 	}
@@ -203,7 +186,8 @@ int main(int argc, char *argv[], char *envp[])
 	memset(&run, 0, sizeof(run));
 	run.tcs = encl.encl_base;
 
-	addr = vdso_get_base_addr(envp);
+	/* Get vDSO base address */
+	addr = (void *)getauxval(AT_SYSINFO_EHDR);
 	if (!addr)
 		goto err;
 
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index bfc974b4572d..ef8eb3604595 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -3,7 +3,7 @@
  *		(C) Copyright IBM 2012
  *		Licensed under the GPLv2
  *
- *  NOTE: This is a meta-test which quickly changes the clocksourc and
+ *  NOTE: This is a meta-test which quickly changes the clocksource and
  *  then uses other tests to detect problems. Thus this test requires
  *  that the inconsistency-check and nanosleep tests be present in the
  *  same directory it is run from.
@@ -134,7 +134,7 @@ int main(int argv, char **argc)
 		return -1;
 	}
 
-	/* Check everything is sane before we start switching asyncrhonously */
+	/* Check everything is sane before we start switching asynchronously */
 	for (i = 0; i < count; i++) {
 		printf("Validating clocksource %s\n", clocksource_list[i]);
 		if (change_clocksource(clocksource_list[i])) {
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index 19e46ed5dfb5..23eb398c8140 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -5,7 +5,7 @@
  *              Licensed under the GPLv2
  *
  *  This test signals the kernel to insert a leap second
- *  every day at midnight GMT. This allows for stessing the
+ *  every day at midnight GMT. This allows for stressing the
  *  kernel's leap-second behavior, as well as how well applications
  *  handle the leap-second discontinuity.
  *
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index dc80728ed191..f70802c5dd0d 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -4,10 +4,10 @@
  *              (C) Copyright 2013, 2015 Linaro Limited
  *              Licensed under the GPL
  *
- * This test demonstrates leapsecond deadlock that is possibe
+ * This test demonstrates leapsecond deadlock that is possible
  * on kernels from 2.6.26 to 3.3.
  *
- * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * WARNING: THIS WILL LIKELY HARD HANG SYSTEMS AND MAY LOSE DATA
  * RUN AT YOUR OWN RISK!
  *  To build:
  *	$ gcc leapcrash.c -o leapcrash -lrt
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index cf3e48919874..80aed4bf06fb 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -76,7 +76,7 @@ void checklist(struct timespec *list, int size)
 
 /* The shared thread shares a global list
  * that each thread fills while holding the lock.
- * This stresses clock syncronization across cpus.
+ * This stresses clock synchronization across cpus.
  */
 void *shared_thread(void *arg)
 {
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
index a71d92da8f46..f3f56e681e9f 100644
--- a/tools/testing/selftests/x86/thunks_32.S
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -45,3 +45,5 @@ call64_from_32:
 	ret
 
 .size call64_from_32, .-call64_from_32
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/usb/usbip/doc/usbip.8 b/tools/usb/usbip/doc/usbip.8
index a15d20063b98..1f26e4a00638 100644
--- a/tools/usb/usbip/doc/usbip.8
+++ b/tools/usb/usbip/doc/usbip.8
@@ -50,9 +50,16 @@ Attach a remote USB device.
 .PP
 
 .HP
+\fBattach\fR \-\-remote=<\fIhost\fR> \-\-device=<\fIdev_id\fR>
+.IP
+Attach a remote USB gadget.
+Only used when the remote usbipd is in device mode.
+.PP
+
+.HP
 \fBdetach\fR \-\-port=<\fIport\fR>
 .IP
-Detach an imported USB device.
+Detach an imported USB device/gadget.
 .PP
 
 .HP
@@ -74,11 +81,25 @@ List USB devices exported by a remote host.
 .PP
 
 .HP
+\fBlist\fR \-\-device
+.IP
+List USB gadgets of local usbip-vudc.
+Only used when the local usbipd is in device mode.
+Note that this cannot list the usbip-vudc USB gadgets exported by a remote usbipd running in device mode.
+.PP
+
+.HP
 \fBlist\fR \-\-local
 .IP
 List local USB devices.
 .PP
 
+.HP
+\fBport\fR
+.IP
+List imported devices/gadgets.
+.PP
+
 
 .SH EXAMPLES
 
@@ -90,8 +111,27 @@ List local USB devices.
     client:# usbip attach --remote=server --busid=1-2
         - Connect the remote USB device.
 
+    client:# usbip port
+        - List imported devices/gadgets.
+
     client:# usbip detach --port=0
         - Detach the usb device.
 
+The following example shows the usage of device mode:
+
+    server:# usbip list --device
+        - List gadgets exported by local usbipd server.
+
+    client:# modprobe vhci-hcd
+
+    client:# usbip attach --remote=server --device=usbip-vudc.0
+        - Connect the remote USB gadget.
+
+    client:# usbip port
+        - List imported devices/gadgets.
+
+    client:# usbip detach --port=0
+        - Detach the usb gadget.
+
 .SH "SEE ALSO"
 \fBusbipd\fP\fB(8)\fB\fP
diff --git a/tools/usb/usbip/doc/usbipd.8 b/tools/usb/usbip/doc/usbipd.8
index fb62a756893b..d974394f86a1 100644
--- a/tools/usb/usbip/doc/usbipd.8
+++ b/tools/usb/usbip/doc/usbipd.8
@@ -30,6 +30,12 @@ Bind to IPv6. Default is both.
 .PP
 
 .HP
+\fB\-e\fR, \fB\-\-device\fR
+.IP
+Run in device mode. Rather than drive an attached device, create a virtual UDC to bind gadgets to.
+.PP
+
+.HP
 \fB\-D\fR, \fB\-\-daemon\fR
 .IP
 Run as a daemon process.
@@ -86,6 +92,26 @@ USB/IP client can connect and use exported devices.
         - A usb device 1-2 is now exportable to other hosts!
         - Use 'usbip unbind --busid=1-2' when you want to shutdown exporting and use the device locally.
 
+The following example shows the usage of device mode:
+
+    server:# modprobe usbip-vudc
+        - Use /sys/class/udc/ interface.
+        - usbip-host is independent of this module.
+
+    server:# usbipd -e -D
+        - Start usbip daemon in device mode.
+
+    server:# modprobe g_mass_storage file=/tmp/tmp.img
+        - Bind a gadget to usbip-vudc.
+        - In this example, a mass storage gadget is bound.
+
+    server:# usbip list --device
+        - List gadgets exported by local usbipd server.
+
+    server:# modprobe -r g_mass_storage
+        - Unbind a gadget from usbip-vudc.
+        - In this example, the previous mass storage gadget is unbound.
+
 .SH "SEE ALSO"
 \fBusbip\fP\fB(8)\fB\fP
 
diff --git a/tools/usb/usbip/libsrc/list.h b/tools/usb/usbip/libsrc/list.h
index a941671e4900..9cca2425587b 100644
--- a/tools/usb/usbip/libsrc/list.h
+++ b/tools/usb/usbip/libsrc/list.h
@@ -77,17 +77,17 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
 #define LIST_POISON1  ((void *) 0x00100100 + POISON_POINTER_DELTA)
 #define LIST_POISON2  ((void *) 0x00200200 + POISON_POINTER_DELTA)
 
+static inline void __list_del_entry(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+}
+
 /**
  * list_del - deletes entry from list.
  * @entry: the element to delete from the list.
  * Note: list_empty() on entry does not return true after this, the entry is
  * in an undefined state.
  */
-static inline void __list_del_entry(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-}
-
 static inline void list_del(struct list_head *entry)
 {
 	__list_del(entry->prev, entry->next);
diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c
index 8625b0f514ee..3d810bcca02f 100644
--- a/tools/usb/usbip/src/usbip_list.c
+++ b/tools/usb/usbip/src/usbip_list.c
@@ -33,7 +33,8 @@ static const char usbip_list_usage_string[] =
 	"usbip list [-p|--parsable] <args>\n"
 	"    -p, --parsable         Parsable list format\n"
 	"    -r, --remote=<host>    List the exportable USB devices on <host>\n"
-	"    -l, --local            List the local USB devices\n";
+	"    -l, --local            List the local USB devices\n"
+	"    -d, --device           List the local USB gadgets bound to usbip-vudc\n";
 
 void usbip_list_usage(void)
 {