Diffstat (limited to 'arch/x86/include')
439 files changed, 29410 insertions, 23460 deletions
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h new file mode 100644 index 000000000000..07949102a08d --- /dev/null +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are in machine order; things rely on that. + */ +#ifdef CONFIG_64BIT +GEN(rax) +GEN(rcx) +GEN(rdx) +GEN(rbx) +GEN(rsp) +GEN(rbp) +GEN(rsi) +GEN(rdi) +GEN(r8) +GEN(r9) +GEN(r10) +GEN(r11) +GEN(r12) +GEN(r13) +GEN(r14) +GEN(r15) +#else +GEN(eax) +GEN(ecx) +GEN(edx) +GEN(ebx) +GEN(esp) +GEN(ebp) +GEN(esi) +GEN(edi) +#endif diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a0ab9ab61c75..4566000e15c4 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,13 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h generated-y += syscalls_32.h generated-y += syscalls_64.h +generated-y += syscalls_x32.h generated-y += unistd_32_ia32.h generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h +generated-y += cpufeaturemasks.h -generic-y += dma-contiguous.h generic-y += early_ioremap.h -generic-y += export.h +generic-y += fprobe.h generic-y += mcs_spinlock.h -generic-y += mm-arch-hooks.h +generic-y += mmzone.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h deleted file mode 100644 index 7d3ece8bfb61..000000000000 --- a/arch/x86/include/asm/a.out-core.h +++ /dev/null @@ -1,67 +0,0 @@ -/* a.out coredump register dumper - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_X86_A_OUT_CORE_H -#define _ASM_X86_A_OUT_CORE_H - -#ifdef __KERNEL__ -#ifdef CONFIG_X86_32 - -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mm_types.h> - -#include <asm/debugreg.h> - -/* - * fill in the user structure for an a.out core dump - */ -static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) -{ -/* changed the size calculations - should hopefully work better. 
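GEN-for-each-reg.h is an x-macro header: a consumer defines GEN(reg), includes the header to stamp out one expansion per general-purpose register (in machine order, as the comment warns), then undefines it. A minimal sketch of the consumption pattern; the declaration inside GEN() here is invented, though the kernel's retpoline thunk declarations follow the same shape:

    /* Stamp out one extern declaration per GP register.
     * The __demo_thunk_* names are hypothetical. */
    #define GEN(reg) extern void __demo_thunk_ ## reg(void);
    #include <asm/GEN-for-each-reg.h>
    #undef GEN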
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1))) - >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - aout_dump_debugregs(dump); - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) - >> PAGE_SHIFT; - - dump->regs.bx = regs->bx; - dump->regs.cx = regs->cx; - dump->regs.dx = regs->dx; - dump->regs.si = regs->si; - dump->regs.di = regs->di; - dump->regs.bp = regs->bp; - dump->regs.ax = regs->ax; - dump->regs.ds = (u16)regs->ds; - dump->regs.es = (u16)regs->es; - dump->regs.fs = (u16)regs->fs; - dump->regs.gs = get_user_gs(regs); - dump->regs.orig_ax = regs->orig_ax; - dump->regs.ip = regs->ip; - dump->regs.cs = (u16)regs->cs; - dump->regs.flags = regs->flags; - dump->regs.sp = regs->sp; - dump->regs.ss = (u16)regs->ss; - - dump->u_fpvalid = dump_fpu(regs, &dump->i387); -} - -#endif /* CONFIG_X86_32 */ -#endif /* __KERNEL__ */ -#endif /* _ASM_X86_A_OUT_CORE_H */ diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 1b010a859b8b..d937c55e717e 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * X86 specific ACPICA environments and implementation * * Copyright (C) 2014, Intel Corporation * Author: Lv Zheng <lv.zheng@intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _ASM_X86_ACENV_H @@ -16,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 2f01eb4d6208..a03aa6f999d1 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -1,37 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_ACPI_H #define _ASM_X86_ACPI_H /* * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
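The rewritten ACPI_FLUSH_CPU_CACHE() above keys off X86_FEATURE_HYPERVISOR, which is simply CPUID leaf 1, ECX bit 31. A standalone sketch of that check, using the compiler-provided CPUID helper rather than the kernel's cpu_feature_enabled() machinery:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 1;

            /* ECX bit 31 is set when running under a hypervisor,
             * in which case the WBINVD on sleep entry is skipped. */
            printf("hypervisor: %s\n", (ecx & (1u << 31)) ? "yes" : "no");
            return 0;
    }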
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#include <acpi/pdc_intel.h> +#include <acpi/proc_cap_intel.h> #include <asm/numa.h> #include <asm/fixmap.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mpspec.h> -#include <asm/realmode.h> #include <asm/x86_init.h> +#include <asm/cpufeature.h> +#include <asm/irq_vectors.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -48,6 +35,7 @@ extern int acpi_skip_timer_override; extern int acpi_use_timer_override; extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; +extern bool acpi_int_src_ovr[NR_IRQS_LEGACY]; extern u8 acpi_sci_flags; extern u32 acpi_sci_override_gsi; @@ -68,6 +56,8 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); +extern int acpi_blacklisted(void); + static inline void acpi_noirq_set(void) { acpi_noirq = 1; } static inline void acpi_disable_pci(void) { @@ -79,7 +69,21 @@ static inline void acpi_disable_pci(void) extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ -#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) +unsigned long acpi_get_wakeup_address(void); + +static inline bool acpi_skip_set_wakeup_address(void) +{ + return cpu_feature_enabled(X86_FEATURE_XENPV); +} + +#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address + +union acpi_subtable_headers; + +int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, + const unsigned long end); + +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa); /* * Check if the CPU can handle C2 and deeper @@ -110,23 +114,42 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } -static inline void arch_acpi_set_pdc_bits(u32 *buf) +static inline void arch_acpi_set_proc_cap_bits(u32 *cap) { struct cpuinfo_x86 *c = &cpu_data(0); - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + *cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP; + + /* Enable coordination with firmware's _TSD info */ + *cap |= ACPI_PROC_CAP_SMP_T_SWCOORD; if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP; if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; + *cap |= ACPI_PROC_CAP_T_FFH; + + if (cpu_has(c, X86_FEATURE_HWP)) + *cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF; /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + * If mwait/monitor is unsupported, C_C1_FFH and + * C2/C3_FFH will be disabled. */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + if (!cpu_has(c, X86_FEATURE_MWAIT) || + boot_option_idle_override == IDLE_NOMWAIT) + *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. 
+ */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) @@ -134,16 +157,31 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_SET_ROOT_POINTER +static __always_inline void acpi_arch_set_root_pointer(u64 addr) +{ + x86_init.acpi.set_root_pointer(addr); +} + #define ACPI_HAVE_ARCH_GET_ROOT_POINTER -static inline u64 acpi_arch_get_root_pointer(void) +static __always_inline u64 acpi_arch_get_root_pointer(void) { return x86_init.acpi.get_root_pointer(); } void acpi_generic_reduced_hw_init(void); +void x86_default_set_root_pointer(u64 addr); u64 x86_default_get_root_pointer(void); +#ifdef CONFIG_XEN_PV +/* A Xen PV domain needs a special acpi_os_ioremap() handling. */ +extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, + acpi_size size); +void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +#define acpi_os_ioremap acpi_os_ioremap +#endif + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -155,6 +193,8 @@ static inline void disable_acpi(void) { } static inline void acpi_generic_reduced_hw_init(void) { } +static inline void x86_default_set_root_pointer(u64 addr) { } + static inline u64 x86_default_get_root_pointer(void) { return 0; @@ -168,7 +208,7 @@ static inline u64 x86_default_get_root_pointer(void) extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ -#define acpi_unlazy_tlb(x) leave_mm(x) +struct cper_ia_proc_ctx; #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) @@ -188,6 +228,15 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) */ return PAGE_KERNEL_NOENC; } + +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); +#else +static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) +{ + return -EINVAL; +} #endif #define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h new file mode 100644 index 000000000000..fab11192c60a --- /dev/null +++ b/arch/x86/include/asm/acrn.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ACRN_H +#define _ASM_X86_ACRN_H + +/* + * This CPUID returns feature bitmaps in EAX. + * Guest VM uses this to detect the appropriate feature bit. + */ +#define ACRN_CPUID_FEATURES 0x40000001 +/* Bit 0 indicates whether guest VM is privileged */ +#define ACRN_FEATURE_PRIVILEGED_VM BIT(0) + +/* + * Timing Information. + * This leaf returns the current TSC frequency in kHz. + * + * EAX: (Virtual) TSC frequency in kHz. + * EBX, ECX, EDX: RESERVED (reserved fields are set to zero). + */ +#define ACRN_CPUID_TIMING_INFO 0x40000010 + +void acrn_setup_intr_handler(void (*handler)(void)); +void acrn_remove_intr_handler(void); + +static inline u32 acrn_cpuid_base(void) +{ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return cpuid_base_hypervisor("ACRNACRNACRN", 0); + + return 0; +} + +static inline unsigned long acrn_get_tsc_khz(void) +{ + return cpuid_eax(ACRN_CPUID_TIMING_INFO); +} + +/* + * Hypercalls for ACRN + * + * - VMCALL instruction is used to implement ACRN hypercalls. + * - ACRN hypercall ABI: + * - Hypercall number is passed in R8 register. + * - Up to 2 arguments are passed in RDI, RSI. + * - Return value will be placed in RAX. + * + * Because GCC doesn't support R8 register as direct register constraints, use + * supported constraint as input with a explicit MOV to R8 in beginning of asm. 
+ */ +static inline long acrn_hypercall0(unsigned long hcall_id) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id) + : "r8", "memory"); + + return result; +} + +static inline long acrn_hypercall1(unsigned long hcall_id, + unsigned long param1) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id), "D" (param1) + : "r8", "memory"); + + return result; +} + +static inline long acrn_hypercall2(unsigned long hcall_id, + unsigned long param1, + unsigned long param2) +{ + long result; + + asm volatile("movl %1, %%r8d\n\t" + "vmcall\n\t" + : "=a" (result) + : "g" (hcall_id), "D" (param1), "S" (param2) + : "r8", "memory"); + + return result; +} + +#endif /* _ASM_X86_ACRN_H */ diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h index 8e25bf4f323a..c8c111d8fbd7 100644 --- a/arch/x86/include/asm/agp.h +++ b/arch/x86/include/asm/agp.h @@ -2,14 +2,14 @@ #ifndef _ASM_X86_AGP_H #define _ASM_X86_AGP_H -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/cacheflush.h> /* * Functions to keep the agpgart mappings coherent with the MMU. The * GART gives the CPU a physical alias of pages in memory. The alias * region is mapped uncacheable. Make sure there are no conflicting - * mappings with different cachability attributes for the same + * mappings with different cacheability attributes for the same * page. This avoids data corruption on some CPUs. */ @@ -23,10 +23,4 @@ */ #define flush_agp_cache() wbinvd() -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - #endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h deleted file mode 100644 index 31b627b43a8e..000000000000 --- a/arch/x86/include/asm/alternative-asm.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_ALTERNATIVE_ASM_H -#define _ASM_X86_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -#include <asm/asm.h> - -#ifdef CONFIG_SMP - .macro LOCK_PREFIX -672: lock - .pushsection .smp_locks,"a" - .balign 4 - .long 672b - . - .popsection - .endm -#else - .macro LOCK_PREFIX - .endm -#endif - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len - .long \orig - . - .long \alt - . - .word \feature - .byte \orig_len - .byte \alt_len - .byte \pad_len -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. ".skip" directive takes care of proper instruction padding - * in case @newinstr is longer than @oldinstr. 
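The .skip expression in the ALTERNATIVE macro encodes max(0, replacement_len - original_len): gas evaluates a true comparison to -1, so -(x > 0) becomes 1 exactly when the replacement is longer, and that many 0x90 (NOP) bytes are emitted. The alt_max_short() definition just below is the branchless max from the cited Stanford bithacks page. Both restated in plain C as a sketch (C comparisons already yield +1, so the extra negation drops out):

    /* C analogue of: .skip -(((new)-(old)) > 0) * ((new)-(old)), 0x90 */
    static int alt_pad_len(int old_len, int new_len)
    {
            int diff = new_len - old_len;

            return (diff > 0) * diff;       /* max(0, diff) NOP bytes */
    }

    /* C analogue of alt_max_short(a, b): branchless max */
    static int alt_max(int a, int b)
    {
            return a ^ ((a ^ b) & -(a < b));
    }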
- */ -.macro ALTERNATIVE oldinstr, newinstr, feature -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: - .popsection -.endm - -#define old_len 141b-140b -#define new_len1 144f-143f -#define new_len2 145f-144f - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) - - -/* - * Same as ALTERNATIVE macro above but for two alternatives. If CPU - * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has - * @feature2, it replaces @oldinstr with @feature2. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 -140: - \oldinstr -141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0660e14690c8..03364510d5fe 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -2,12 +2,23 @@ #ifndef _ASM_X86_ALTERNATIVE_H #define _ASM_X86_ALTERNATIVE_H -#ifndef __ASSEMBLY__ - #include <linux/types.h> -#include <linux/stddef.h> #include <linux/stringify.h> +#include <linux/objtool.h> #include <asm/asm.h> +#include <asm/bug.h> + +#define ALT_FLAGS_SHIFT 16 + +#define ALT_FLAG_NOT (1 << 0) +#define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature)) +#define ALT_FLAG_DIRECT_CALL (1 << 1) +#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature)) +#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS) + +#ifndef __ASSEMBLER__ + +#include <linux/stddef.h> /* * Alternative inline assembly for SMP. @@ -38,22 +49,46 @@ ".popsection\n" \ "671:" -#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock " #else /* ! CONFIG_SMP */ #define LOCK_PREFIX_HERE "" #define LOCK_PREFIX "" #endif +/* + * The patching flags are part of the upper bits of the @ft_flags parameter when + * specifying them. The split is currently like this: + * + * [31... flags ...16][15... CPUID feature bit ...0] + * + * but since this is all hidden in the macros argument being split, those fields can be + * extended in the future to fit in a u64 or however the need arises. 
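Given that [31..16 | 15..0] split of @ft_flags, composing and decomposing entries is plain shift-and-mask work. A userspace sketch; the feature number is a made-up stand-in for a real X86_FEATURE_* value:

    #include <stdint.h>
    #include <stdio.h>

    #define ALT_FLAGS_SHIFT 16
    #define ALT_FLAG_NOT    (1 << 0)
    #define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature))

    int main(void)
    {
            uint32_t ft_flags = ALT_NOT(0x123);     /* hypothetical feature */

            printf("feature=%#x flags=%#x\n",
                   ft_flags & 0xffff,               /* CPUID feature bits */
                   ft_flags >> ALT_FLAGS_SHIFT);    /* patching flags */
            return 0;
    }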
+ */ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 cpuid; /* cpuid bit set for replacement */ + + union { + struct { + u32 cpuid: 16; /* CPUID bit set for replacement */ + u32 flags: 16; /* patching control flags */ + }; + u32 ft_flags; + }; + u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ - u8 padlen; /* length of build-time padding */ } __packed; +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; +extern s32 __cfi_sites[], __cfi_sites_end[]; +extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; +extern s32 __smp_locks[], __smp_locks_end[]; + /* * Debug flag that can be tested to see whether alternative * instructions were patched in already: @@ -62,9 +97,71 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); +extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, + s32 *start_cfi, s32 *end_cfi); struct module; +struct callthunk_sites { + s32 *call_start, *call_end; +}; + +#ifdef CONFIG_CALL_THUNKS +extern void callthunks_patch_builtin_calls(void); +extern void callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod); +extern void *callthunks_translate_call_dest(void *dest); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); +#else +static __always_inline void callthunks_patch_builtin_calls(void) {} +static __always_inline void +callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod) {} +static __always_inline void *callthunks_translate_call_dest(void *dest) +{ + return dest; +} +static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, + void *func, void *ip) +{ + return 0; +} +#endif + +#ifdef CONFIG_MITIGATION_ITS +extern void its_init_mod(struct module *mod); +extern void its_fini_mod(struct module *mod); +extern void its_free_mod(struct module *mod); +extern u8 *its_static_thunk(int reg); +#else /* CONFIG_MITIGATION_ITS */ +static inline void its_init_mod(struct module *mod) { } +static inline void its_fini_mod(struct module *mod) { } +static inline void its_free_mod(struct module *mod) { } +static inline u8 *its_static_thunk(int reg) +{ + WARN_ONCE(1, "ITS not compiled in"); + + return NULL; +} +#endif + +#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) +extern bool cpu_wants_rethunk(void); +extern bool cpu_wants_rethunk_at(void *addr); +#else +static __always_inline bool cpu_wants_rethunk(void) +{ + return false; +} +static __always_inline bool cpu_wants_rethunk_at(void *addr) +{ + return false; +} +#endif + #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, @@ -85,74 +182,55 @@ static inline int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ -#define b_replacement(num) "664"#num -#define e_replacement(num) "665"#num - -#define alt_end_marker "663" -#define alt_slen "662b-661b" -#define alt_pad_len alt_end_marker"b-662b" -#define alt_total_slen alt_end_marker"b-661b" -#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" - 
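A note on struct alt_instr above: both code references are stored as self-relative s32 offsets (target minus the address of the field itself, as the ".long 771b - ." emission shows), which keeps the table compact and relocation-free. Recovering the pointers works in the style of apply_alternatives(); a sketch with a local struct mirroring the two offset fields:

    #include <stdint.h>

    struct alt_entry { int32_t instr_offset; int32_t repl_offset; };

    static uint8_t *alt_instr(const struct alt_entry *a)
    {
            /* target = &field + field */
            return (uint8_t *)&a->instr_offset + a->instr_offset;
    }

    static uint8_t *alt_replacement(const struct alt_entry *a)
    {
            return (uint8_t *)&a->repl_offset + a->repl_offset;
    }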
-#define __OLDINSTR(oldinstr, num) \ - "661:\n\t" oldinstr "\n662:\n" \ - ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ - "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" - -#define OLDINSTR(oldinstr, num) \ - __OLDINSTR(oldinstr, num) \ - alt_end_marker ":\n" - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" - -/* - * Pad the second replacement alternative with additional NOPs if it is - * additionally longer than the first replacement alternative. - */ -#define OLDINSTR_2(oldinstr, num1, num2) \ - "661:\n\t" oldinstr "\n662:\n" \ - ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ - "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ - alt_end_marker ":\n" - -#define ALTINSTR_ENTRY(feature, num) \ - " .long 661b - .\n" /* label */ \ - " .long " b_replacement(num)"f - .\n" /* new instruction */ \ - " .word " __stringify(feature) "\n" /* feature bit */ \ +#define ALT_CALL_INSTR "call BUG_func" + +#define alt_slen "772b-771b" +#define alt_total_slen "773b-771b" +#define alt_rlen "775f-774f" + +#define OLDINSTR(oldinstr) \ + "# ALT: oldinstr\n" \ + "771:\n\t" oldinstr "\n772:\n" \ + "# ALT: padding\n" \ + ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen ")-(" alt_slen ")),0x90\n" \ + "773:\n" + +#define ALTINSTR_ENTRY(ft_flags) \ + ".pushsection .altinstructions, \"aM\", @progbits, " \ + __stringify(ALT_INSTR_SIZE) "\n" \ + " .long 771b - .\n" /* label */ \ + " .long 774f - .\n" /* new instruction */ \ + " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n" /* replacement len */ \ - " .byte " alt_pad_len "\n" /* pad len */ - -#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ - b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" - -/* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, newinstr, feature) \ - OLDINSTR(oldinstr, 1) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature, 1) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + " .byte " alt_rlen "\n" /* replacement len */ \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ - OLDINSTR_2(oldinstr, 1, 2) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature1, 1) \ - ALTINSTR_ENTRY(feature2, 2) \ - ".popsection\n" \ +#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \ ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ - ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ANNOTATE_DATA_SPECIAL "\n" \ + "# ALT: replacement\n" \ + "774:\n\t" newinstr "\n775:\n" \ ".popsection\n" +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \ + OLDINSTR(oldinstr) \ + ALTINSTR_ENTRY(ft_flags) \ + ALTINSTR_REPLACEMENT(newinstr) + +#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ + ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2) + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. 
*/ +#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags) + +#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \ + newinstr3, ft_flags3) \ + ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \ + newinstr3, ft_flags3) + /* * Alternative instructions for different CPU types or capabilities. * @@ -165,11 +243,11 @@ static inline int alternatives_text_reserved(void *start, void *end) * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, newinstr, feature) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") +#define alternative(oldinstr, newinstr, ft_flags) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") -#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") +#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory") /* * Alternative inline assembly with input. @@ -177,38 +255,33 @@ static inline int alternatives_text_reserved(void *start, void *end) * Peculiarities: * No memory clobber here. * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... "r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. * Leaving an unused argument 0 to keep API compatibility. */ -#define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ - : : "i" (0), ## input) - -/* - * This is similar to alternative_input. But it has two features and - * respective instructions. - * - * If CPU has feature2, newinstr2 is used. - * Otherwise, if CPU has feature1, newinstr1 is used. - * Otherwise, oldinstr is used. - */ -#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ - feature2, input...) \ - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ - newinstr2, feature2) \ +#define alternative_input(oldinstr, newinstr, ft_flags, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ +#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : output : "i" (0), ## input) -/* Like alternative_io, but for replacing a direct call with another one. */ -#define alternative_call(oldfunc, newfunc, feature, output, input...) \ - asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ - : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) +/* + * Like alternative_io, but for replacing a direct call with another one. + * + * Use the %c operand modifier which is the generic way to print a bare + * constant expression with all syntax-specific punctuation omitted. 
%P + * is the x86-specific variant which can handle constants too, for + * historical reasons, but it should be used primarily for PIC + * references: i.e., if used for a function, it would add the PLT + * suffix. + */ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \ + asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new] "i" (newfunc) \ + COMMA(input) \ + : clobbers) /* * Like alternative_call, but there are two features and respective functions. @@ -216,26 +289,113 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ - output, input...) \ - asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ - "call %P[new2]", feature2) \ - : output, ASM_CALL_CONSTRAINT \ - : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input) +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ + output, input, clobbers...) \ + asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ + "call %c[new2]", ft_flags2) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2) \ + COMMA(input) \ + : clobbers) + +#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__ + +/* Macro for creating assembler functions avoiding any C magic. */ +#define DEFINE_ASM_FUNC(func, instr, sec) \ + asm (".pushsection " #sec ", \"ax\"\n" \ + ".global " #func "\n\t" \ + ".type " #func ", @function\n\t" \ + ASM_FUNC_ALIGN "\n" \ + #func ":\n\t" \ + ASM_ENDBR \ + instr "\n\t" \ + ASM_RET \ + ".size " #func ", . - " #func "\n\t" \ + ".popsection") + +void BUG_func(void); +void nop_func(void); + +#else /* __ASSEMBLER__ */ + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .pushsection .smp_locks,"a" + .balign 4 + .long 672b - . + .popsection + .endm +#else + .macro LOCK_PREFIX + .endm +#endif /* - * use this macro(s) if you need more than one output parameter - * in alternative_io + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). */ -#define ASM_OUTPUT2(a...) a +.macro altinstr_entry orig alt ft_flags orig_len alt_len + .long \orig - . + .long \alt - . + .4byte \ft_flags + .byte \orig_len + .byte \alt_len +.endm + +.macro ALT_CALL_INSTR + call BUG_func +.endm /* - * use this macro if you need clobbers but no inputs in - * alternative_{input,io,call}() + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. */ -#define ASM_NO_INPUT_CLOBBER(clbr...) 
"i" (0) : clbr +#define __ALTERNATIVE(oldinst, newinst, flag) \ +740: \ + oldinst ; \ +741: \ + .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\ +742: \ + .pushsection .altinstructions, "aM", @progbits, ALT_INSTR_SIZE ;\ + altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \ + .popsection ; \ + .pushsection .altinstr_replacement,"ax" ; \ +743: \ + ANNOTATE_DATA_SPECIAL ; \ + newinst ; \ +744: \ + .popsection ; + +.macro ALTERNATIVE oldinstr, newinstr, ft_flags + __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags) +.endm -#endif /* __ASSEMBLY__ */ +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 + __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1), + \newinstr2, \ft_flags2) +.endm + +.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3 + __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2), + \newinstr3, \ft_flags3) +.endm + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, ft_flags + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd/hsmp.h b/arch/x86/include/asm/amd/hsmp.h new file mode 100644 index 000000000000..2137f62853ed --- /dev/null +++ b/arch/x86/include/asm/amd/hsmp.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_AMD_HSMP_H_ +#define _ASM_X86_AMD_HSMP_H_ + +#include <uapi/asm/amd_hsmp.h> + +#if IS_ENABLED(CONFIG_AMD_HSMP) +int hsmp_send_message(struct hsmp_message *msg); +#else +static inline int hsmp_send_message(struct hsmp_message *msg) +{ + return -ENODEV; +} +#endif + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/asm/amd/ibs.h b/arch/x86/include/asm/amd/ibs.h new file mode 100644 index 000000000000..3ee5903982c2 --- /dev/null +++ b/arch/x86/include/asm/amd/ibs.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include <asm/msr-index.h> + +/* IBS_OP_DATA2 DataSrc */ +#define IBS_DATA_SRC_LOC_CACHE 2 +#define IBS_DATA_SRC_DRAM 3 +#define IBS_DATA_SRC_REM_CACHE 4 +#define IBS_DATA_SRC_IO 7 + +/* IBS_OP_DATA2 DataSrc Extension */ +#define IBS_DATA_SRC_EXT_LOC_CACHE 1 +#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2 +#define IBS_DATA_SRC_EXT_DRAM 3 +#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5 +#define IBS_DATA_SRC_EXT_PMEM 6 +#define IBS_DATA_SRC_EXT_IO 7 +#define IBS_DATA_SRC_EXT_EXT_MEM 8 +#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12 + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. 
count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + l3_miss_only:1, /* 59: Collect L3 miss samples only */ + fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */ + fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */ + reserved:2; /* 62-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ + l3_miss_only:1, /* 16: Collect L3 miss samples only */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved0:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + ldlat_thrsh:4, /* 59-62: Load Latency threshold */ + ldlat_en:1; /* 63: Load Latency enabled */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data 1 */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src_lo:3, /* 0-2: data source low */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + data_src_hi:2, /* 6-7: data source high */ + reserved1:56; /* 8-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size 
in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/arch/x86/include/asm/amd/nb.h b/arch/x86/include/asm/amd/nb.h new file mode 100644 index 000000000000..ddb5108cf46c --- /dev/null +++ b/arch/x86/include/asm/amd/nb.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_NB_H +#define _ASM_X86_AMD_NB_H + +#include <linux/ioport.h> +#include <linux/pci.h> +#include <asm/amd/node.h> + +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; + +extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); +extern void amd_flush_garts(void); +extern int amd_numa_init(void); +extern int amd_get_subcaches(int); +extern int amd_set_subcaches(int, unsigned long); + +struct amd_l3_cache { + unsigned indices; + u8 subcaches[4]; +}; + +struct amd_northbridge { + struct pci_dev *misc; + struct pci_dev *link; + struct amd_l3_cache l3_cache; +}; + +struct amd_northbridge_info { + u16 num; + u64 flags; + struct amd_northbridge *nb; +}; + +#define AMD_NB_GART BIT(0) +#define AMD_NB_L3_INDEX_DISABLE BIT(1) +#define AMD_NB_L3_PARTITIONING BIT(2) + +#ifdef CONFIG_AMD_NB + +u16 amd_nb_num(void); +bool amd_nb_has_feature(unsigned int feature); +struct amd_northbridge *node_to_amd_nb(int node); + +static inline bool amd_gart_present(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return false; + + /* GART present only on Fam15h, up to model 0fh */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) + return true; + + return false; +} + +#else + +#define amd_nb_num(x) 0 +#define amd_nb_has_feature(x) false +static inline struct amd_northbridge *node_to_amd_nb(int node) +{ + return NULL; +} +#define amd_gart_present(x) false + +#endif + + +#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h new file mode 100644 index 000000000000..a672b8765fa8 --- /dev/null +++ b/arch/x86/include/asm/amd/node.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Node helper functions and common defines + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> + * + * Note: + * Items in this file may only be used in a single place. + * However, it's prudent to keep all AMD Node functionality + * in a unified place rather than spreading throughout the + * kernel. 
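Back to the IBS accessor unions above (ibs_op_data3 and friends): the val/bitfield union lets the driver capture the raw MSR image once and then pick fields by name. A cut-down model of the decode pattern; the outer field widths follow the header, but the intermediate flag bits are collapsed into one filler field, and bitfield layout is compiler-defined (this matches GCC on little-endian x86):

    #include <stdio.h>

    union op_data3_view {
            unsigned long long val;
            struct {
                    unsigned long long ld_op:1,           /*  0: load op */
                                       st_op:1,           /*  1: store op */
                                       other_flags:30,    /*  2-31 */
                                       dc_miss_lat:16,    /* 32-47 */
                                       tlb_refill_lat:16; /* 48-63 */
            };
    };

    int main(void)
    {
            union op_data3_view d = { .val = (0x40ULL << 32) | 1 };

            printf("load=%llu, miss latency=%llu cycles\n",
                   (unsigned long long)d.ld_op,
                   (unsigned long long)d.dc_miss_lat);
            return 0;
    }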
+ */ + +#ifndef _ASM_X86_AMD_NODE_H_ +#define _ASM_X86_AMD_NODE_H_ + +#include <linux/pci.h> + +#define MAX_AMD_NUM_NODES 8 +#define AMD_NODE0_PCI_SLOT 0x18 + +struct pci_dev *amd_node_get_func(u16 node, u8 func); + +static inline u16 amd_num_nodes(void) +{ + return topology_amd_nodes_per_pkg() * topology_max_packages(); +} + +#ifdef CONFIG_AMD_NODE +int __must_check amd_smn_read(u16 node, u32 address, u32 *value); +int __must_check amd_smn_write(u16 node, u32 address, u32 value); + +/* Should only be used by the HSMP driver. */ +int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write); +#else +static inline int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { return -ENODEV; } +static inline int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return -ENODEV; } + +static inline int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write) +{ + return -ENODEV; +} +#endif /* CONFIG_AMD_NODE */ + +/* helper for use with read_poll_timeout */ +static inline int smn_read_register(u32 reg) +{ + int data, rc; + + rc = amd_smn_read(0, reg, &data); + if (rc) + return rc; + + return data; +} +#endif /*_ASM_X86_AMD_NODE_H_*/ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h deleted file mode 100644 index 1ae4e5791afa..000000000000 --- a/arch/x86/include/asm/amd_nb.h +++ /dev/null @@ -1,127 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_AMD_NB_H -#define _ASM_X86_AMD_NB_H - -#include <linux/ioport.h> -#include <linux/pci.h> -#include <linux/refcount.h> - -struct amd_nb_bus_dev_range { - u8 bus; - u8 dev_base; - u8 dev_limit; -}; - -extern const struct pci_device_id amd_nb_misc_ids[]; -extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; - -extern bool early_is_amd_nb(u32 value); -extern struct resource *amd_get_mmconfig_range(struct resource *res); -extern int amd_cache_northbridges(void); -extern void amd_flush_garts(void); -extern int amd_numa_init(void); -extern int amd_get_subcaches(int); -extern int amd_set_subcaches(int, unsigned long); - -extern int amd_smn_read(u16 node, u32 address, u32 *value); -extern int amd_smn_write(u16 node, u32 address, u32 value); -extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); - -struct amd_l3_cache { - unsigned indices; - u8 subcaches[4]; -}; - -struct threshold_block { - unsigned int block; /* Number within bank */ - unsigned int bank; /* MCA bank the block belongs to */ - unsigned int cpu; /* CPU which controls MCA bank */ - u32 address; /* MSR address for the block */ - u16 interrupt_enable; /* Enable/Disable APIC interrupt */ - bool interrupt_capable; /* Bank can generate an interrupt. */ - - u16 threshold_limit; /* - * Value upon which threshold - * interrupt is generated. - */ - - struct kobject kobj; /* sysfs object */ - struct list_head miscj; /* - * List of threshold blocks - * within a bank. 
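smn_read_register() above folds the error code and the register value into a single int so it can sit directly in read_poll_timeout()'s op slot, which also means it only suits registers that keep bit 31 clear. A hedged sketch of a caller; the register address and ready bit are invented for illustration:

    #include <linux/bits.h>
    #include <linux/iopoll.h>
    #include <asm/amd/node.h>

    #define FOO_SMN_REG   0x50a00       /* hypothetical SMN address */
    #define FOO_READY_BIT BIT(0)        /* hypothetical status bit */

    static int foo_wait_ready(void)
    {
            int val, ret;

            /* Poll every 100us, time out after 100ms. A negative val is
             * an amd_smn_read() failure and ends the poll early too. */
            ret = read_poll_timeout(smn_read_register, val,
                                    val < 0 || (val & FOO_READY_BIT),
                                    100, 100000, false, FOO_SMN_REG);
            if (ret)
                    return ret;             /* -ETIMEDOUT */

            return val < 0 ? val : 0;
    }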
- */ -}; - -struct threshold_bank { - struct kobject *kobj; - struct threshold_block *blocks; - - /* initialized to the number of CPUs on the node sharing this bank */ - refcount_t cpus; -}; - -struct amd_northbridge { - struct pci_dev *root; - struct pci_dev *misc; - struct pci_dev *link; - struct amd_l3_cache l3_cache; - struct threshold_bank *bank4; -}; - -struct amd_northbridge_info { - u16 num; - u64 flags; - struct amd_northbridge *nb; -}; - -#define AMD_NB_GART BIT(0) -#define AMD_NB_L3_INDEX_DISABLE BIT(1) -#define AMD_NB_L3_PARTITIONING BIT(2) - -#ifdef CONFIG_AMD_NB - -u16 amd_nb_num(void); -bool amd_nb_has_feature(unsigned int feature); -struct amd_northbridge *node_to_amd_nb(int node); - -static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) -{ - struct pci_dev *misc; - int i; - - for (i = 0; i != amd_nb_num(); i++) { - misc = node_to_amd_nb(i)->misc; - - if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && - PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) - return i; - } - - WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); - return 0; -} - -static inline bool amd_gart_present(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) - return false; - - /* GART present only on Fam15h, upto model 0fh */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || - (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) - return true; - - return false; -} - -#else - -#define amd_nb_num(x) 0 -#define amd_nb_has_feature(x) false -#define node_to_amd_nb(x) NULL -#define amd_gart_present(x) false - -#endif - - -#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h deleted file mode 100644 index 0acbac299e49..000000000000 --- a/arch/x86/include/asm/apb_timer.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare - * - * (C) Copyright 2009 Intel Corporation - * Author: Jacob Pan (jacob.jun.pan@intel.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. 
- * - * Note: - */ - -#ifndef ASM_X86_APBT_H -#define ASM_X86_APBT_H -#include <linux/sfi.h> - -#ifdef CONFIG_APB_TIMER - -/* default memory mapped register base */ -#define LNW_SCU_ADDR 0xFF100000 -#define LNW_EXT_TIMER_OFFSET 0x1B800 -#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET) -#define LNW_EXT_TIMER_PGOFFSET 0x800 - -/* APBT clock speed range from PCLK to fabric base, 25-100MHz */ -#define APBT_MAX_FREQ 50000000 -#define APBT_MIN_FREQ 1000000 -#define APBT_MMAP_SIZE 1024 - -#define APBT_DEV_USED 1 - -extern void apbt_time_init(void); -extern unsigned long apbt_quick_calibrate(void); -extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); -extern void apbt_setup_secondary_clock(void); - -extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); -extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); -extern int sfi_mtimer_num; - -#else /* CONFIG_APB_TIMER */ - -static inline unsigned long apbt_quick_calibrate(void) {return 0; } -static inline void apbt_time_init(void) { } - -#endif -#endif /* ASM_X86_APBT_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 130e81e10fc7..a26e66d66444 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -1,7 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _ASM_X86_APIC_H #define _ASM_X86_APIC_H #include <linux/cpumask.h> +#include <linux/static_call.h> #include <asm/alternative.h> #include <asm/cpufeature.h> @@ -11,9 +13,16 @@ #include <asm/mpspec.h> #include <asm/msr.h> #include <asm/hardirq.h> +#include <asm/io.h> +#include <asm/posted_intr.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 +/* Macros for apic_extnmi which controls external NMI masking */ +#define APIC_EXTNMI_BSP 0 /* Default */ +#define APIC_EXTNMI_ALL 1 +#define APIC_EXTNMI_NONE 2 + /* * Debugging macros */ @@ -21,38 +30,40 @@ #define APIC_VERBOSE 1 #define APIC_DEBUG 2 -/* Macros for apic_extnmi which controls external NMI masking */ -#define APIC_EXTNMI_BSP 0 /* Default */ -#define APIC_EXTNMI_ALL 1 -#define APIC_EXTNMI_NONE 2 - /* - * Define the default level of output to be very little - * This can be turned up by using apic=verbose for more - * information and apic=debug for _lots_ of information. - * apic_verbosity is defined in apic.c + * Define the default level of output to be very little This can be turned + * up by using apic=verbose for more information and apic=debug for _lots_ + * of information. apic_verbosity is defined in apic.c */ -#define apic_printk(v, s, a...) do { \ - if ((v) <= apic_verbosity) \ - printk(s, ##a); \ - } while (0) - +#define apic_printk(v, s, a...) \ +do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ +} while (0) + +#define apic_pr_verbose(s, a...) apic_printk(APIC_VERBOSE, KERN_INFO s, ##a) +#define apic_pr_debug(s, a...) apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a) +#define apic_pr_debug_cont(s, a...) apic_printk(APIC_DEBUG, KERN_CONT s, ##a) +/* Unconditional debug prints for code which is guarded by apic_verbosity already */ +#define apic_dbg(s, a...) 
printk(KERN_DEBUG s, ##a) #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -extern void generic_apic_probe(void); +extern void x86_32_probe_apic(void); #else -static inline void generic_apic_probe(void) -{ -} +static inline void x86_32_probe_apic(void) { } #endif +extern u32 cpuid_to_apicid[]; + +#define CPU_ACPIID_INVALID U32_MAX + #ifdef CONFIG_X86_LOCAL_APIC -extern unsigned int apic_verbosity; +extern int apic_verbosity; extern int local_apic_timer_c2_ok; -extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern bool apic_is_disabled; +extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -63,20 +74,6 @@ enum apic_intr_mode_id { APIC_SYMMETRIC_IO_NO_ROUTING }; -#ifdef CONFIG_SMP -extern void __inquire_remote_apic(int apicid); -#else /* CONFIG_SMP */ -static inline void __inquire_remote_apic(int apicid) -{ -} -#endif /* CONFIG_SMP */ - -static inline void default_inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - /* * With 82489DX we can't rely on apic feature bit * retrieved via cpuid but still have to deal with @@ -87,7 +84,7 @@ static inline void default_inquire_remote_apic(int apicid) */ static inline bool apic_from_smp_config(void) { - return smp_found_config && !disable_apic; + return smp_found_config && !apic_is_disabled; } /* @@ -97,24 +94,25 @@ static inline bool apic_from_smp_config(void) #include <asm/paravirt.h> #endif -extern int setup_profiling_timer(unsigned int); - static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); - alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, - ASM_OUTPUT2("=r" (v), "=m" (*addr)), - ASM_OUTPUT2("0" (v), "m" (*addr))); + alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, + ASM_OUTPUT("=r" (v), "=m" (*addr)), + ASM_INPUT("0" (v), "m" (*addr))); } static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); +} + +static inline void native_apic_mem_eoi(void) +{ + native_apic_mem_write(APIC_EOI, APIC_EOI_ACK); } -extern void native_apic_wait_icr_idle(void); -extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); @@ -122,22 +120,22 @@ static inline bool apic_is_x2apic_enabled(void) { u64 msr; - if (rdmsrl_safe(MSR_IA32_APICBASE, &msr)) + if (rdmsrq_safe(MSR_IA32_APICBASE, &msr)) return false; return msr & X2APIC_ENABLE; } extern void enable_IR_x2apic(void); -extern int get_physical_broadcast(void); - extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); +extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); @@ -146,15 +144,14 @@ extern void setup_secondary_APIC_clock(void); extern void lapic_update_tsc_freq(void); #ifdef CONFIG_X86_64 -static inline int apic_force_enable(unsigned long addr) +static inline bool apic_force_enable(unsigned long addr) { - return -1; + return false; } #else -extern int apic_force_enable(unsigned long addr); +extern bool apic_force_enable(unsigned long addr); #endif 
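apic_is_x2apic_enabled() above is a one-bit test on IA32_APICBASE (MSR 0x1b): bit 11 globally enables the APIC, bit 10 (X2APIC_ENABLE) selects x2APIC mode, and the MMIO base lives in the upper bits. A small decode sketch over a sample raw value (actually reading MSR 0x1b needs ring 0 or /dev/cpu/0/msr):

    #include <stdint.h>
    #include <stdio.h>

    #define XAPIC_ENABLE  (1ULL << 11)     /* IA32_APICBASE.EN */
    #define X2APIC_ENABLE (1ULL << 10)     /* IA32_APICBASE.EXTD */

    int main(void)
    {
            uint64_t msr = 0xfee00d00ULL;  /* sample: EN, EXTD and BSP set */

            printf("xAPIC enabled: %d, x2APIC mode: %d, base: %#llx\n",
                   !!(msr & XAPIC_ENABLE), !!(msr & X2APIC_ENABLE),
                   (unsigned long long)(msr & ~0xfffULL));
            return 0;
    }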
-extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -172,8 +169,20 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); +extern bool apic_needs_pit(void); + +extern void apic_send_IPI_allbutself(unsigned int vector); + +extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); +extern void topology_register_boot_apic(u32 apic_id); +extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); +extern void topology_hotunplug_apic(unsigned int cpu); +extern void topology_apply_cmdline_limits_early(void); +extern void topology_init_possible_cpus(void); +extern void topology_reset_possible_cpus_up(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -184,34 +193,28 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } +static inline void topology_apply_cmdline_limits_early(void) { } +static inline void topology_init_possible_cpus(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC -/* - * Make previous memory operations globally visible before - * sending the IPI through x2apic wrmsr. We need a serializing instruction or - * mfence for this. 
- */ -static inline void x2apic_wrmsr_fence(void) -{ - asm volatile("mfence" : : : "memory"); -} - static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || reg == APIC_LVR) return; - wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); + wrmsrq(APIC_BASE_MSR + (reg >> 4), v); } -static inline void native_apic_msr_eoi_write(u32 reg, u32 v) +static inline void native_apic_msr_eoi(void) { - __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); + native_wrmsrq(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK); } static inline u32 native_apic_msr_read(u32 reg) @@ -221,38 +224,26 @@ static inline u32 native_apic_msr_read(u32 reg) if (reg == APIC_DFR) return -1; - rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); + rdmsrq(APIC_BASE_MSR + (reg >> 4), msr); return (u32)msr; } -static inline void native_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static inline u32 native_safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - static inline void native_x2apic_icr_write(u32 low, u32 id) { - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); + wrmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } static inline u64 native_x2apic_icr_read(void) { unsigned long val; - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + rdmsrq(APIC_BASE_MSR + (APIC_ICR >> 4), val); return val; } extern int x2apic_mode; extern int x2apic_phys; -extern void __init check_x2apic(void); +extern void __init x2apic_set_max_apicid(u32 apicid); extern void x2apic_setup(void); static inline int x2apic_enabled(void) { @@ -261,19 +252,18 @@ static inline int x2apic_enabled(void) #define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC)) #else /* !CONFIG_X86_X2APIC */ -static inline void check_x2apic(void) { } static inline void x2apic_setup(void) { } static inline int x2apic_enabled(void) { return 0; } - +static inline u32 native_apic_msr_read(u32 reg) { BUG(); } #define x2apic_mode (0) #define x2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ +extern void __init check_x2apic(void); struct irq_data; /* * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 * * Generic APIC sub-arch data struct. * @@ -283,8 +273,8 @@ struct irq_data; */ struct apic { /* Hotpath functions first */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); + void (*eoi)(void); + void (*native_eoi)(void); void (*write)(u32 reg, u32 v); u32 (*read)(u32 reg); @@ -299,11 +289,10 @@ struct apic { void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); - /* dest_logical is used by the IPI functions */ - u32 dest_logical; - u32 disable_esr; - u32 irq_delivery_mode; - u32 irq_dest_mode; + u32 disable_esr : 1, + dest_mode_logical : 1, + x2apic_set_max_apicid : 1, + nmi_to_offline_cpu : 1; u32 (*calc_dest_apicid)(unsigned int cpu); @@ -311,45 +300,47 @@ struct apic { u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); + /* The limit of the APIC ID space. 
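The reg >> 4 in the native_apic_msr_*() helpers above is the entire MMIO-to-MSR translation: xAPIC registers sit on 16-byte boundaries in the MMIO page, and x2APIC assigns one MSR per register starting at APIC_BASE_MSR (0x800). Checking the arithmetic on two well-known registers:

    #include <stdio.h>

    #define APIC_BASE_MSR 0x800
    #define APIC_EOI      0xB0     /* xAPIC MMIO offset */
    #define APIC_ICR      0x300

    int main(void)
    {
            /* 0xb0 >> 4 = 0xb -> MSR 0x80b; 0x300 >> 4 = 0x30 -> MSR 0x830,
             * which is why the x2APIC ICR is a single 64-bit write. */
            printf("EOI MSR: %#x\n", APIC_BASE_MSR + (APIC_EOI >> 4));
            printf("ICR MSR: %#x\n", APIC_BASE_MSR + (APIC_ICR >> 4));
            return 0;
    }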
*/ + u32 max_apic_id; + /* Probe, setup and smpboot functions */ int (*probe)(void); + void (*setup)(void); + void (*teardown)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(u32 apicid); - int (*apic_id_registered)(void); - bool (*check_apicid_used)(physid_mask_t *map, int apicid); void (*init_apic_ldr)(void); - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - void (*setup_apic_routing)(void); - int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); + u32 (*cpu_present_to_apicid)(int mps_cpu); - u32 (*get_apic_id)(unsigned long x); - u32 (*set_apic_id)(unsigned int id); + u32 (*get_apic_id)(u32 id); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - - void (*inquire_remote_apic)(int apicid); - -#ifdef CONFIG_X86_32 - /* - * Called very early during boot from get_smp_config(). It should - * return the logical apicid. x86_[bios]_cpu_to_apicid is - * initialized before this function is called. - * - * If logical apicid can't be determined that early, the function - * may return BAD_APICID. Logical apicid will be configured after - * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity - * won't be applied properly during early boot in this case. - */ - int (*x86_32_early_logical_apicid)(int cpu); -#endif + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); + /* wakeup secondary CPU using 64-bit wakeup point */ + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); + + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); + char *name; }; +struct apic_override { + void (*eoi)(void); + void (*native_eoi)(void); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); +}; + /* * Pointer to the local APIC driver in use on this system (there's * always just one such driver in use - the kernel decides via an @@ -361,19 +352,11 @@ extern struct apic *apic; * APIC drivers are probed based on how they are listed in the .apicdrivers * section. So the order is important and enforced by the ordering * of different apic driver files in the Makefile. - * - * For the files having two apic drivers, we use apic_drivers() - * to enforce the order with in them. 
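To make the ordering rule concrete, here is a sketch of how the section is consumed (an assumption, loosely modeled on the probe loops under arch/x86/kernel/apic/ and relying on the struct apic definition above, not code from this patch): the pointers are walked in link order and the first driver whose ->probe() accepts is installed, so Makefile ordering decides ties:

extern struct apic *__apicdrivers[], *__apicdrivers_end[];

static void probe_apic_driver(void)
{
        struct apic **drv;

        for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
                if ((*drv)->probe && (*drv)->probe()) {
                        apic_install_driver(*drv);      /* declared below */
                        break;
                }
        }
}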
*/ #define apic_driver(sym) \ static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym } - -#define apic_drivers(sym1, sym2) \ - static struct apic *__apicdrivers_##sym1##sym2[2] __used \ - __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym1, &sym2 } + __section(".apicdrivers") = { &sym } extern struct apic *__apicdrivers[], *__apicdrivers_end[]; @@ -381,48 +364,121 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC +extern struct apic_override __x86_apic_override; + +void __init apic_setup_apic_calls(void); +void __init apic_install_driver(struct apic *driver); + +#define apic_update_callback(_callback, _fn) { \ + __x86_apic_override._callback = _fn; \ + apic->_callback = _fn; \ + static_call_update(apic_call_##_callback, _fn); \ + pr_info("APIC: %s() replaced with %ps()\n", #_callback, _fn); \ +} + +#define DECLARE_APIC_CALL(__cb) \ + DECLARE_STATIC_CALL(apic_call_##__cb, *apic->__cb) + +DECLARE_APIC_CALL(eoi); +DECLARE_APIC_CALL(native_eoi); +DECLARE_APIC_CALL(icr_read); +DECLARE_APIC_CALL(icr_write); +DECLARE_APIC_CALL(read); +DECLARE_APIC_CALL(send_IPI); +DECLARE_APIC_CALL(send_IPI_mask); +DECLARE_APIC_CALL(send_IPI_mask_allbutself); +DECLARE_APIC_CALL(send_IPI_allbutself); +DECLARE_APIC_CALL(send_IPI_all); +DECLARE_APIC_CALL(send_IPI_self); +DECLARE_APIC_CALL(wait_icr_idle); +DECLARE_APIC_CALL(wakeup_secondary_cpu); +DECLARE_APIC_CALL(wakeup_secondary_cpu_64); +DECLARE_APIC_CALL(write); + +static __always_inline u32 apic_read(u32 reg) +{ + return static_call(apic_call_read)(reg); +} + +static __always_inline void apic_write(u32 reg, u32 val) +{ + static_call(apic_call_write)(reg, val); +} + +static __always_inline void apic_eoi(void) +{ + static_call(apic_call_eoi)(); +} + +static __always_inline void apic_native_eoi(void) +{ + static_call(apic_call_native_eoi)(); +} + +static __always_inline u64 apic_icr_read(void) +{ + return static_call(apic_call_icr_read)(); +} + +static __always_inline void apic_icr_write(u32 low, u32 high) +{ + static_call(apic_call_icr_write)(low, high); +} + +static __always_inline void __apic_send_IPI(int cpu, int vector) +{ + static_call(apic_call_send_IPI)(cpu, vector); +} -static inline u32 apic_read(u32 reg) +static __always_inline void __apic_send_IPI_mask(const struct cpumask *mask, int vector) { - return apic->read(reg); + static_call_mod(apic_call_send_IPI_mask)(mask, vector); } -static inline void apic_write(u32 reg, u32 val) +static __always_inline void __apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - apic->write(reg, val); + static_call(apic_call_send_IPI_mask_allbutself)(mask, vector); } -static inline void apic_eoi(void) +static __always_inline void __apic_send_IPI_allbutself(int vector) { - apic->eoi_write(APIC_EOI, APIC_EOI_ACK); + static_call(apic_call_send_IPI_allbutself)(vector); } -static inline u64 apic_icr_read(void) +static __always_inline void __apic_send_IPI_all(int vector) { - return apic->icr_read(); + static_call(apic_call_send_IPI_all)(vector); } -static inline void apic_icr_write(u32 low, u32 high) +static __always_inline void __apic_send_IPI_self(int vector) { - apic->icr_write(low, high); + static_call_mod(apic_call_send_IPI_self)(vector); } -static inline void 
apic_wait_icr_idle(void) +static __always_inline void apic_wait_icr_idle(void) { - apic->wait_icr_idle(); + static_call_cond(apic_call_wait_icr_idle)(); } -static inline u32 safe_apic_wait_icr_idle(void) +static __always_inline u32 safe_apic_wait_icr_idle(void) { - return apic->safe_wait_icr_idle(); + return apic->safe_wait_icr_idle ? apic->safe_wait_icr_idle() : 0; } -extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)); +static __always_inline bool apic_id_valid(u32 apic_id) +{ + return apic_id <= apic->max_apic_id; +} + +static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) +{ + if (apic->update_vector) + apic->update_vector(cpu, vector, set); +} #else /* CONFIG_X86_LOCAL_APIC */ @@ -433,29 +489,88 @@ static inline u64 apic_icr_read(void) { return 0; } static inline void apic_icr_write(u32 low, u32 high) { } static inline void apic_wait_icr_idle(void) { } static inline u32 safe_apic_wait_icr_idle(void) { return 0; } -static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} +static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); } +static inline void apic_setup_apic_calls(void) { } +static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { } + +#define apic_update_callback(_callback, _fn) do { } while (0) #endif /* CONFIG_X86_LOCAL_APIC */ extern void apic_ack_irq(struct irq_data *data); -static inline void ack_APIC_irq(void) +#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32) +#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10) + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + APIC_VECTOR_TO_REG_OFFSET(vector)); + + return !!(irr & (1U << APIC_VECTOR_TO_BIT_NUMBER(vector))); +} + +static inline bool is_vector_pending(unsigned int vector) +{ + return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector); +} + +#define MAX_APIC_VECTOR 256 +#define APIC_VECTORS_PER_REG 32 + +/* + * Vector states are maintained by APIC in 32-bit registers that are + * 16 bytes aligned. The status of each vector is kept in a single + * bit. + */ +static inline int apic_find_highest_vector(void *bitmap) +{ + int vec; + u32 *reg; + + for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; vec >= 0; vec -= APIC_VECTORS_PER_REG) { + reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec); + if (*reg) + return __fls(*reg) + vec; + } + + return -1; +} + +static inline u32 apic_get_reg(void *regs, int reg) +{ + return *((u32 *) (regs + reg)); +} + +static inline void apic_set_reg(void *regs, int reg, u32 val) +{ + *((u32 *) (regs + reg)) = val; +} + +static __always_inline u64 apic_get_reg64(void *regs, int reg) +{ + BUILD_BUG_ON(reg != APIC_ICR); + return *((u64 *) (regs + reg)); +} + +static __always_inline void apic_set_reg64(void *regs, int reg, u64 val) +{ + BUILD_BUG_ON(reg != APIC_ICR); + *((u64 *) (regs + reg)) = val; +} + +static inline void apic_clear_vector(int vec, void *bitmap) { - /* - * ack_APIC_irq() actually gets compiled as a single instruction - * ... yummie. 
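A quick illustration of the IRR indexing above: 256 vectors are spread over eight 32-bit registers spaced 16 bytes apart, so vector v lives at register offset (v / 32) * 0x10, bit (v % 32). A standalone check using the same macro values:

#include <assert.h>

#define APIC_VECTOR_TO_BIT_NUMBER(v)    ((unsigned int)(v) % 32)
#define APIC_VECTOR_TO_REG_OFFSET(v)    ((unsigned int)(v) / 32 * 0x10)

int main(void)
{
        assert(APIC_VECTOR_TO_REG_OFFSET(0x80) == 0x40);        /* fifth register */
        assert(APIC_VECTOR_TO_BIT_NUMBER(0x80) == 0);
        assert(APIC_VECTOR_TO_REG_OFFSET(0xFF) == 0x70);        /* last register */
        assert(APIC_VECTOR_TO_BIT_NUMBER(0xFF) == 31);          /* top bit */
        return 0;
}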
- */ - apic_eoi(); + clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec)); } -static inline unsigned default_get_apic_id(unsigned long x) +static inline void apic_set_vector(int vec, void *bitmap) { - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec)); +} - if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; +static inline int apic_test_vector(int vec, void *bitmap) +{ + return test_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec)); } /* @@ -464,83 +579,52 @@ static inline unsigned default_get_apic_id(unsigned long x) #define TRAMPOLINE_PHYS_LOW 0x467 #define TRAMPOLINE_PHYS_HIGH 0x469 -#ifdef CONFIG_X86_64 -extern void apic_send_IPI_self(int vector); - -DECLARE_PER_CPU(int, x2apic_extra_bits); -#endif - -extern void generic_bigsmp_probe(void); - #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); - extern struct apic apic_noop; -static inline unsigned int read_apic_id(void) +static inline u32 read_apic_id(void) { - unsigned int reg = apic_read(APIC_ID); + u32 reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } -extern int default_apic_id_valid(u32 apicid); +#ifdef CONFIG_X86_64 +typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip); extern int default_acpi_madt_oem_check(char *, char *); -extern void default_setup_apic_routing(void); +extern void x86_64_probe_apic(void); +#else +static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; } +static inline void x86_64_probe_apic(void) { } +#endif extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern bool default_check_apicid_used(physid_mask_t *map, int apicid); -extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int phys_apicid); +extern u32 default_cpu_present_to_apicid(int mps_cpu); -#endif /* CONFIG_X86_LOCAL_APIC */ +void apic_send_nmi_to_offline_cpu(unsigned int cpu); -#ifdef CONFIG_SMP -bool apic_id_is_primary_thread(unsigned int id); -#else -static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } -#endif - -extern void irq_enter(void); -extern void irq_exit(void); +#else /* CONFIG_X86_LOCAL_APIC */ -static inline void entering_irq(void) -{ - irq_enter(); - kvm_set_cpu_l1tf_flush_l1d(); -} +static inline u32 read_apic_id(void) { return 0; } -static inline void entering_ack_irq(void) -{ - entering_irq(); - ack_APIC_irq(); -} +#endif /* !CONFIG_X86_LOCAL_APIC */ -static inline void ipi_entering_ack_irq(void) -{ - irq_enter(); - ack_APIC_irq(); - kvm_set_cpu_l1tf_flush_l1d(); -} +#ifdef CONFIG_SMP +void apic_smt_update(void); +#else +static inline void apic_smt_update(void) { } +#endif -static inline void exiting_irq(void) -{ - irq_exit(); -} +struct msi_msg; +struct irq_cfg; -static inline void exiting_ack_irq(void) -{ - ack_APIC_irq(); - irq_exit(); -} +extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, + bool dmar); extern void ioapic_zap_locks(void); diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h deleted file mode 100644 index d3a2b3876ce6..000000000000 --- a/arch/x86/include/asm/apic_flat_64.h 
+++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_APIC_FLAT_64_H -#define _ASM_X86_APIC_FLAT_64_H - -extern void flat_init_apic_ldr(void); - -#endif - diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 05e694ed8386..be39a543fbe5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_APICDEF_H #define _ASM_X86_APICDEF_H +#include <linux/bits.h> + /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) * @@ -18,6 +20,13 @@ */ #define IO_APIC_SLOT_SIZE 1024 +#define APIC_DELIVERY_MODE_FIXED 0 +#define APIC_DELIVERY_MODE_LOWESTPRIO 1 +#define APIC_DELIVERY_MODE_SMI 2 +#define APIC_DELIVERY_MODE_NMI 4 +#define APIC_DELIVERY_MODE_INIT 5 +#define APIC_DELIVERY_MODE_EXTINT 7 + #define APIC_ID 0x20 #define APIC_LVR 0x30 @@ -89,18 +98,12 @@ #define APIC_DM_EXTINT 0x00700 #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 -#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) -#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define GET_XAPIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_XAPIC_DEST_FIELD(x) ((x) << 24) #define APIC_LVTT 0x320 #define APIC_LVTTHMR 0x330 #define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 -#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) -#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) -#define SET_APIC_TIMER_BASE(x) (((x) << 18)) -#define APIC_TIMER_BASE_CLKIN 0x0 -#define APIC_TIMER_BASE_TMBASE 0x1 -#define APIC_TIMER_BASE_DIV 0x2 #define APIC_LVT_TIMER_ONESHOT (0 << 17) #define APIC_LVT_TIMER_PERIODIC (1 << 17) #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) @@ -132,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 @@ -144,9 +149,10 @@ #define APIC_EILVT_MASKED (1 << 16) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#define APIC_BASE_MSR 0x800 -#define XAPIC_ENABLE (1UL << 11) -#define X2APIC_ENABLE (1UL << 10) +#define APIC_BASE_MSR 0x800 +#define APIC_X2APIC_ID_MSR 0x802 +#define XAPIC_ENABLE BIT(11) +#define X2APIC_ENABLE BIT(10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 @@ -168,279 +174,10 @@ #define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) #define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) -/* - * the local APIC register structure, memory mapped. Not terribly well - * tested, but we might eventually use this one in the future - the - * problem why we cannot use it right now is the P5 APIC, it has an - * errata which cannot take 8-bit reads and writes, only 32-bit ones ... 
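For reference, the two enable bits converted to BIT() above live in the IA32_APIC_BASE MSR, and x2APIC mode is only architecturally valid with both bits set (EXTD without EN is a reserved combination). A small decoding sketch under that assumption:

#include <stdint.h>
#include <stdbool.h>

#define BIT(n)          (1UL << (n))
#define XAPIC_ENABLE    BIT(11)         /* IA32_APIC_BASE.EN */
#define X2APIC_ENABLE   BIT(10)         /* IA32_APIC_BASE.EXTD */

static bool apic_base_is_x2apic(uint64_t apic_base)
{
        return (apic_base & (XAPIC_ENABLE | X2APIC_ENABLE)) ==
               (XAPIC_ENABLE | X2APIC_ENABLE);
}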
- */ -#define u32 unsigned int - -struct local_apic { - -/*000*/ struct { u32 __reserved[4]; } __reserved_01; - -/*010*/ struct { u32 __reserved[4]; } __reserved_02; - -/*020*/ struct { /* APIC ID Register */ - u32 __reserved_1 : 24, - phys_apic_id : 4, - __reserved_2 : 4; - u32 __reserved[3]; - } id; - -/*030*/ const - struct { /* APIC Version Register */ - u32 version : 8, - __reserved_1 : 8, - max_lvt : 8, - __reserved_2 : 8; - u32 __reserved[3]; - } version; - -/*040*/ struct { u32 __reserved[4]; } __reserved_03; - -/*050*/ struct { u32 __reserved[4]; } __reserved_04; - -/*060*/ struct { u32 __reserved[4]; } __reserved_05; - -/*070*/ struct { u32 __reserved[4]; } __reserved_06; - -/*080*/ struct { /* Task Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } tpr; - -/*090*/ const - struct { /* Arbitration Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } apr; - -/*0A0*/ const - struct { /* Processor Priority Register */ - u32 priority : 8, - __reserved_1 : 24; - u32 __reserved_2[3]; - } ppr; - -/*0B0*/ struct { /* End Of Interrupt Register */ - u32 eoi; - u32 __reserved[3]; - } eoi; - -/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; - -/*0D0*/ struct { /* Logical Destination Register */ - u32 __reserved_1 : 24, - logical_dest : 8; - u32 __reserved_2[3]; - } ldr; - -/*0E0*/ struct { /* Destination Format Register */ - u32 __reserved_1 : 28, - model : 4; - u32 __reserved_2[3]; - } dfr; - -/*0F0*/ struct { /* Spurious Interrupt Vector Register */ - u32 spurious_vector : 8, - apic_enabled : 1, - focus_cpu : 1, - __reserved_2 : 22; - u32 __reserved_3[3]; - } svr; - -/*100*/ struct { /* In Service Register */ -/*170*/ u32 bitfield; - u32 __reserved[3]; - } isr [8]; - -/*180*/ struct { /* Trigger Mode Register */ -/*1F0*/ u32 bitfield; - u32 __reserved[3]; - } tmr [8]; - -/*200*/ struct { /* Interrupt Request Register */ -/*270*/ u32 bitfield; - u32 __reserved[3]; - } irr [8]; - -/*280*/ union { /* Error Status Register */ - struct { - u32 send_cs_error : 1, - receive_cs_error : 1, - send_accept_error : 1, - receive_accept_error : 1, - __reserved_1 : 1, - send_illegal_vector : 1, - receive_illegal_vector : 1, - illegal_register_address : 1, - __reserved_2 : 24; - u32 __reserved_3[3]; - } error_bits; - struct { - u32 errors; - u32 __reserved_3[3]; - } all_errors; - } esr; - -/*290*/ struct { u32 __reserved[4]; } __reserved_08; - -/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; - -/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; - -/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; - -/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; - -/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; - -/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; - -/*300*/ struct { /* Interrupt Command Register 1 */ - u32 vector : 8, - delivery_mode : 3, - destination_mode : 1, - delivery_status : 1, - __reserved_1 : 1, - level : 1, - trigger : 1, - __reserved_2 : 2, - shorthand : 2, - __reserved_3 : 12; - u32 __reserved_4[3]; - } icr1; - -/*310*/ struct { /* Interrupt Command Register 2 */ - union { - u32 __reserved_1 : 24, - phys_dest : 4, - __reserved_2 : 4; - u32 __reserved_3 : 24, - logical_dest : 8; - } dest; - u32 __reserved_4[3]; - } icr2; - -/*320*/ struct { /* LVT - Timer */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - timer_mode : 1, - __reserved_3 : 14; - u32 __reserved_4[3]; - } lvt_timer; - -/*330*/ struct { /* LVT - Thermal Sensor */ - u32 vector : 8, - 
delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_thermal; - -/*340*/ struct { /* LVT - Performance Counter */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_pc; - -/*350*/ struct { /* LVT - LINT0 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint0; - -/*360*/ struct { /* LVT - LINT1 */ - u32 vector : 8, - delivery_mode : 3, - __reserved_1 : 1, - delivery_status : 1, - polarity : 1, - remote_irr : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15; - u32 __reserved_3[3]; - } lvt_lint1; - -/*370*/ struct { /* LVT - Error */ - u32 vector : 8, - __reserved_1 : 4, - delivery_status : 1, - __reserved_2 : 3, - mask : 1, - __reserved_3 : 15; - u32 __reserved_4[3]; - } lvt_error; - -/*380*/ struct { /* Timer Initial Count Register */ - u32 initial_count; - u32 __reserved_2[3]; - } timer_icr; - -/*390*/ const - struct { /* Timer Current Count Register */ - u32 curr_count; - u32 __reserved_2[3]; - } timer_ccr; - -/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; - -/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; - -/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; - -/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; - -/*3E0*/ struct { /* Timer Divide Configuration Register */ - u32 divisor : 4, - __reserved_1 : 28; - u32 __reserved_2[3]; - } timer_dcr; - -/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; - -} __attribute__ ((packed)); - -#undef u32 - #ifdef CONFIG_X86_32 #define BAD_APICID 0xFFu #else #define BAD_APICID 0xFFFFu #endif -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index fc0693569f7a..b5982b94bdba 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -12,15 +12,14 @@ #define REG_OUT "a" #endif -#define __HAVE_ARCH_SW_HWEIGHT - static __always_inline unsigned int __arch_hweight32(unsigned int w) { unsigned int res; - asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm_inline (ALTERNATIVE("call __sw_hweight32", + "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT + : [val] REG_IN (w)); return res; } @@ -46,9 +45,10 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) { unsigned long res; - asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + asm_inline (ALTERNATIVE("call __sw_hweight64", + "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT) + : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT + : [val] REG_IN (w)); return res; } diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 3ac991d81e74..4c305305871b 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of the Linux kernel. 
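On the __arch_hweight32() alternative above: __sw_hweight32 is the out-of-line fallback that gets patched over with popcntl on X86_FEATURE_POPCNT CPUs. It computes an ordinary population count; a portable, functionally equivalent sketch (not the kernel's actual implementation):

static unsigned int sw_hweight32(unsigned int w)
{
        unsigned int res = 0;

        while (w) {
                w &= w - 1;     /* clear the lowest set bit */
                res++;
        }
        return res;
}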
* * Copyright (c) 2011-2014, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * */ #ifndef ASM_X86_ARCHRANDOM_H @@ -28,101 +15,46 @@ #define RDRAND_RETRY_LOOPS 10 -#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" -#define RDSEED_INT ".byte 0x0f,0xc7,0xf8" -#ifdef CONFIG_X86_64 -# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" -# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8" -#else -# define RDRAND_LONG RDRAND_INT -# define RDSEED_LONG RDSEED_INT -#endif - /* Unconditional execution of RDRAND and RDSEED */ -static inline bool rdrand_long(unsigned long *v) -{ - bool ok; - unsigned int retry = RDRAND_RETRY_LOOPS; - do { - asm volatile(RDRAND_LONG - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); - if (ok) - return true; - } while (--retry); - return false; -} - -static inline bool rdrand_int(unsigned int *v) +static inline bool __must_check rdrand_long(unsigned long *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_INT - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + asm volatile("rdrand %[out]" + : "=@ccc" (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); return false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; - asm volatile(RDSEED_LONG - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + asm volatile("rdseed %[out]" + : "=@ccc" (ok), [out] "=r" (*v)); return ok; } -static inline bool rdseed_int(unsigned int *v) -{ - bool ok; - asm volatile(RDSEED_INT - CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); - return ok; -} - -/* Conditional execution based on CPU type */ -#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) -#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) - /* * These are the generic interfaces; they must not be declared if the - * stubs in <linux/random.h> are to be invoked, - * i.e. CONFIG_ARCH_RANDOM is not defined. + * stubs in <linux/random.h> are to be invoked. */ -#ifdef CONFIG_ARCH_RANDOM - -static inline bool arch_get_random_long(unsigned long *v) -{ - return arch_has_random() ? rdrand_long(v) : false; -} - -static inline bool arch_get_random_int(unsigned int *v) -{ - return arch_has_random() ? rdrand_int(v) : false; -} -static inline bool arch_get_random_seed_long(unsigned long *v) +static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) { - return arch_has_random_seed() ? rdseed_long(v) : false; + return max_longs && static_cpu_has(X86_FEATURE_RDRAND) && rdrand_long(v) ? 1 : 0; } -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) { - return arch_has_random_seed() ? rdseed_int(v) : false; + return max_longs && static_cpu_has(X86_FEATURE_RDSEED) && rdseed_long(v) ? 
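On RDRAND_RETRY_LOOPS above: RDRAND signals "no data available" by clearing CF, and Intel's DRNG guidance bounds retries at 10. A userspace sketch of the same policy using the compiler intrinsic (which returns 1 on success, i.e. CF set):

#include <stdbool.h>
#include <immintrin.h>

#define RDRAND_RETRY_LOOPS 10

static bool rdrand_u64(unsigned long long *v)
{
        for (int i = 0; i < RDRAND_RETRY_LOOPS; i++) {
                if (_rdrand64_step(v))  /* CF set: *v holds a random value */
                        return true;
        }
        return false;   /* DRNG temporarily exhausted */
}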
1 : 0; } -extern void x86_init_rdrand(struct cpuinfo_x86 *c); - -#else /* !CONFIG_ARCH_RANDOM */ - -static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } - -#endif /* !CONFIG_ARCH_RANDOM */ +#ifndef CONFIG_UML +void x86_init_rdrand(struct cpuinfo_x86 *c); +#endif #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 1908214b9125..11c6fecc3ad7 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -1,41 +1,25 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <asm/ftrace.h> #include <linux/uaccess.h> +#include <linux/pgtable.h> #include <asm/string.h> #include <asm/page.h> #include <asm/checksum.h> +#include <asm/mce.h> #include <asm-generic/asm-prototypes.h> -#include <asm/page.h> -#include <asm/pgtable.h> #include <asm/special_insns.h> #include <asm/preempt.h> #include <asm/asm.h> +#include <asm/fred.h> +#include <asm/gsseg.h> +#include <asm/nospec-branch.h> -#ifndef CONFIG_X86_CMPXCHG64 +#ifndef CONFIG_X86_CX8 extern void cmpxchg8b_emu(void); #endif -#ifdef CONFIG_RETPOLINE -#ifdef CONFIG_X86_32 -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); -#else -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); -INDIRECT_THUNK(8) -INDIRECT_THUNK(9) -INDIRECT_THUNK(10) -INDIRECT_THUNK(11) -INDIRECT_THUNK(12) -INDIRECT_THUNK(13) -INDIRECT_THUNK(14) -INDIRECT_THUNK(15) +#ifdef CONFIG_STACKPROTECTOR +extern unsigned long __ref_stack_chk_guard; #endif -INDIRECT_THUNK(ax) -INDIRECT_THUNK(bx) -INDIRECT_THUNK(cx) -INDIRECT_THUNK(dx) -INDIRECT_THUNK(si) -INDIRECT_THUNK(di) -INDIRECT_THUNK(bp) -#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 6467757bb39f..0e8c611bc9e2 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -2,24 +2,31 @@ #ifndef _ASM_X86_ASM_H #define _ASM_X86_ASM_H -#ifdef __ASSEMBLY__ -# define __ASM_FORM(x) x -# define __ASM_FORM_RAW(x) x -# define __ASM_FORM_COMMA(x) x, +#include <linux/annotate.h> + +#ifdef __ASSEMBLER__ +# define __ASM_FORM(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, +# define __ASM_REGPFX % #else -# define __ASM_FORM(x) " " #x " " -# define __ASM_FORM_RAW(x) #x -# define __ASM_FORM_COMMA(x) " " #x "," +#include <linux/stringify.h> +# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " +# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) +# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," +# define __ASM_REGPFX %% #endif +#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) + #ifndef __x86_64__ /* 32 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(a) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) +# define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else /* 64 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(b) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) +# define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif #define __ASM_SIZE(inst, ...) 
__ASM_SEL(inst##l##__VA_ARGS__, \ @@ -46,6 +53,9 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */ +#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip)) + #ifndef __x86_64__ /* 32 bit */ @@ -105,102 +115,106 @@ #endif -/* - * Macros to generate condition code outputs from inline assembly, - * The output operand must be type "bool". - */ -#ifdef __GCC_ASM_FLAG_OUTPUTS__ -# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" -# define CC_OUT(c) "=@cc" #c -#else -# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" -# define CC_OUT(c) [_cc_ ## c] "=qm" -#endif - -/* Exception table entry */ -#ifdef __ASSEMBLY__ -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ - .pushsection "__ex_table","a" ; \ - .balign 4 ; \ - .long (from) - . ; \ - .long (to) - . ; \ - .long (handler) - . ; \ - .popsection +#ifndef __ASSEMBLER__ +static __always_inline __pure void *rip_rel_ptr(void *p) +{ + asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p)); -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + return p; +} +#endif -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) +#ifdef __KERNEL__ -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) +#ifndef COMPILE_OFFSETS +#include <asm/asm-offsets.h> +#endif -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) +# include <asm/extable_fixup_types.h> -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) +/* Exception table entry */ +#ifdef __ASSEMBLER__ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ + .pushsection "__ex_table", "aM", @progbits, EXTABLE_SIZE ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long type ; \ + .popsection -# define _ASM_NOKPROBE(entry) \ +# ifdef CONFIG_KPROBES +# define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ _ASM_PTR (entry); \ .popsection - -.macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE_UA(100b, 103b) - _ASM_EXTABLE_UA(101b, 103b) - .endm - -#else -# define _EXPAND_EXTABLE_HANDLE(x) #x -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ - " .pushsection \"__ex_table\",\"a\"\n" \ +# else +# define _ASM_NOKPROBE(entry) +# endif + +#else /* ! 
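Why "(from) - ." and "(to) - ." in the entries above: storing PC-relative 32-bit offsets keeps __ex_table position-independent and smaller than absolute 64-bit pointers would be. At fault time the absolute addresses are recovered roughly the way the kernel's extable code does it:

struct exception_table_entry {
        int insn, fixup, data;  /* offsets relative to each field's address */
};

static unsigned long ex_insn_addr(const struct exception_table_entry *x)
{
        return (unsigned long)&x->insn + x->insn;
}

static unsigned long ex_fixup_addr(const struct exception_table_entry *x)
{
        return (unsigned long)&x->fixup + x->fixup;
}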
__ASSEMBLER__ */ + +# define DEFINE_EXTABLE_TYPE_REG \ + ".macro extable_type_reg type:req reg:req\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ + ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ + ".endif\n" \ + ".set .Lregnr, .Lregnr+1\n" \ + ".endr\n" \ + ".set .Lregnr, 0\n" \ + ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ + ".ifc \\reg, %%\\rs\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ + ".endif\n" \ + ".set .Lregnr, .Lregnr+1\n" \ + ".endr\n" \ + ".if (.Lfound != 1)\n" \ + ".error \"extable_type_reg: bad register argument\"\n" \ + ".endif\n" \ + ".endm\n" + +# define UNDEFINE_EXTABLE_TYPE_REG \ + ".purgem extable_type_reg\n" + +# define _ASM_EXTABLE_TYPE(from, to, type) \ + " .pushsection __ex_table, \"aM\", @progbits, " \ + __stringify(EXTABLE_SIZE) "\n" \ " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ - " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ + " .long " __stringify(type) " \n" \ " .popsection\n" -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) +# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \ + " .pushsection __ex_table, \"aM\", @progbits, " \ + __stringify(EXTABLE_SIZE) "\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + DEFINE_EXTABLE_TYPE_REG \ + "extable_type_reg reg=" __stringify(reg) ", type=" __stringify(type) " \n"\ + UNDEFINE_EXTABLE_TYPE_REG \ + " .popsection\n" -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) +/* For C file, we already have NOKPROBE_SYMBOL macro */ -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) +/* Insert a comma if args are non-empty */ +#define COMMA(x...) __COMMA(x) +#define __COMMA(...) , ##__VA_ARGS__ -/* For C file, we already have NOKPROBE_SYMBOL macro */ -#endif +/* + * Combine multiple asm inline constraint args into a single arg for passing to + * another macro. + */ +#define ASM_OUTPUT(x...) x +#define ASM_INPUT(x...) x -#ifndef __ASSEMBLY__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -209,6 +223,35 @@ */ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif /* __ASSEMBLER__ */ + +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) + +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) + +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) + +/* + * Both i386 and x86_64 returns 64-bit values in edx:eax for certain + * instructions, but GCC's "A" constraint has different meanings. + * For i386, "A" means exactly edx:eax, while for x86_64 it + * means rax *or* rdx. 
+ * + * These helpers wrapping these semantic differences save one instruction + * clearing the high half of 'low': + */ +#ifdef CONFIG_X86_64 +# define EAX_EDX_DECLARE_ARGS(val, low, high) unsigned long low, high +# define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) +# define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) +#else +# define EAX_EDX_DECLARE_ARGS(val, low, high) u64 val +# define EAX_EDX_VAL(val, low, high) (val) +# define EAX_EDX_RET(val, low, high) "=A" (val) #endif +#endif /* __KERNEL__ */ #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index ea3d95275b43..75743f1dfd4e 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -14,206 +14,115 @@ * resource counting etc.. */ -#define ATOMIC_INIT(i) { (i) } - -/** - * arch_atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ - return READ_ONCE((v)->counter); + return __READ_ONCE((v)->counter); } -/** - * arch_atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { - WRITE_ONCE(v->counter, i); + __WRITE_ONCE(v->counter, i); } -/** - * arch_atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm_inline volatile(LOCK_PREFIX "addl %1, %0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } -/** - * arch_atomic_sub - subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm_inline volatile(LOCK_PREFIX "subl %1, %0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } -/** - * arch_atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test -/** - * arch_atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" - : "+m" (v->counter)); + asm_inline volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc -/** - * arch_atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. 
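A usage sketch for the EAX_EDX_* helpers defined above (an assumption that mirrors how the kernel's rdmsr-style wrappers consume them; the function name is illustrative): on 64-bit, 'low' and 'high' are two 32-bit outputs recombined by EAX_EDX_VAL(), saving the instruction that would otherwise clear the upper half of 'low'; on 32-bit the "A" constraint binds edx:eax directly:

static inline unsigned long long read_msr_sketch(unsigned int msr)
{
        EAX_EDX_DECLARE_ARGS(val, low, high);

        asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
        return EAX_EDX_VAL(val, low, high);
}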
- */ static __always_inline void arch_atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" - : "+m" (v->counter)); + asm_inline volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec -/** - * arch_atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test -/** - * arch_atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test -/** - * arch_atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative -/** - * arch_atomic_add_return - add integer and return - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns @i + @v - */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } +#define arch_atomic_add_return arch_atomic_add_return -/** - * arch_atomic_sub_return - subtract integer and return - * @v: pointer of type atomic_t - * @i: integer value to subtract - * - * Atomically subtracts @i from @v and returns @v - @i - */ -static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) -{ - return arch_atomic_add_return(-i, v); -} +#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v) static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } +#define arch_atomic_fetch_add arch_atomic_fetch_add -static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) -{ - return xadd(&v->counter, -i); -} +#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v) static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } +#define arch_atomic_cmpxchg arch_atomic_cmpxchg -#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { - return try_cmpxchg(&v->counter, old, new); + return arch_try_cmpxchg(&v->counter, old, new); } +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg -static inline int arch_atomic_xchg(atomic_t *v, int new) +static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } +#define arch_atomic_xchg arch_atomic_xchg -static inline void arch_atomic_and(int i, atomic_t *v) +static __always_inline void arch_atomic_and(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "andl %1,%0" + asm_inline volatile(LOCK_PREFIX "andl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } -static inline int arch_atomic_fetch_and(int i, atomic_t 
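On the xadd-based arch_atomic_add_return() above: XADD atomically stores old + i and returns the old value, so the new value is reconstructed as i + old without touching memory again. The same shape in portable C11, for illustration only:

#include <stdatomic.h>

static int add_return(atomic_int *v, int i)
{
        /* atomic_fetch_add() is the xadd analogue: it returns the old value */
        return i + atomic_fetch_add(v, i);
}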
*v) +static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -221,16 +130,17 @@ static inline int arch_atomic_fetch_and(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_and arch_atomic_fetch_and -static inline void arch_atomic_or(int i, atomic_t *v) +static __always_inline void arch_atomic_or(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "orl %1,%0" + asm_inline volatile(LOCK_PREFIX "orl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } -static inline int arch_atomic_fetch_or(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -238,16 +148,17 @@ static inline int arch_atomic_fetch_or(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_or arch_atomic_fetch_or -static inline void arch_atomic_xor(int i, atomic_t *v) +static __always_inline void arch_atomic_xor(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "xorl %1,%0" + asm_inline volatile(LOCK_PREFIX "xorl %1, %0" : "+m" (v->counter) : "ir" (i) : "memory"); } -static inline int arch_atomic_fetch_xor(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -255,6 +166,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> @@ -262,6 +174,4 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v) # include <asm/atomic64_64.h> #endif -#include <asm-generic/atomic-instrumented.h> - #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 6a5b0ec460da..ab838205c1c6 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -9,11 +9,37 @@ /* An 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + s64 __aligned(8) counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } +/* + * Read an atomic64_t non-atomically. + * + * This is intended to be used in cases where a subsequent atomic operation + * will handle the torn value, and can be used to prime the first iteration + * of unconditional try_cmpxchg() loops, e.g.: + * + * s64 val = arch_atomic64_read_nonatomic(v); + * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i); + * + * This is NOT safe to use where the value is not always checked by a + * subsequent atomic operation, such as in conditional try_cmpxchg() loops + * that can break before the atomic operation, e.g.: + * + * s64 val = arch_atomic64_read_nonatomic(v); + * do { + * if (condition(val)) + * break; + * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i); + */ +static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v) +{ + /* See comment in arch_atomic_read(). */ + return __READ_ONCE(v->counter); +} + #define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...) #ifndef ATOMIC64_EXPORT #define ATOMIC64_DECL_ONE __ATOMIC64_DECL @@ -22,16 +48,20 @@ typedef struct { ATOMIC64_EXPORT(atomic64_##sym) #endif -#ifdef CONFIG_X86_CMPXCHG64 -#define __alternative_atomic64(f, g, out, in...) \ - asm volatile("call %P[func]" \ - : out : [func] "i" (atomic64_##g##_cx8), ## in) +#ifdef CONFIG_X86_CX8 +#define __alternative_atomic64(f, g, out, in, clobbers...) 
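The arch_atomic64_read_nonatomic() comment above deserves a concrete rendering. In C11 terms, the pattern primes the loop with a relaxed (possibly torn) read and lets compare-exchange both detect the mismatch and refresh the expected value, so a torn prime costs at most one extra iteration. A sketch, not the kernel code:

#include <stdatomic.h>
#include <stdint.h>

static int64_t fetch_and_sketch(_Atomic int64_t *v, int64_t i)
{
        int64_t val = atomic_load_explicit(v, memory_order_relaxed);

        /* on failure, 'val' is reloaded from *v and the loop retries */
        while (!atomic_compare_exchange_weak(v, &val, val & i))
                ;
        return val;
}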
\ + asm volatile("call %c[func]" \ + : ALT_OUTPUT_SP(out) \ + : [func] "i" (atomic64_##g##_cx8) \ + COMMA(in) \ + : clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) #else -#define __alternative_atomic64(f, g, out, in...) \ - alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ - X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in) +#define __alternative_atomic64(f, g, out, in, clobbers...) \ + alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ + X86_FEATURE_CX8, ASM_OUTPUT(out), \ + ASM_INPUT(in), clobbers) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \ ATOMIC64_DECL_ONE(sym##_386) @@ -42,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386); ATOMIC64_DECL_ONE(dec_386); #endif -#define alternative_atomic64(f, out, in...) \ - __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) +#define alternative_atomic64(f, out, in, clobbers...) \ + __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers) ATOMIC64_DECL(read); ATOMIC64_DECL(set); @@ -61,204 +91,154 @@ ATOMIC64_DECL(add_unless); #undef __ATOMIC64_DECL #undef ATOMIC64_EXPORT -/** - * arch_atomic64_cmpxchg - cmpxchg atomic64 variable - * @v: pointer to type atomic64_t - * @o: expected value - * @n: new value - * - * Atomically sets @v to @n if it was equal to @o and returns - * the old value. - */ +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + return arch_cmpxchg64(&v->counter, old, new); +} +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, - long long n) +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { - return arch_cmpxchg64(&v->counter, o, n); + return arch_try_cmpxchg64(&v->counter, old, new); } +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -/** - * arch_atomic64_xchg - xchg atomic64 variable - * @v: pointer to type atomic64_t - * @n: value to assign - * - * Atomically xchgs the value of @v to @n and returns - * the old value. - */ -static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { - long long o; + s64 o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; - alternative_atomic64(xchg, "=&A" (o), - "S" (v), "b" (low), "c" (high) - : "memory"); + alternative_atomic64(xchg, + "=&A" (o), + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "memory"); return o; } +#define arch_atomic64_xchg arch_atomic64_xchg -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: value to assign - * - * Atomically sets the value of @v to @n. - */ -static inline void arch_atomic64_set(atomic64_t *v, long long i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; - alternative_atomic64(set, /* no output */, - "S" (v), "b" (low), "c" (high) - : "eax", "edx", "memory"); + alternative_atomic64(set, + /* no output */, + ASM_INPUT("S" (v), "b" (low), "c" (high)), + "eax", "edx", "memory"); } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically reads the value of @v and returns it. 
- */ -static inline long long arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { - long long r; - alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); + s64 r; + alternative_atomic64(read, "=&A" (r), "c" (v), "memory"); return r; } -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + *@v - */ -static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } +#define arch_atomic64_add_return arch_atomic64_add_return -/* - * Other variants with different arithmetic operators: - */ -static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); return i; } +#define arch_atomic64_sub_return arch_atomic64_sub_return -static inline long long arch_atomic64_inc_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { - long long a; - alternative_atomic64(inc_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + s64 a; + alternative_atomic64(inc_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_inc_return arch_atomic64_inc_return -static inline long long arch_atomic64_dec_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { - long long a; - alternative_atomic64(dec_return, "=&A" (a), - "S" (v) : "memory", "ecx"); + s64 a; + alternative_atomic64(dec_return, + "=&A" (a), + "S" (v), + "memory", "ecx"); return a; } #define arch_atomic64_dec_return arch_atomic64_dec_return -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ -static inline long long arch_atomic64_add(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); - return i; + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ -static inline long long arch_atomic64_sub(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, - ASM_OUTPUT2("+A" (i), "+c" (v)), - ASM_NO_INPUT_CLOBBER("memory")); - return i; + ASM_OUTPUT("+A" (i), "+c" (v)), + /* no input */, + "memory"); } -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. 
- */ -static inline void arch_atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { - __alternative_atomic64(inc, inc_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(inc, inc_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ -static inline void arch_atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { - __alternative_atomic64(dec, dec_return, /* no output */, - "S" (v) : "memory", "eax", "ecx", "edx"); + __alternative_atomic64(dec, dec_return, + /* no output */, + "S" (v), + "memory", "eax", "ecx", "edx"); } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns non-zero if the add was done, zero otherwise. - */ -static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, - long long u) +static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); alternative_atomic64(add_unless, - ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), - "S" (v) : "memory"); + ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)), + "S" (v), + "memory"); return (int)a; } +#define arch_atomic64_add_unless arch_atomic64_add_unless -static inline int arch_atomic64_inc_not_zero(atomic64_t *v) +static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; - alternative_atomic64(inc_not_zero, "=&a" (r), - "S" (v) : "ecx", "edx", "memory"); + alternative_atomic64(inc_not_zero, + "=&a" (r), + "S" (v), + "ecx", "edx", "memory"); return r; } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { - long long r; - alternative_atomic64(dec_if_positive, "=&A" (r), - "S" (v) : "ecx", "memory"); + s64 r; + alternative_atomic64(dec_if_positive, + "=&A" (r), + "S" (v), + "ecx", "memory"); return r; } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive @@ -266,69 +246,66 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void arch_atomic64_and(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); } -static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); - return old; + return val; } +#define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - long 
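A semantics note on arch_atomic64_dec_if_positive() above: it decrements only when the result stays non-negative and returns the would-be decremented value either way, so a negative return means the counter was left untouched. A C11 sketch of that contract, assuming the usual try_cmpxchg loop shape:

#include <stdatomic.h>
#include <stdint.h>

static int64_t dec_if_positive(_Atomic int64_t *v)
{
        int64_t val = atomic_load_explicit(v, memory_order_relaxed);
        int64_t dec;

        do {
                dec = val - 1;
                if (dec < 0)
                        break;          /* would go negative: do not store */
        } while (!atomic_compare_exchange_weak(v, &val, dec));

        return dec;
}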
long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); } -static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); - return old; + return val; } +#define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(long long i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); } -static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); - return old; + return val; } +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 val = arch_atomic64_read_nonatomic(v); - while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) - c = old; + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i)); - return old; + return val; } +#define arch_atomic64_fetch_add arch_atomic64_fetch_add #define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index dadc20adba21..87b496325b5b 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -10,196 +10,113 @@ #define ATOMIC64_INIT(i) { (i) } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t - * - * Atomically reads the value of @v. - * Doesn't imply a read memory barrier. - */ -static inline long arch_atomic64_read(const atomic64_t *v) -{ - return READ_ONCE((v)->counter); -} - -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ -static inline void arch_atomic64_set(atomic64_t *v, long i) -{ - WRITE_ONCE(v->counter, i); -} - -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. 
- */ -static __always_inline void arch_atomic64_add(long i, atomic64_t *v) -{ - asm volatile(LOCK_PREFIX "addq %1,%0" +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) +{ + return __READ_ONCE((v)->counter); +} + +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) +{ + __WRITE_ONCE(v->counter, i); +} + +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) +{ + asm_inline volatile(LOCK_PREFIX "addq %1, %0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ -static inline void arch_atomic64_sub(long i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "subq %1,%0" + asm_inline volatile(LOCK_PREFIX "subq %1, %0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { - asm volatile(LOCK_PREFIX "incq %0" + asm_inline volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { - asm volatile(LOCK_PREFIX "decq %0" + asm_inline volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_dec_and_test - decrement and test - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline bool arch_atomic64_dec_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test -/** - * arch_atomic64_inc_and_test - increment and test - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. 
- */ -static inline bool arch_atomic64_inc_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test -/** - * arch_atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) +static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } #define arch_atomic64_add_negative arch_atomic64_add_negative -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + @v - */ -static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } +#define arch_atomic64_add_return arch_atomic64_add_return -static inline long arch_atomic64_sub_return(long i, atomic64_t *v) -{ - return arch_atomic64_add_return(-i, v); -} +#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v) -static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } +#define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) -{ - return xadd(&v->counter, -i); -} +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v) -static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { - return try_cmpxchg(&v->counter, old, new); + return arch_try_cmpxchg(&v->counter, old, new); } +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static inline long arch_atomic64_xchg(atomic64_t *v, long new) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } +#define arch_atomic64_xchg arch_atomic64_xchg -static inline void arch_atomic64_and(long i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "andq %1,%0" + asm_inline volatile(LOCK_PREFIX "andq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); } -static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -207,16 +124,17 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } +#define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(long i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "orq %1,%0" + 
asm_inline volatile(LOCK_PREFIX "orq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); } -static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -224,16 +142,17 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } +#define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(long i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "xorq %1,%0" + asm_inline volatile(LOCK_PREFIX "xorq %1, %0" : "+m" (v->counter) : "er" (i) : "memory"); } -static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -241,5 +160,6 @@ static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h new file mode 100644 index 000000000000..fa918f01333e --- /dev/null +++ b/arch/x86/include/asm/audit.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AUDIT_H +#define _ASM_X86_AUDIT_H + +int ia32_classify_syscall(unsigned int syscall); + +extern unsigned ia32_dir_class[]; +extern unsigned ia32_write_class[]; +extern unsigned ia32_read_class[]; +extern unsigned ia32_chattr_class[]; +extern unsigned ia32_signal_class[]; + + +#endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 14de0432d288..db70832232d4 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -12,16 +12,16 @@ */ #ifdef CONFIG_X86_32 -#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \ +#define mb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "mfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \ +#define rmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "lfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \ +#define wmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "sfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") #else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") +#define __mb() asm volatile("mfence":::"memory") +#define __rmb() asm volatile("lfence":::"memory") +#define __wmb() asm volatile("sfence" ::: "memory") #endif /** @@ -33,33 +33,25 @@ * Returns: * 0 - (index < size) */ -static inline unsigned long array_index_mask_nospec(unsigned long index, - unsigned long size) -{ - unsigned long mask; - - asm volatile ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) - :"g"(size),"r" (index) - :"cc"); - return mask; -} - -/* Override the default implementation from linux/nospec.h. 
*/ -#define array_index_mask_nospec array_index_mask_nospec +#define array_index_mask_nospec(idx,sz) ({ \ + typeof((idx)+(sz)) __idx = (idx); \ + typeof(__idx) __sz = (sz); \ + unsigned long __mask; \ + asm volatile ("cmp %1,%2; sbb %0,%0" \ + :"=r" (__mask) \ + :ASM_INPUT_G (__sz), \ + "r" (__idx) \ + :"cc"); \ + __mask; }) /* Prevent speculative execution past this barrier. */ -#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ - "lfence", X86_FEATURE_LFENCE_RDTSC) +#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC) -#define dma_rmb() barrier() -#define dma_wmb() barrier() +#define __dma_rmb() barrier() +#define __dma_wmb() barrier() + +#define __smp_mb() asm volatile("lock addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") -#ifdef CONFIG_X86_32 -#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") -#else -#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") -#endif #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) @@ -80,8 +72,11 @@ do { \ }) /* Atomic operations are already serializing on x86 */ -#define __smp_mb__before_atomic() barrier() -#define __smp_mb__after_atomic() barrier() +#define __smp_mb__before_atomic() do { } while (0) +#define __smp_mb__after_atomic() do { } while (0) + +/* Writing to CR3 provides a full memory barrier in switch_mm(). */ +#define smp_mb__after_switch_mm() do { } while (0) #include <asm-generic/barrier.h> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ad7b210aa3f6..c2ce213f2b9b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -36,245 +36,130 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ -#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) -/* Technically wrong, but this avoids compilation errors on some gcc - versions. */ -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) -#else -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) -#endif +#define RLONG_ADDR(x) "m" (*(volatile long *) (x)) +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) -#define ADDR BITOP_ADDR(addr) +#define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ -#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * - * Note: there are no guarantees that this function will not be reordered - * on non x86 architectures, so if you are writing portable code, - * make sure not to rely on its reordering guarantees. - * - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. 
- */ static __always_inline void -set_bit(long nr, volatile unsigned long *addr) +arch_set_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { - asm volatile(LOCK_PREFIX "orb %1,%0" + if (__builtin_constant_p(nr)) { + asm_inline volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)CONST_MASK(nr)) + : "iq" (CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" - : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___set_bit(unsigned long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() - * in order to ensure changes are visible on other processors. - */ static __always_inline void -clear_bit(long nr, volatile unsigned long *addr) +arch_clear_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { - asm volatile(LOCK_PREFIX "andb %1,%0" + if (__builtin_constant_p(nr)) { + asm_inline volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)~CONST_MASK(nr))); + : "iq" (~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } -/* - * clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and implies release semantics before the memory - * operation. It can be used for an unlock. 
- */ -static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch_clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); - clear_bit(nr, addr); + arch_clear_bit(nr, addr); } -static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit(unsigned long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "andb %2,%1" - CC_SET(s) - : CC_OUT(s) (negative), ADDR - : "ir" ((char) ~(1 << nr)) : "memory"); + asm_inline volatile(LOCK_PREFIX "xorb %2,%1" + : "=@ccs" (negative), WBYTE_ADDR(addr) + : "iq" ((char)mask) : "memory"); return negative; } +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte -// Let everybody know we have it -#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte - -/* - * __clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * __clear_bit() is non-atomic and implies release semantics before the memory - * operation. It can be used for an unlock if no other CPUs can concurrently - * modify other bits in the word. - * - * No memory barrier is required here, because x86 cannot reorder stores past - * older loads. Same principle as spin_unlock. - */ -static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit_unlock(long nr, volatile unsigned long *addr) { - barrier(); - __clear_bit(nr, addr); + arch___clear_bit(nr, addr); } -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___change_bit(unsigned long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. 
- */ -static __always_inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch_change_bit(long nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) { - asm volatile(LOCK_PREFIX "xorb %1,%0" + if (__builtin_constant_p(nr)) { + asm_inline volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)CONST_MASK(nr))); + : "iq" (CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_set_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); } -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on x86. - */ static __always_inline bool -test_and_set_bit_lock(long nr, volatile unsigned long *addr) +arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) { - return test_and_set_bit(nr, addr); + return arch_test_and_set_bit(nr, addr); } -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : "=@ccc" (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_clear_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - * +/* * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. 
@@ -282,39 +167,31 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long * * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : "=@ccc" (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } -/* WARNING: non atomic and it can be reordered! */ -static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr) : "memory"); + : "=@ccc" (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_change_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } @@ -325,31 +202,51 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + : "=@ccnz" (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) - : "m" (*(unsigned long *)addr), "Ir" (nr)); + : "=@ccc" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static bool test_bit(int nr, const volatile unsigned long *addr); -#endif +static __always_inline bool +arch_test_bit(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) : + variable_test_bit(nr, addr); +} -#define test_bit(nr, addr) \ - (__builtin_constant_p((nr)) \ - ? constant_test_bit((nr), (addr)) \ - : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + +static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned long word) +{ + asm("tzcnt %1,%0" + : "=r" (word) + : ASM_INPUT_RM (word)); + return word; +} /** * __ffs - find first set bit in word @@ -357,12 +254,14 @@ static bool test_bit(int nr, const volatile unsigned long *addr); * * Undefined if no bit exists, so code should check against 0 first. 
*/ -static __always_inline unsigned long __ffs(unsigned long word) +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline __attribute_const__ unsigned long variable_ffz(unsigned long word) { - asm("rep; bsf %1,%0" - : "=r" (word) - : "rm" (word)); - return word; + return variable__ffs(~word); } /** @@ -371,13 +270,10 @@ static __always_inline unsigned long __ffs(unsigned long word) * * Undefined if no zero exists, so code should check against ~0UL first. */ -static __always_inline unsigned long ffz(unsigned long word) -{ - asm("rep; bsf %1,%0" - : "=r" (word) - : "r" (~word)); - return word; -} +#define ffz(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(~word) : \ + variable_ffz(word)) /* * __fls: find last set bit in word @@ -385,29 +281,21 @@ static __always_inline unsigned long ffz(unsigned long word) * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __fls(unsigned long word) +static __always_inline __attribute_const__ unsigned long __fls(unsigned long word) { + if (__builtin_constant_p(word)) + return BITS_PER_LONG - 1 - __builtin_clzl(word); + asm("bsr %1,%0" : "=r" (word) - : "rm" (word)); + : ASM_INPUT_RM (word)); return word; } #undef ADDR #ifdef __KERNEL__ -/** - * ffs - find first set bit in word - * @x: the word to search - * - * This is defined the same way as the libc and compiler builtin ffs - * routines, therefore differs in spirit from the other bitops. - * - * ffs(value) returns 0 if value is 0 or the position of the first - * set bit if value is nonzero. The first (least significant) bit - * is at position 1. - */ -static __always_inline int ffs(int x) +static __always_inline __attribute_const__ int variable_ffs(int x) { int r; @@ -423,7 +311,7 @@ static __always_inline int ffs(int x) */ asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -438,6 +326,19 @@ static __always_inline int ffs(int x) } /** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +/** * fls - find last set bit in word * @x: the word to search * @@ -448,10 +349,13 @@ static __always_inline int ffs(int x) * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ -static __always_inline int fls(unsigned int x) +static __always_inline __attribute_const__ int fls(unsigned int x) { int r; + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + #ifdef CONFIG_X86_64 /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the @@ -464,7 +368,7 @@ static __always_inline int fls(unsigned int x) */ asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (-1)); + : ASM_INPUT_RM (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,9 +394,12 @@ static __always_inline int fls(unsigned int x) * at position 64. 
 */
 #ifdef CONFIG_X86_64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
 {
 	int bitpos = -1;
+
+	if (__builtin_constant_p(x))
+		return x ? 64 - __builtin_clzll(x) : 0;
 	/*
 	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
 	 * dest reg is undefined if x==0, but their CPU architect says its
 	 * value is written to set it to the same as before.
 	 */
 	asm("bsrq %1,%q0"
 	    : "+r" (bitpos)
-	    : "rm" (x));
+	    : ASM_INPUT_RM (x));
 	return bitpos + 1;
 }
 #else
 #include <asm-generic/bitops/fls64.h>
 #endif
 
-#include <asm-generic/bitops/find.h>
-
 #include <asm-generic/bitops/sched.h>
 
 #include <asm/arch_hweight.h>
 #include <asm-generic/bitops/const_hweight.h>
 
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 680c320363db..f7b67cb73915 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -6,11 +6,6 @@
 #include <asm/pgtable_types.h>
 #include <uapi/asm/boot.h>
 
-/* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
-				+ (CONFIG_PHYSICAL_ALIGN - 1)) \
-				& ~(CONFIG_PHYSICAL_ALIGN - 1))
-
 /* Minimum kernel alignment, as a power of two */
 #ifdef CONFIG_X86_64
 # define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
@@ -24,35 +19,82 @@
 # error "Invalid value for CONFIG_PHYSICAL_ALIGN"
 #endif
 
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
 # define BOOT_HEAP_SIZE	0x400000
-#else /* !CONFIG_KERNEL_BZIP2 */
+#elif defined(CONFIG_KERNEL_ZSTD)
+/*
+ * Zstd needs to allocate the ZSTD_DCtx in order to decompress the kernel.
+ * The ZSTD_DCtx is ~160KB, so set the heap size to 192KB because it is a
+ * round number and to allow some slack.
+ */
+# define BOOT_HEAP_SIZE	0x30000
+#else
 # define BOOT_HEAP_SIZE	0x10000
 #endif
 
 #ifdef CONFIG_X86_64
 # define BOOT_STACK_SIZE	0x4000
+/*
+ * Used by the decompressor's startup_32() to allocate page tables for identity
+ * mapping of the 4G of RAM in 4-level paging mode:
+ * - 1 level4 table;
+ * - 1 level3 table;
+ * - 4 level2 tables that map everything with 2M pages;
+ *
+ * The additional level5 table needed for 5-level paging is allocated from
+ * trampoline_32bit memory.
+ */
 # define BOOT_INIT_PGT_SIZE	(6*4096)
-# ifdef CONFIG_RANDOMIZE_BASE
+
 /*
- * Assuming all cross the 512GB boundary:
- * 1 page for level4
- * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
- * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
- * Total is 19 pages.
+ * Total number of page tables kernel_add_identity_map() can allocate,
+ * including page tables consumed by startup_32().
+ *
+ * Worst-case scenario:
+ * - 5-level paging needs 1 level5 table;
+ * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel,
+ *   assuming all of them cross the 256T boundary:
+ *   + 4*2 level4 tables;
+ *   + 4*2 level3 tables;
+ *   + 4*2 level2 tables;
+ * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM):
+ *   + 1 level4 table;
+ *   + 1 level3 table;
+ *   + 1 level2 table;
+ * Total: 28 tables
+ *
+ * Add 4 spare tables in case the decompressor touches anything beyond what is
+ * accounted above. Warn if it happens.
*/ -# ifdef CONFIG_X86_VERBOSE_BOOTUP -# define BOOT_PGT_SIZE (19*4096) -# else /* !CONFIG_X86_VERBOSE_BOOTUP */ -# define BOOT_PGT_SIZE (17*4096) -# endif -# else /* !CONFIG_RANDOMIZE_BASE */ -# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE -# endif +# define BOOT_PGT_SIZE_WARN (28*4096) +# define BOOT_PGT_SIZE (32*4096) #else /* !CONFIG_X86_64 */ # define BOOT_STACK_SIZE 0x1000 #endif +#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) + +#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 + +#ifndef __ASSEMBLER__ +extern unsigned int output_len; +extern const unsigned long kernel_text_size; +extern const unsigned long kernel_inittext_offset; +extern const unsigned long kernel_inittext_size; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); + +extern struct boot_params *boot_params_ptr; +extern unsigned long *trampoline_32bit; +extern const u16 trampoline_ljmp_imm_offset; + +void trampoline_32bit_src(void *trampoline, bool enable_5lvl); + +#endif + #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index f6f6ef436599..d90ae472fb76 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -18,13 +18,27 @@ * Note: efi_info is commonly left uninitialized, but that field has a * private magic, so it is better to leave it unchanged. */ + +#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) + +#define BOOT_PARAM_PRESERVE(struct_member) \ + { \ + .start = offsetof(struct boot_params, struct_member), \ + .len = sizeof_mbr(struct boot_params, struct_member), \ + } + +struct boot_params_to_save { + unsigned int start; + unsigned int len; +}; + static void sanitize_boot_params(struct boot_params *boot_params) { /* * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear * this field. The purpose of this field is to guarantee * compliance with the x86 boot spec located in - * Documentation/x86/boot.txt . That spec says that the + * Documentation/arch/x86/boot.rst . That spec says that the * *whole* structure should be cleared, after which only the * portion defined by struct setup_header (boot_params->hdr) * should be copied in. @@ -35,21 +49,42 @@ static void sanitize_boot_params(struct boot_params *boot_params) * problems again. 
*/ if (boot_params->sentinel) { - /* fields in boot_params are left uninitialized, clear them */ - boot_params->acpi_rsdp_addr = 0; - memset(&boot_params->ext_ramdisk_image, 0, - (char *)&boot_params->efi_info - - (char *)&boot_params->ext_ramdisk_image); - memset(&boot_params->kbd_status, 0, - (char *)&boot_params->hdr - - (char *)&boot_params->kbd_status); - memset(&boot_params->_pad7[0], 0, - (char *)&boot_params->edd_mbr_sig_buffer[0] - - (char *)&boot_params->_pad7[0]); - memset(&boot_params->_pad8[0], 0, - (char *)&boot_params->eddbuf[0] - - (char *)&boot_params->_pad8[0]); - memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + static struct boot_params scratch; + char *bp_base = (char *)boot_params; + char *save_base = (char *)&scratch; + int i; + + const struct boot_params_to_save to_save[] = { + BOOT_PARAM_PRESERVE(screen_info), + BOOT_PARAM_PRESERVE(apm_bios_info), + BOOT_PARAM_PRESERVE(tboot_addr), + BOOT_PARAM_PRESERVE(ist_info), + BOOT_PARAM_PRESERVE(hd0_info), + BOOT_PARAM_PRESERVE(hd1_info), + BOOT_PARAM_PRESERVE(sys_desc_table), + BOOT_PARAM_PRESERVE(olpc_ofw_header), + BOOT_PARAM_PRESERVE(efi_info), + BOOT_PARAM_PRESERVE(alt_mem_k), + BOOT_PARAM_PRESERVE(scratch), + BOOT_PARAM_PRESERVE(e820_entries), + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(secure_boot), + BOOT_PARAM_PRESERVE(hdr), + BOOT_PARAM_PRESERVE(e820_table), + BOOT_PARAM_PRESERVE(eddbuf), + BOOT_PARAM_PRESERVE(cc_blob_address), + }; + + memset(&scratch, 0, sizeof(scratch)); + + for (i = 0; i < ARRAY_SIZE(to_save); i++) { + memcpy(save_base + to_save[i].start, + bp_base + to_save[i].start, to_save[i].len); + } + + memcpy(boot_params, save_base, sizeof(*boot_params)); } } diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 6804d6642767..d561a8443c13 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -3,83 +3,193 @@ #define _ASM_X86_BUG_H #include <linux/stringify.h> +#include <linux/instrumentation.h> +#include <linux/objtool.h> +#include <asm/asm.h> + +#ifndef __ASSEMBLY__ +struct bug_entry; +extern void __WARN_trap(struct bug_entry *bug, ...); +#endif /* * Despite that some emulators terminate on UD2, we use it for WARN(). - * - * Since various instruction decoders/specs disagree on the encoding of - * UD0/UD1. */ - -#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */ -#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */ -#define ASM_UD2 ".byte 0x0f, 0x0b" - -#define INSN_UD0 0xff0f +#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) #define INSN_UD2 0x0b0f - #define LEN_UD2 2 +#define ASM_UDB _ASM_BYTES(0xd6) +#define INSN_UDB 0xd6 +#define LEN_UDB 1 + +/* + * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. 
+ */ +#define INSN_ASOP 0x67 +#define INSN_LOCK 0xf0 +#define OPCODE_ESCAPE 0x0f +#define SECOND_BYTE_OPCODE_UD1 0xb9 +#define SECOND_BYTE_OPCODE_UD2 0x0b + +#define BUG_NONE 0xffff +#define BUG_UD2 0xfffe +#define BUG_UD1 0xfffd +#define BUG_UD1_UBSAN 0xfffc +#define BUG_UD1_WARN 0xfffb +#define BUG_UDB 0xffd6 +#define BUG_LOCK 0xfff0 + #ifdef CONFIG_GENERIC_BUG -#ifdef CONFIG_X86_32 -# define __BUG_REL(val) ".long " __stringify(val) +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __BUG_ENTRY_VERBOSE(file, line) \ + "\t.long " file " - .\t# bug_entry::file\n" \ + "\t.word " line "\t# bug_entry::line\n" #else -# define __BUG_REL(val) ".long " __stringify(val) " - 2b" +#define __BUG_ENTRY_VERBOSE(file, line) #endif -#ifdef CONFIG_DEBUG_BUGVERBOSE +#if defined(CONFIG_X86_64) || defined(CONFIG_DEBUG_BUGVERBOSE_DETAILED) +#define HAVE_ARCH_BUG_FORMAT +#define __BUG_ENTRY_FORMAT(format) \ + "\t.long " format " - .\t# bug_entry::format\n" +#else +#define __BUG_ENTRY_FORMAT(format) +#endif -#define _BUG_FLAGS(ins, flags) \ -do { \ - asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ - "\t.word %c1" "\t# bug_entry::line\n" \ - "\t.word %c2" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c3\n" \ - ".popsection" \ - : : "i" (__FILE__), "i" (__LINE__), \ - "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ -} while (0) +#ifdef CONFIG_X86_64 +#define HAVE_ARCH_BUG_FORMAT_ARGS +#endif -#else /* !CONFIG_DEBUG_BUGVERBOSE */ +#define __BUG_ENTRY(format, file, line, flags) \ + "\t.long 1b - ." "\t# bug_entry::bug_addr\n" \ + __BUG_ENTRY_FORMAT(format) \ + __BUG_ENTRY_VERBOSE(file, line) \ + "\t.word " flags "\t# bug_entry::flags\n" + +#define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \ + ".pushsection __bug_table,\"aw\"\n\t" \ + ANNOTATE_DATA_SPECIAL "\n\t" \ + "2:\n\t" \ + __BUG_ENTRY(format, file, line, flags) \ + "\t.org 2b + " size "\n" \ + ".popsection\n" \ + extra + +#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED +#define WARN_CONDITION_STR(cond_str) cond_str +#else +#define WARN_CONDITION_STR(cond_str) "" +#endif -#define _BUG_FLAGS(ins, flags) \ +#define _BUG_FLAGS(cond_str, ins, flags, extra) \ do { \ - asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"aw\"\n" \ - "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ - "\t.word %c0" "\t# bug_entry::flags\n" \ - "\t.org 2b+%c1\n" \ - ".popsection" \ - : : "i" (flags), \ - "i" (sizeof(struct bug_entry))); \ + asm_inline volatile("1:\t" ins "\n" \ + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ + "%c[line]", "%c[fl]", \ + "%c[size]", extra) \ + : : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \ + [file] "i" (__FILE__), \ + [line] "i" (__LINE__), \ + [fl] "i" (flags), \ + [size] "i" (sizeof(struct bug_entry))); \ } while (0) -#endif /* CONFIG_DEBUG_BUGVERBOSE */ +#define ARCH_WARN_ASM(file, line, flags, size) \ + ".pushsection .rodata.str1.1, \"aMS\", @progbits, 1\n" \ + "99:\n" \ + "\t.string \"\"\n" \ + ".popsection\n" \ + "1:\t " ASM_UD2 "\n" \ + _BUG_FLAGS_ASM("99b", file, line, flags, size, "") #else -#define _BUG_FLAGS(ins, flags) asm volatile(ins) +#define _BUG_FLAGS(cond_str, ins, flags, extra) asm volatile(ins) #endif /* CONFIG_GENERIC_BUG */ #define HAVE_ARCH_BUG #define BUG() \ do { \ - _BUG_FLAGS(ASM_UD2, 0); \ - unreachable(); \ + instrumentation_begin(); \ + _BUG_FLAGS("", ASM_UD2, 0, ""); \ + __builtin_unreachable(); \ } while (0) -#define __WARN_FLAGS(flags) \ -do { \ - _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ - 
annotate_reachable(); \ +/* + * This instrumentation_begin() is strictly speaking incorrect; but it + * suppresses the complaints from WARN()s in noinstr code. If such a WARN() + * were to trigger, we'd rather wreck the machine in an attempt to get the + * message out than not know about it. + */ + +#define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b) + +#define __WARN_FLAGS(cond_str, flags) \ +do { \ + auto __flags = BUGFLAG_WARNING|(flags); \ + instrumentation_begin(); \ + _BUG_FLAGS(cond_str, ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ + instrumentation_end(); \ +} while (0) + +#ifdef HAVE_ARCH_BUG_FORMAT_ARGS + +#ifndef __ASSEMBLY__ +#include <linux/static_call_types.h> +DECLARE_STATIC_CALL(WARN_trap, __WARN_trap); + +struct pt_regs; +struct sysv_va_list { /* from AMD64 System V ABI */ + unsigned int gp_offset; + unsigned int fp_offset; + void *overflow_arg_area; + void *reg_save_area; +}; +struct arch_va_list { + unsigned long regs[6]; + struct sysv_va_list args; +}; +extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs); +#endif /* __ASSEMBLY__ */ + +#define __WARN_bug_entry(flags, format) ({ \ + struct bug_entry *bug; \ + asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \ + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ + "%c[line]", "%c[fl]", \ + "%c[size]", "") \ + : [addr] "=r" (bug) \ + : [fmt] "i" (format), \ + [file] "i" (__FILE__), \ + [line] "i" (__LINE__), \ + [fl] "i" (flags), \ + [size] "i" (sizeof(struct bug_entry))); \ + bug; }) + +#define __WARN_print_arg(flags, format, arg...) \ +do { \ + int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \ + static_call_mod(WARN_trap)(__WARN_bug_entry(__flags, format), ## arg); \ + asm (""); /* inhibit tail-call optimization */ \ } while (0) +#define __WARN_printf(taint, fmt, arg...) \ + __WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg) + +#define WARN_ONCE(cond, format, arg...) 
({ \ + int __ret_warn_on = !!(cond); \ + if (unlikely(__ret_warn_on)) { \ + __WARN_print_arg(BUGFLAG_ONCE|BUGFLAG_TAINT(TAINT_WARN),\ + format, ## arg); \ + } \ + __ret_warn_on; \ +}) + +#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */ + #include <asm-generic/bug.h> #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 542509b53e0f..f25ca2d709d4 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -4,18 +4,12 @@ #include <asm/processor.h> -extern void check_bugs(void); - -#if defined(CONFIG_CPU_SUP_INTEL) -void check_mpx_erratum(struct cpuinfo_x86 *c); -#else -static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {} -#endif - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else static inline int ppro_with_ram_bug(void) { return 0; } #endif +extern void cpu_bugs_smt_update(void); + #endif /* _ASM_X86_BUGS_H */ diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index abe08690a887..69404eae9983 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -8,7 +8,7 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 63feaf2a5f93..b192d917a6d0 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_CACHEFLUSH_H #define _ASM_X86_CACHEFLUSH_H +#include <linux/mm.h> + /* Caches aren't brain-dead on the intel. */ #include <asm-generic/cacheflush.h> #include <asm/special_insns.h> diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index 86b63c7feab7..5aa061199866 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -2,7 +2,17 @@ #ifndef _ASM_X86_CACHEINFO_H #define _ASM_X86_CACHEINFO_H -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); +/* Kernel controls MTRR and/or PAT MSRs. */ +extern unsigned int memory_caching_control; +#define CACHE_MTRR 0x01 +#define CACHE_PAT 0x02 + +void cache_disable(void); +void cache_enable(void); +void set_cache_aps_delayed_init(bool val); +bool get_cache_aps_delayed_init(void); +void cache_bp_init(void); +void cache_bp_restore(void); +void cache_aps_init(void); #endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h deleted file mode 100644 index a8303ebe089f..000000000000 --- a/arch/x86/include/asm/calgary.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Derived from include/asm-powerpc/iommu.h - * - * Copyright IBM Corporation, 2006-2007 - * - * Author: Jon Mason <jdmason@us.ibm.com> - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _ASM_X86_CALGARY_H -#define _ASM_X86_CALGARY_H - -#include <linux/spinlock.h> -#include <linux/device.h> -#include <linux/dma-mapping.h> -#include <linux/timer.h> -#include <asm/types.h> - -struct iommu_table { - const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */ - unsigned long it_base; /* mapped address of tce table */ - unsigned long it_hint; /* Hint for next alloc */ - unsigned long *it_map; /* A simple allocation bitmap for now */ - void __iomem *bbar; /* Bridge BAR */ - u64 tar_val; /* Table Address Register */ - struct timer_list watchdog_timer; - spinlock_t it_lock; /* Protects it_map */ - unsigned int it_size; /* Size of iommu table in entries */ - unsigned char it_busno; /* Bus number this table belongs to */ -}; - -struct cal_chipset_ops { - void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev); - void (*tce_cache_blast)(struct iommu_table *tbl); - void (*dump_error_regs)(struct iommu_table *tbl); -}; - -#define TCE_TABLE_SIZE_UNSPECIFIED ~0 -#define TCE_TABLE_SIZE_64K 0 -#define TCE_TABLE_SIZE_128K 1 -#define TCE_TABLE_SIZE_256K 2 -#define TCE_TABLE_SIZE_512K 3 -#define TCE_TABLE_SIZE_1M 4 -#define TCE_TABLE_SIZE_2M 5 -#define TCE_TABLE_SIZE_4M 6 -#define TCE_TABLE_SIZE_8M 7 - -extern int use_calgary; - -#ifdef CONFIG_CALGARY_IOMMU -extern int detect_calgary(void); -#else -static inline int detect_calgary(void) { return -ENODEV; } -#endif - -#endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h index 2930f560d7f3..e1f965bb1e31 100644 --- a/arch/x86/include/asm/ce4100.h +++ b/arch/x86/include/asm/ce4100.h @@ -4,4 +4,10 @@ int ce4100_pci_init(void); +#ifdef CONFIG_SERIAL_8250 +void __init sdv_serial_fixup(void); +#else +static inline void sdv_serial_fixup(void) {}; +#endif + #endif diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h new file mode 100644 index 000000000000..c40b9ebc1fb4 --- /dev/null +++ b/arch/x86/include/asm/cfi.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CFI_H +#define _ASM_X86_CFI_H + +/* + * Clang Control Flow Integrity (CFI) support. + * + * Copyright (C) 2022 Google LLC + */ +#include <linux/bug.h> +#include <asm/ibt.h> + +/* + * An overview of the various calling conventions... + * + * Traditional: + * + * foo: + * ... code here ... + * ret + * + * direct caller: + * call foo + * + * indirect caller: + * lea foo(%rip), %r11 + * ... + * call *%r11 + * + * + * IBT: + * + * foo: + * endbr64 + * ... code here ... + * ret + * + * direct caller: + * call foo / call foo+4 + * + * indirect caller: + * lea foo(%rip), %r11 + * ... + * call *%r11 + * + * + * kCFI: + * + * __cfi_foo: + * movl $0x12345678, %eax + * # 11 nops when CONFIG_CALL_PADDING + * foo: + * endbr64 # when IBT + * ... code here ... + * ret + * + * direct call: + * call foo # / call foo+4 when IBT + * + * indirect call: + * lea foo(%rip), %r11 + * ... 
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d	# -15 when CONFIG_CALL_PADDING
+ * jz 1f
+ * ud2
+ * 1:call *%r11
+ *
+ *
+ * FineIBT (builds as kCFI + CALL_PADDING + IBT + RETPOLINE and runtime patches into):
+ *
+ * __cfi_foo:
+ * endbr64
+ * subl 0x12345678, %eax
+ * jne.32,pn foo+3
+ * foo:
+ * nopl -42(%rax)	# was endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $0x12345678, %eax
+ * lea -0x10(%r11), %r11
+ * nop5
+ * call *%r11
+ *
+ */
+enum cfi_mode {
+	CFI_AUTO,	/* FineIBT if hardware has IBT, otherwise kCFI */
+	CFI_OFF,	/* Traditional / IBT depending on .config */
+	CFI_KCFI,	/* Optionally CALL_PADDING, IBT, RETPOLINE */
+	CFI_FINEIBT,	/* see arch/x86/kernel/alternative.c */
+};
+
+extern enum cfi_mode cfi_mode;
+
+#ifdef CONFIG_FINEIBT_BHI
+extern bool cfi_bhi;
+#else
+#define cfi_bhi (0)
+#endif
+
+typedef u8 bhi_thunk[32];
+extern bhi_thunk __bhi_args[];
+extern bhi_thunk __bhi_args_end[];
+
+struct pt_regs;
+
+#ifdef CONFIG_CFI
+enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+
+static inline int cfi_get_offset(void)
+{
+	switch (cfi_mode) {
+	case CFI_FINEIBT:
+		return 16;
+	case CFI_KCFI:
+		if (IS_ENABLED(CONFIG_CALL_PADDING))
+			return 16;
+		return 5;
+	default:
+		return 0;
+	}
+}
+#define cfi_get_offset cfi_get_offset
+
+extern u32 cfi_get_func_hash(void *func);
+#define cfi_get_func_hash cfi_get_func_hash
+
+extern int cfi_get_func_arity(void *func);
+
+#ifdef CONFIG_FINEIBT
+extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+#else
+static inline bool
+decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+	return false;
+}
+
+#endif
+
+#else
+static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
+{
+	return BUG_TRAP_TYPE_NONE;
+}
+static inline int cfi_get_func_arity(void *func)
+{
+	return 0;
+}
+#endif /* CONFIG_CFI */
+
+#if HAS_KERNEL_IBT == 1
+#define CFI_NOSEAL(x)	asm(IBT_NOSEAL(__stringify(x)))
+#endif
+
+#endif /* _ASM_X86_CFI_H */
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index d79d1e622dcf..6df6ece8a28e 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,6 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/checksum_32.h>
+#ifdef CONFIG_GENERIC_CSUM
+# include <asm-generic/checksum.h>
 #else
-# include <asm/checksum_64.h>
+# define  _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+# define HAVE_CSUM_COPY_USER
+# define _HAVE_ARCH_CSUM_AND_COPY
+# ifdef CONFIG_X86_32
+#  include <asm/checksum_32.h>
+# else
+#  include <asm/checksum_64.h>
+# endif
 #endif
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index f57b94e02c57..17da95387997 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -27,9 +27,7 @@ asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum);
  * better 64-bit) boundary
  */
 
-asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
-					    int len, __wsum sum,
-					    int *src_err_ptr, int *dst_err_ptr);
+asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 
 /*
  * Note: when you get a NULL pointer exception here this means someone
@@ -38,24 +36,21 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
  * If you use these functions directly please don't forget the
  * access_ok().
*/ -static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum) +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { - return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); + return csum_partial_copy_generic(src, dst, len); } -static inline __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, - int len, __wsum sum, - int *err_ptr) +static inline __wsum csum_and_copy_from_user(const void __user *src, + void *dst, int len) { __wsum ret; might_sleep(); - stac(); - ret = csum_partial_copy_generic((__force void *)src, dst, - len, sum, err_ptr, NULL); - clac(); + if (!user_access_begin(src, len)) + return 0; + ret = csum_partial_copy_generic((__force void *)src, dst, len); + user_access_end(); return ret; } @@ -173,27 +168,19 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, /* * Copy and checksum to user */ -#define HAVE_CSUM_COPY_USER static inline __wsum csum_and_copy_to_user(const void *src, void __user *dst, - int len, __wsum sum, - int *err_ptr) + int len) { __wsum ret; might_sleep(); - if (access_ok(dst, len)) { - stac(); - ret = csum_partial_copy_generic(src, (__force void *)dst, - len, sum, NULL, err_ptr); - clac(); - return ret; - } - - if (len) - *err_ptr = -EFAULT; - - return (__force __wsum)-1; /* invalid checksum */ + if (!user_access_begin(dst, len)) + return 0; + + ret = csum_partial_copy_generic(src, (__force void *)dst, len); + user_access_end(); + return ret; } #endif /* _ASM_X86_CHECKSUM_32_H */ diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 3ec6d3267cf9..4d4a47a3a8ab 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -9,7 +9,6 @@ */ #include <linux/compiler.h> -#include <linux/uaccess.h> #include <asm/byteorder.h> /** @@ -129,26 +128,12 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); -#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 -#define HAVE_CSUM_COPY_USER 1 - - /* Do not call this directly. Use the wrappers below */ -extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst, - int len, __wsum sum, - int *src_err_ptr, int *dst_err_ptr); - - -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum isum, int *errp); -extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst, - int len, __wsum isum, int *errp); -extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum); +extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len); -/* Old names. To be removed. */ -#define csum_and_copy_to_user csum_partial_copy_to_user -#define csum_and_copy_from_user csum_partial_copy_from_user +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len); +extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); /** * ip_compute_csum - Compute an 16bit IP checksum. diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index dc4cfc888d6d..dc9dc7b3911a 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -4,14 +4,18 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. 
*/ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ -#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */ -#define VCLOCK_MAX 3 +#include <asm/vdso/clocksource.h> -struct arch_clocksource_data { - int vclock_mode; -}; +extern unsigned int vclocks_used; + +static inline bool vclock_was_used(int vclock) +{ + return READ_ONCE(vclocks_used) & (1U << vclock); +} + +static inline void vclocks_set_used(unsigned int which) +{ + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << which)); +} #endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index 6faaf27e8899..6cbd9ae58b21 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,6 +2,10 @@ #ifndef _ASM_X86_CMDLINE_H #define _ASM_X86_CMDLINE_H +#include <asm/setup.h> + +extern char builtin_cmdline[COMMAND_LINE_SIZE]; + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); int cmdline_find_option(const char *cmdline_ptr, const char *option, char *buffer, int bufsize); diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index a8bfac131256..a88b06f1c35e 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -22,7 +22,7 @@ extern void __add_wrong_size(void) /* * Constants for operation sizes. On 32-bit, the 64-bit size it set to * -1 because sizeof will never return -1, thereby making those switch - * case statements guaranteeed dead code which the compiler will + * case statements guaranteed dead code which the compiler will * eliminate, and allowing the "missing symbol in the default case" to * indicate a usage error. */ @@ -44,22 +44,22 @@ extern void __add_wrong_size(void) __typeof__ (*(ptr)) __ret = (arg); \ switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ - asm volatile (lock #op "b %b0, %1\n" \ + asm_inline volatile (lock #op "b %b0, %1" \ : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ - asm volatile (lock #op "w %w0, %1\n" \ + asm_inline volatile (lock #op "w %w0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_L: \ - asm volatile (lock #op "l %0, %1\n" \ + asm_inline volatile (lock #op "l %0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_Q: \ - asm volatile (lock #op "q %q0, %1\n" \ + asm_inline volatile (lock #op "q %q0, %1" \ : "+r" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ @@ -91,7 +91,7 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile(lock "cmpxchgb %2,%1" \ + asm_inline volatile(lock "cmpxchgb %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "q" (__new), "0" (__old) \ : "memory"); \ @@ -100,7 +100,7 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile(lock "cmpxchgw %2,%1" \ + asm_inline volatile(lock "cmpxchgw %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -109,7 +109,7 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile(lock "cmpxchgl %2,%1" \ + asm_inline volatile(lock "cmpxchgl %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -118,7 +118,7 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile(lock 
"cmpxchgq %2,%1" \ + asm_inline volatile(lock "cmpxchgq %2, %1" \ : "=a" (__ret), "+m" (*__ptr) \ : "r" (__new), "0" (__old) \ : "memory"); \ @@ -134,7 +134,7 @@ extern void __add_wrong_size(void) __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) #define __sync_cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + __raw_cmpxchg((ptr), (old), (new), (size), "lock ") #define __cmpxchg_local(ptr, old, new, size) \ __raw_cmpxchg((ptr), (old), (new), (size), "") @@ -165,9 +165,8 @@ extern void __add_wrong_size(void) case __X86_CASE_B: \ { \ volatile u8 *__ptr = (volatile u8 *)(_ptr); \ - asm volatile(lock "cmpxchgb %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "q" (__new) \ @@ -177,9 +176,8 @@ extern void __add_wrong_size(void) case __X86_CASE_W: \ { \ volatile u16 *__ptr = (volatile u16 *)(_ptr); \ - asm volatile(lock "cmpxchgw %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -189,9 +187,8 @@ extern void __add_wrong_size(void) case __X86_CASE_L: \ { \ volatile u32 *__ptr = (volatile u32 *)(_ptr); \ - asm volatile(lock "cmpxchgl %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -201,9 +198,8 @@ extern void __add_wrong_size(void) case __X86_CASE_Q: \ { \ volatile u64 *__ptr = (volatile u64 *)(_ptr); \ - asm volatile(lock "cmpxchgq %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -221,9 +217,21 @@ extern void __add_wrong_size(void) #define __try_cmpxchg(ptr, pold, new, size) \ __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) -#define try_cmpxchg(ptr, pold, new) \ +#define __sync_try_cmpxchg(ptr, pold, new, size) \ + __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock ") + +#define __try_cmpxchg_local(ptr, pold, new, size) \ + __raw_try_cmpxchg((ptr), (pold), (new), (size), "") + +#define arch_try_cmpxchg(ptr, pold, new) \ __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) +#define arch_sync_try_cmpxchg(ptr, pold, new) \ + __sync_try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) + +#define arch_try_cmpxchg_local(ptr, pold, new) \ + __try_cmpxchg_local((ptr), (pold), (new), sizeof(*(ptr))) + /* * xadd() adds "inc" to "*ptr" and atomically returns the previous * value of "*ptr". 
@@ -233,29 +241,4 @@ extern void __add_wrong_size(void) #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) -#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ - __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ - VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ - asm volatile(pfx "cmpxchg%c5b %1" \ - CC_SET(e) \ - : CC_OUT(e) (__ret), \ - "+m" (*(p1)), "+m" (*(p2)), \ - "+a" (__old1), "+d" (__old2) \ - : "i" (2 * sizeof(long)), \ - "b" (__new1), "c" (__new2)); \ - __ret; \ -}) - -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) - -#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(, p1, p2, o1, o2, n1, n2) - #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 1a2eafca7038..1f80a62be969 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -3,113 +3,153 @@ #define _ASM_X86_CMPXCHG_32_H /* - * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you + * Note: if you use __cmpxchg64(), or their variants, * you need to test for the feature in boot_cpu_data. */ -/* - * CMPXCHG8B only writes to the target if we had the previous - * value in registers, otherwise it acts as a read and gives us the - * "new previous" value. That is why there is a loop. Preloading - * EDX:EAX is a performance optimization: in the common case it means - * we need only one locked operation. - * - * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very - * least an FPU save and/or %cr0.ts manipulation. - * - * cmpxchg8b must be used with the lock prefix here to allow the - * instruction to be executed atomically. We need to have the reader - * side to see the coherent 64bit value. 
- */ -static inline void set_64bit(volatile u64 *ptr, u64 value) +union __u64_halves { + u64 full; + struct { + u32 low, high; + }; +}; + +#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \ +({ \ + union __u64_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + + +static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) { - u32 low = value; - u32 high = value >> 32; - u64 prev = *ptr; - - asm volatile("\n1:\t" - LOCK_PREFIX "cmpxchg8b %0\n\t" - "jnz 1b" - : "=m" (*ptr), "+A" (prev) - : "b" (low), "c" (high) - : "memory"); + return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX); } -#ifdef CONFIG_X86_CMPXCHG64 -#define arch_cmpxchg64(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ - (unsigned long long)(n))) -#define arch_cmpxchg64_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ - (unsigned long long)(n))) -#endif +static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64(ptr, old, new,); +} + +#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ + : "=@ccz" (ret), \ + [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) -static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) { - u64 prev; - asm volatile(LOCK_PREFIX "cmpxchg8b %1" - : "=A" (prev), - "+m" (*ptr) - : "b" ((u32)new), - "c" ((u32)(new >> 32)), - "0" (old) - : "memory"); - return prev; + return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX); } -static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new) { - u64 prev; - asm volatile("cmpxchg8b %1" - : "=A" (prev), - "+m" (*ptr) - : "b" ((u32)new), - "c" ((u32)(new >> 32)), - "0" (old) - : "memory"); - return prev; + return __arch_try_cmpxchg64(ptr, oldp, new,); } -#ifndef CONFIG_X86_CMPXCHG64 +#ifdef CONFIG_X86_CX8 + +#define arch_cmpxchg64 __cmpxchg64 + +#define arch_cmpxchg64_local __cmpxchg64_local + +#define arch_try_cmpxchg64 __try_cmpxchg64 + +#define arch_try_cmpxchg64_local __try_cmpxchg64_local + +#else + /* * Building a kernel capable running on 80386 and 80486. It may be necessary * to simulate the cmpxchg8b on the 80386 and 80486 CPU. 
*/ -#define arch_cmpxchg64(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (o); \ - __typeof__(*(ptr)) __new = (n); \ - alternative_io(LOCK_PREFIX_HERE \ - "call cmpxchg8b_emu", \ - "lock; cmpxchg8b (%%esi)" , \ - X86_FEATURE_CX8, \ - "=A" (__ret), \ - "S" ((ptr)), "0" (__old), \ - "b" ((unsigned int)__new), \ - "c" ((unsigned int)(__new>>32)) \ - : "memory"); \ - __ret; }) - - -#define arch_cmpxchg64_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (o); \ - __typeof__(*(ptr)) __new = (n); \ - alternative_io("call cmpxchg8b_emu", \ - "cmpxchg8b (%%esi)" , \ - X86_FEATURE_CX8, \ - "=A" (__ret), \ - "S" ((ptr)), "0" (__old), \ - "b" ((unsigned int)__new), \ - "c" ((unsigned int)(__new>>32)) \ - : "memory"); \ - __ret; }) +#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \ +({ \ + union __u64_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm_inline volatile( \ + ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \ + : "b" (n.low), "c" (n.high), \ + [ptr] "S" (_ptr) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock "); +} +#define arch_cmpxchg64 arch_cmpxchg64 + +static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) +{ + return __arch_cmpxchg64_emu(ptr, old, new, ,); +} +#define arch_cmpxchg64_local arch_cmpxchg64_local + +#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \ +({ \ + union __u64_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm_inline volatile( \ + ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("=@ccz" (ret), \ + "+a" (o.low), "+d" (o.high)) \ + : "b" (n.low), "c" (n.high), \ + [ptr] "S" (_ptr) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock "); +} +#define arch_try_cmpxchg64 arch_try_cmpxchg64 + +static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new) +{ + return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,); +} +#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local #endif -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) +#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8) #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 072e5459fe2f..5afea056fb20 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -2,11 +2,6 @@ #ifndef _ASM_X86_CMPXCHG_64_H #define _ASM_X86_CMPXCHG_64_H -static inline void set_64bit(volatile u64 *ptr, u64 val) -{ - *ptr = val; -} - #define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ @@ -19,6 +14,82 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) arch_cmpxchg_local((ptr), (o), (n)); \ }) -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) +#define arch_try_cmpxchg64(ptr, po, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg((ptr), (po), (n)); \ +}) + +#define arch_try_cmpxchg64_local(ptr, po, n) \ +({ \ + 
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg_local((ptr), (po), (n)); \ +}) + +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \ +({ \ + union __u128_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX); +} +#define arch_cmpxchg128 arch_cmpxchg128 + +static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new,); +} +#define arch_cmpxchg128_local arch_cmpxchg128_local + +#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \ +({ \ + union __u128_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ + : "=@ccz" (ret), \ + [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX); +} +#define arch_try_cmpxchg128 arch_try_cmpxchg128 + +static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new,); +} +#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local + +#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h new file mode 100644 index 000000000000..e1dbf8df1b69 --- /dev/null +++ b/arch/x86/include/asm/coco.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COCO_H +#define _ASM_X86_COCO_H + +#include <asm/asm.h> +#include <asm/types.h> + +enum cc_vendor { + CC_VENDOR_NONE, + CC_VENDOR_AMD, + CC_VENDOR_INTEL, +}; + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern enum cc_vendor cc_vendor; +extern u64 cc_mask; + +static inline u64 cc_get_mask(void) +{ + return cc_mask; +} + +static inline void cc_set_mask(u64 mask) +{ + cc_mask = mask; +} + +u64 cc_mkenc(u64 val); +u64 cc_mkdec(u64 val); +void cc_random_init(void); +#else +#define cc_vendor (CC_VENDOR_NONE) +static inline u64 cc_get_mask(void) +{ + return 0; +} + +static inline u64 cc_mkenc(u64 val) +{ + return val; +} + +static inline u64 cc_mkdec(u64 val) +{ + return val; +} +static inline void cc_random_init(void) { } +#endif + +#endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 22c4dfe65992..b1221da477b7 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -12,34 +12,35 @@ #include <asm/user32.h> #include <asm/unistd.h> -#include <asm-generic/compat.h> - -#define COMPAT_USER_HZ 100 -#define COMPAT_UTS_MACHINE "i686\0\0" +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; +#define __compat_uid_t __compat_uid_t typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; + +#define compat_dev_t compat_dev_t typedef u16 compat_dev_t; + +#define compat_ipc_pid_t compat_ipc_pid_t +typedef u16 
compat_ipc_pid_t; + +#define compat_statfs compat_statfs + +#include <asm-generic/compat.h> + +#define COMPAT_UTS_MACHINE "i686\0\0" + typedef u16 compat_nlink_t; -typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; -typedef __kernel_fsid_t compat_fsid_t; -typedef s64 __attribute__((aligned(4))) compat_s64; -typedef u64 __attribute__((aligned(4))) compat_u64; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; @@ -53,29 +54,11 @@ struct compat_stat { u32 __unused5; }; -struct compat_flock { - short l_type; - short l_whence; - compat_off_t l_start; - compat_off_t l_len; - compat_pid_t l_pid; -}; - -#define F_GETLK64 12 /* using 'struct flock64' */ -#define F_SETLK64 13 -#define F_SETLKW64 14 - /* - * IA32 uses 4 byte alignment for 64 bit quantities, - * so we need to pack this structure. + * IA32 uses 4 byte alignment for 64 bit quantities, so we need to pack the + * compat flock64 structure. */ -struct compat_flock64 { - short l_type; - short l_whence; - compat_loff_t l_start; - compat_loff_t l_len; - compat_pid_t l_pid; -} __attribute__((packed)); +#define __ARCH_NEED_COMPAT_FLOCK64_PACKED struct compat_statfs { int f_type; @@ -92,122 +75,11 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_INFINITY 0xffffffff - -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - -#define COMPAT_OFF_T_MAX 0x7fffffff - -struct compat_ipc64_perm { - compat_key_t key; - __compat_uid32_t uid; - __compat_gid32_t gid; - __compat_uid32_t cuid; - __compat_gid32_t cgid; - unsigned short mode; - unsigned short __pad1; - unsigned short seq; - unsigned short __pad2; - compat_ulong_t unused1; - compat_ulong_t unused2; -}; - -struct compat_semid64_ds { - struct compat_ipc64_perm sem_perm; - compat_ulong_t sem_otime; - compat_ulong_t sem_otime_high; - compat_ulong_t sem_ctime; - compat_ulong_t sem_ctime_high; - compat_ulong_t sem_nsems; - compat_ulong_t __unused3; - compat_ulong_t __unused4; -}; - -struct compat_msqid64_ds { - struct compat_ipc64_perm msg_perm; - compat_ulong_t msg_stime; - compat_ulong_t msg_stime_high; - compat_ulong_t msg_rtime; - compat_ulong_t msg_rtime_high; - compat_ulong_t msg_ctime; - compat_ulong_t msg_ctime_high; - compat_ulong_t msg_cbytes; - compat_ulong_t msg_qnum; - compat_ulong_t msg_qbytes; - compat_pid_t msg_lspid; - compat_pid_t msg_lrpid; - compat_ulong_t __unused4; - compat_ulong_t __unused5; -}; - -struct compat_shmid64_ds { - struct compat_ipc64_perm shm_perm; - compat_size_t shm_segsz; - compat_ulong_t shm_atime; - compat_ulong_t shm_atime_high; - compat_ulong_t shm_dtime; - compat_ulong_t shm_dtime_high; - compat_ulong_t shm_ctime; - compat_ulong_t shm_ctime_high; - compat_pid_t shm_cpid; - compat_pid_t shm_lpid; - compat_ulong_t shm_nattch; - compat_ulong_t __unused4; - compat_ulong_t __unused5; -}; - -/* - * The type of struct elf_prstatus.pr_reg in compatible core dumps. - */ -typedef struct user_regs_struct compat_elf_gregset_t; - -/* Full regset -- prstatus on x32, otherwise on ia32 */ -#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 
144 : 296) -#define SET_PR_FPVALID(S, V, R) \ - do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ - while (0) - #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - -static inline void __user *arch_compat_alloc_user_space(long len) -{ - compat_uptr_t sp; - - if (test_thread_flag(TIF_IA32)) { - sp = task_pt_regs(current)->sp; - } else { - /* -128 for the x32 ABI redzone */ - sp = task_pt_regs(current)->sp - 128; - } - - return (void __user *)round_down(sp - len, 16); -} - static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI @@ -228,10 +100,15 @@ static inline bool in_compat_syscall(void) return in_32bit_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ +#define compat_need_64bit_alignment_fixup in_ia32_syscall #endif struct compat_siginfo; -int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const kernel_siginfo_t *from, bool x32_ABI); + +#ifdef CONFIG_X86_X32_ABI +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const kernel_siginfo_t *from); +#define copy_siginfo_to_user32 copy_siginfo_to_user32 +#endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index adc6cc86b062..ad235dda1ded 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,37 +7,64 @@ #include <linux/topology.h> #include <linux/nodemask.h> #include <linux/percpu.h> +#include <asm/ibt.h> -#ifdef CONFIG_SMP - -extern void prefill_possible_map(void); - -#else /* CONFIG_SMP */ - -static inline void prefill_possible_map(void) {} - +#ifndef CONFIG_SMP #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define cpu_acpi_id(cpu) 0 -#define safe_smp_processor_id() 0 - #endif /* CONFIG_SMP */ -struct x86_cpu { - struct cpu cpu; -}; - #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -extern void start_cpu0(void); -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -extern int _debug_hotplug_cpu(int cpu, int action); -#endif +extern void soft_restart_cpu(void); #endif +extern void ap_init_aperfmperf(void); + int mwait_usable(const struct cpuinfo_x86 *); unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); +#ifdef CONFIG_X86_BUS_LOCK_DETECT +extern void __init sld_setup(struct cpuinfo_x86 *c); +extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); +extern bool handle_guest_split_lock(unsigned long ip); +extern void handle_bus_lock(struct pt_regs *regs); +void split_lock_init(void); +void bus_lock_init(void); +#else +static inline void __init sld_setup(struct cpuinfo_x86 *c) {} +static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) +{ + return false; +} + +static inline bool handle_guest_split_lock(unsigned long ip) +{ + return false; +} + +static inline void handle_bus_lock(struct pt_regs *regs) {} +static inline void split_lock_init(void) {} +static inline void bus_lock_init(void) {} +#endif + 
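The split-lock and bus-lock declarations above follow a common header idiom: when the Kconfig option is disabled, empty static inline stubs stand in for the real functions, so call sites compile unchanged with no #ifdef of their own and the optimizer discards the calls entirely. A minimal sketch of the idiom, assuming a made-up FEATURE_FOO symbol rather than any real kernel config option:

/* Sketch of the static-inline-stub idiom (hypothetical FEATURE_FOO,
 * not a real kernel Kconfig symbol). Callers stay free of #ifdefs;
 * with the option off, foo_init() inlines to nothing. */
#include <stdio.h>

#ifdef FEATURE_FOO
void foo_init(void);			/* real implementation elsewhere */
#else
static inline void foo_init(void) { }	/* compiled-out stub */
#endif

int main(void)
{
	foo_init();		/* always legal to call */
	printf("done\n");
	return 0;
}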
+#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif + +extern __noendbr void cet_disable(void); + +struct cpu_signature; + +void intel_collect_cpu_info(struct cpu_signature *sig); + +extern u64 x86_read_arch_cap_msr(void); +bool intel_find_matching_signature(void *mc, struct cpu_signature *sig); +int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); + +extern struct cpumask cpus_stop_mask; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index baeba0567126..6be777a06944 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -1,14 +1,207 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CPU_DEVICE_ID -#define _CPU_DEVICE_ID 1 +#ifndef _ASM_X86_CPU_DEVICE_ID +#define _ASM_X86_CPU_DEVICE_ID + +/* + * Can't use <linux/bitfield.h> because it generates expressions that + * cannot be used in structure initializers. Bitfield construction + * here must match the union in struct cpuinfo_86: + * union { + * struct { + * __u8 x86_model; + * __u8 x86; + * __u8 x86_vendor; + * __u8 x86_reserved; + * }; + * __u32 x86_vfm; + * }; + */ +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions + * + * The wildcard initializers are in mod_devicetable.h because + * file2alias needs them. Sigh. */ - #include <linux/mod_devicetable.h> +/* Get the INTEL_FAM* model defines */ +#include <asm/intel-family.h> +/* And the X86_VENDOR_* ones */ +#include <asm/processor.h> + +/* Centaur FAM6 models */ +#define X86_CENTAUR_FAM6_C7_A 0xa +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +/* x86_cpu_id::flags */ +#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0) + +/** + * X86_MATCH_CPU - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Use only if you need all selectors. Otherwise use one of the shorter + * macros of the X86_MATCH_* family. If there is no matching shorthand + * macro, consider to add one. If you really need to wrap one of the macros + * into another macro at the usage site for good reasons, then please + * start this local macro with X86_MATCH to allow easy grepping. 
+ */ +#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \ + .vendor = _vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ + .type = _type, \ + .driver_data = (unsigned long) _data \ +} + +/** + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_FEATURE - Macro for matching a CPU feature + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @model: The model number, model constant or X86_MODEL_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \ + X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM - Match vendor and family + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + */ +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM - Match encoded vendor/family/model + * @vfm: Encoded 8-bits each for vendor, family, model + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. 
+ */ +#define X86_MATCH_VFM(vfm, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) + +#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) +/** + * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings + * range. + * @vfm: Encoded 8-bits each for vendor, family, model + * @min_step: Lowest stepping number to match + * @max_step: Highest stepping number to match + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \ + X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature + * @vfm: Encoded 8-bits each for vendor, family, model + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type + * @vfm: Encoded 8-bits each for vendor, family, model + * @type: CPU type e.g. P-core, E-core + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, type, data) extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); +extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table); -#endif +#endif /* _ASM_X86_CPU_DEVICE_ID */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 29c706415443..462fc34f1317 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPU_ENTRY_AREA_H #define _ASM_X86_CPU_ENTRY_AREA_H @@ -6,6 +6,78 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> #include <asm/intel_ds.h> +#include <asm/pgtable_areas.h> + +#ifdef CONFIG_X86_64 + +#ifdef CONFIG_AMD_MEM_ENCRYPT +#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ +#else +#define VC_EXCEPTION_STKSZ 0 +#endif + +/* Macro to enforce the same ordering and stack sizes */ +#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \ + char DF_stack_guard[guardsize]; \ + char DF_stack[EXCEPTION_STKSZ]; \ + char NMI_stack_guard[guardsize]; \ + char NMI_stack[EXCEPTION_STKSZ]; \ + char DB_stack_guard[guardsize]; \ + char DB_stack[EXCEPTION_STKSZ]; \ + char MCE_stack_guard[guardsize]; \ + char MCE_stack[EXCEPTION_STKSZ]; \ + char VC_stack_guard[guardsize]; \ + char VC_stack[optional_stack_size]; \ + char VC2_stack_guard[guardsize]; \ + char VC2_stack[optional_stack_size]; \ + char IST_top_guard[guardsize]; \ + +/* The exception stacks' physical storage. No guard pages required */ +struct exception_stacks { + ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ) +}; + +/* The effective cpu entry area mapping with guard pages. 
*/ +struct cea_exception_stacks { + ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) +}; + +/* + * The exception stack ordering in [cea_]exception_stacks + */ +enum exception_stack_ordering { + ESTACK_DF, + ESTACK_NMI, + ESTACK_DB, + ESTACK_MCE, + ESTACK_VC, + ESTACK_VC2, + N_EXCEPTION_STACKS +}; + +#define CEA_ESTACK_SIZE(st) \ + sizeof(((struct cea_exception_stacks *)0)->st## _stack) + +#define CEA_ESTACK_BOT(ceastp, st) \ + ((unsigned long)&(ceastp)->st## _stack) + +#define CEA_ESTACK_TOP(ceastp, st) \ + (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) + +#define CEA_ESTACK_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack) + +#define CEA_ESTACK_PAGES \ + (sizeof(struct cea_exception_stacks) / PAGE_SIZE) + +#endif + +#ifdef CONFIG_X86_32 +struct doublefault_stack { + unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)]; + struct x86_hw_tss tss; +} __aligned(PAGE_SIZE); +#endif /* * cpu_entry_area is a percpu region that contains things needed by the CPU @@ -20,10 +92,19 @@ struct cpu_entry_area { /* * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif struct entry_stack_page entry_stack_page; +#ifdef CONFIG_X86_32 + char guard_doublefault_stack[PAGE_SIZE]; + struct doublefault_stack doublefault_stack; +#endif + /* * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because * we need task switches to work, and task switches write to the TSS. @@ -32,14 +113,10 @@ struct cpu_entry_area { #ifdef CONFIG_X86_64 /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. + * Exception stacks used for IST entries with guard pages. */ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; + struct cea_exception_stacks estacks; #endif -#ifdef CONFIG_CPU_SUP_INTEL /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. @@ -50,30 +127,27 @@ struct cpu_entry_area { * Reserve enough fixmap PTEs. 
*/ struct debug_store_buffers cpu_debug_buffers; -#endif }; -#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); +DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); -#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE -#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) - -#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) - -#define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) - extern struct cpu_entry_area *get_cpu_entry_area(int cpu); -static inline struct entry_stack *cpu_entry_stack(int cpu) +static __always_inline struct entry_stack *cpu_entry_stack(int cpu) { return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } +#define __this_cpu_ist_top_va(name) \ + CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) + +#define __this_cpu_ist_bottom_va(name) \ + CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name) + #endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce95b8cbd229..3ddc1d33399b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -4,10 +4,12 @@ #include <asm/processor.h> -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if defined(__KERNEL__) && !defined(__ASSEMBLER__) #include <asm/asm.h> #include <linux/bitops.h> +#include <asm/alternative.h> +#include <asm/cpufeaturemasks.h> enum cpuid_leafs { @@ -22,105 +24,48 @@ enum cpuid_leafs CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, - CPUID_F_0_EDX, - CPUID_F_1_EDX, + CPUID_LNX_4, + CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, - CPUID_8000_0007_EBX, + CPUID_LNX_6, CPUID_7_EDX, + CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; -#ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; -#define X86_CAP_FMT "%s" -#define x86_cap_flag(flag) x86_cap_flags[flag] -#else -#define X86_CAP_FMT "%d:%d" -#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31) -#endif /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. */ extern const char * const x86_bug_flags[NBUGINTS*32]; +#define x86_bug_flag(flag) x86_bug_flags[flag] #define test_cpu_cap(c, bit) \ - test_bit(bit, (unsigned long *)((c)->x86_capability)) - -/* - * There are 32 bits/features in each mask word. The high bits - * (selected with (bit>>5) give us the word number and the low 5 - * bits give us the bit/feature number inside the word. - * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can - * see if it is set in the mask word. 
- */ -#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ - (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) - -#define REQUIRED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ - REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) - -#define DISABLED_MASK_BIT_SET(feature_bit) \ - ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ - CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ - DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ - (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ - x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, cpu_info.x86_capability)) /* - * This macro is for detection of features which need kernel - * infrastructure to be used. It may *not* directly test the CPU - * itself. Use the cpu_has() family if you want true runtime - * testing of CPU features, like in hypervisor code where you are - * supporting a possible guest feature where host support for it + * This is the default CPU features testing macro to use in code. 
+ * + * It is for detection of features which need kernel infrastructure to be + * used. It may *not* directly test the CPU itself. Use the cpu_has() family + * if you want true runtime testing of CPU features, like in hypervisor code + * where you are supporting a possible guest feature where host support for it * is not relevant. */ #define cpu_feature_enabled(bit) \ @@ -132,70 +77,38 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); - -#define setup_force_cpu_cap(bit) do { \ - set_cpu_cap(&boot_cpu_data, bit); \ +void check_cpufeature_deps(struct cpuinfo_x86 *c); + +#define setup_force_cpu_cap(bit) do { \ + \ + if (!boot_cpu_has(bit)) \ + WARN_ON(alternatives_patched); \ + \ + set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) - -/* - * Workaround for the sake of BPF compilation which utilizes kernel - * headers, but clang does not support ASM GOTO and fails the build. - */ -#ifndef __BPF_TRACING__ -#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" -#endif - -#define static_cpu_has(bit) boot_cpu_has(bit) - -#else - /* - * Static testing of CPU features. Used the same as boot_cpu_has(). - * These will statically patch the target code for additional - * performance. + * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know + * that this is only used on a fallback path and will sometimes cause + * it to manifest the address of boot_cpu_data in a register, fouling + * the mainline (post-initialization) code. */ -static __always_inline __pure bool _static_cpu_has(u16 bit) +static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" - "2:\n" - ".skip -(((5f-4f) - (2b-1b)) > 0) * " - "((5f-4f) - (2b-1b)),0x90\n" - "3:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 4f - .\n" /* repl offset */ - " .word %P[always]\n" /* always replace */ - " .byte 3b - 1b\n" /* src len */ - " .byte 5f - 4f\n" /* repl len */ - " .byte 3b - 2b\n" /* pad len */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "4: jmp %l[t_no]\n" - "5:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 0\n" /* no replacement */ - " .word %P[feature]\n" /* feature bit */ - " .byte 3b - 1b\n" /* src len */ - " .byte 0\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .altinstr_aux,\"ax\"\n" - "6:\n" - " testb %[bitnum],%[cap_byte]\n" - " jnz %l[t_yes]\n" - " jmp %l[t_no]\n" - ".previous\n" + asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") + ".pushsection .altinstr_aux,\"ax\"\n" + "6:\n" + ANNOTATE_DATA_SPECIAL "\n" + " testb %[bitnum], %a[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".popsection\n" : : [feature] "i" (bit), - [always] "i" (X86_FEATURE_ALWAYS), [bitnum] "i" (1 << (bit & 7)), - [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); t_yes: return true; @@ -209,7 +122,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) @@ -226,5 +138,5 @@ t_no: #define CPU_FEATURE_TYPEVAL 
boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model -#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */ #endif /* _ASM_X86_CPUFEATURE_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6d6122524711..c3b53beb1300 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -2,187 +2,178 @@ #ifndef _ASM_X86_CPUFEATURES_H #define _ASM_X86_CPUFEATURES_H -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#include <asm/required-features.h> -#endif - -#ifndef _ASM_X86_DISABLED_FEATURES_H -#include <asm/disabled-features.h> -#endif - /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ +#define NBUGINTS 2 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* 
"pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are 
synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -/* CPU types for specific tunings: */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */ +/* Free ( 3*32+ 7) */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* 
"rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug 
Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ 
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ 
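Each X86_FEATURE_* constant above packs a (word, bit) pair into one integer: word*32 selects one of the kernel's 32-bit capability words, and the remainder selects a bit within it. A minimal user-space sketch of how such a constant is decoded and tested, assuming a plain uint32_t capability array (the kernel's real accessors are cpu_feature_enabled() and boot_cpu_has(); the array and helper names here are illustrative only, and the NCAPINTS value is an assumption):

#include <stdint.h>
#include <stdio.h>

#define NCAPINTS 22	/* assumption: 22 capability words (0..21) */

/* Hypothetical capability store, one 32-bit word per feature word. */
static uint32_t x86_capability[NCAPINTS];

/* Test a feature constant encoded as (word*32 + bit). */
static int has_feature(unsigned int feature)
{
	return (x86_capability[feature / 32] >> (feature % 32)) & 1;
}

int main(void)
{
	unsigned int mwaitx = 6*32 + 29;	/* X86_FEATURE_MWAITX above */

	x86_capability[6] |= 1u << 29;		/* as if CPUID 0x80000001:ECX had set it */
	printf("mwaitx: %d\n", has_feature(mwaitx));
	return 0;
}

The X86_BUG(x) encoding further below continues the same numbering: bug x lives at bit NCAPINTS*32 + x, just past the last capability word.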
/* * Auxiliary flags: Linux defined - For features scattered in various @@ -190,196 +181,393 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. 
*/ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define 
X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 
9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0xf, etc. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) 
/* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */ +#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */ +#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_LASS (12*32+ 6) /* "lass" Linear Address Space Separation */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. 
Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page 
tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ - -/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ + +/* + * Linux-defined word for use with scattered/synthetic bits. 
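+ * (Word 17 currently holds the AMD RAS bits from CPUID 0x80000007 EBX: overflow recovery, SUCCOR and SMCA.)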
+ */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ + +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ + +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure 
Nested Paging */ +#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ +#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ + +/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ + +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ + +#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ + +#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ + +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc and Linux defined features. + * + * Reuse free bits when adding new feature flags! 
+ */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ +#define X86_FEATURE_SGX_EUPDATESVN (21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */ + +#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO (21*32+19) /* + * Clear CPU buffers before VM-Enter if the vCPU + * can access host MMIO (ignored for all intents + * and purposes if CLEAR_CPU_BUF_VM is set). + */ +#define X86_FEATURE_X2AVIC_EXT (21*32+20) /* AMD SVM x2AVIC support for 4k vCPUs */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. 
*/ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSBDS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort (TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0
speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ +#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h new file mode 100644 index 000000000000..44fa82e1267c --- /dev/null +++ b/arch/x86/include/asm/cpuid/api.h @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_API_H +#define _ASM_X86_CPUID_API_H + +#include <asm/cpuid/types.h> + +#include <linux/build_bug.h> +#include <linux/types.h> + +#include <asm/string.h> + +/* + * Raw CPUID accessors: + */ + +#ifdef CONFIG_X86_32 +bool cpuid_feature(void); +#else +static inline bool cpuid_feature(void) +{ + return true; +} +#endif + +static inline void native_cpuid(u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +#define NATIVE_CPUID_REG(reg) \ +static inline u32 native_cpuid_##reg(u32 op) \ +{ \ + u32 eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum: + */ +NATIVE_CPUID_REG(eax) +NATIVE_CPUID_REG(ebx) +NATIVE_CPUID_REG(ecx) +NATIVE_CPUID_REG(edx) + +#ifdef CONFIG_PARAVIRT_XXL +# include <asm/paravirt.h> +#else +# define __cpuid native_cpuid +#endif + +/* + * Generic CPUID function + * + * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX + * resulting in stale register contents being returned. 
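+ * Callers that need a specific subleaf in ECX should use cpuid_count() below.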
+ */ +static inline void cpuid(u32 op, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ECX */ +static inline void cpuid_count(u32 op, int count, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum: + */ + +static inline u32 cpuid_eax(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return eax; +} + +static inline u32 cpuid_ebx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ebx; +} + +static inline u32 cpuid_ecx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx; +} + +static inline u32 cpuid_edx(u32 op) +{ + u32 eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + + return edx; +} + +static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs) +{ + regs[CPUID_EAX] = leaf; + regs[CPUID_ECX] = subleaf; + __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX); +} + +#define cpuid_subleaf(leaf, subleaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, subleaf, (u32 *)(regs)); \ +} + +#define cpuid_leaf(leaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, 0, (u32 *)(regs)); \ +} + +static inline void __cpuid_read_reg(u32 leaf, u32 subleaf, + enum cpuid_regs_idx regidx, u32 *reg) +{ + u32 regs[4]; + + __cpuid_read(leaf, subleaf, regs); + *reg = regs[regidx]; +} + +#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \ +} + +#define cpuid_leaf_reg(leaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \ +} + +/* + * Hypervisor-related APIs: + */ + +static __always_inline bool cpuid_function_is_indexed(u32 function) +{ + switch (function) { + case 4: + case 7: + case 0xb: + case 0xd: + case 0xf: + case 0x10: + case 0x12: + case 0x14: + case 0x17: + case 0x18: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x24: + case 0x8000001d: + return true; + } + + return false; +} + +#define for_each_possible_cpuid_base_hypervisor(function) \ + for (function = 0x40000000; function < 0x40010000; function += 0x100) + +static inline u32 cpuid_base_hypervisor(const char *sig, u32 leaves) +{ + u32 base, eax, signature[3]; + + for_each_possible_cpuid_base_hypervisor(base) { + cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); + + /* + * This must not compile to "call memcmp" because it's called + * from PVH early boot code before instrumentation is set up + * and memcmp() itself may be instrumented. + */ + if (!__builtin_memcmp(sig, signature, 12) && + (leaves == 0 || ((eax - base) >= leaves))) + return base; + } + + return 0; +} + +/* + * CPUID(0x2) parsing: + */ + +/** + * cpuid_leaf_0x2() - Return sanitized CPUID(0x2) register output + * @regs: Output parameter + * + * Query CPUID(0x2) and store its output in @regs. Force set any + * invalid 1-byte descriptor returned by the hardware to zero (the NULL + * cache/TLB descriptor) before returning it to the caller. + * + * Use for_each_cpuid_0x2_desc() to iterate over the register output in + * parsed form. + */ +static inline void cpuid_leaf_0x2(union leaf_0x2_regs *regs) +{ + cpuid_leaf(0x2, regs); + + /* + * All Intel CPUs must report an iteration count of 1. 
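+ * (AL, the low byte of EAX, is defined as the number of times CPUID(0x2) must be executed to obtain all descriptors; it has always been 0x01 in practice.)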
In case + * of bogus hardware, treat all returned descriptors as NULL. + */ + if (regs->desc[0] != 0x01) { + for (int i = 0; i < 4; i++) + regs->regv[i] = 0; + return; + } + + /* + * The most significant bit (MSB) of each register must be clear. + * If a register is invalid, replace its descriptors with NULL. + */ + for (int i = 0; i < 4; i++) { + if (regs->reg[i].invalid) + regs->regv[i] = 0; + } +} + +/** + * for_each_cpuid_0x2_desc() - Iterator for parsed CPUID(0x2) descriptors + * @_regs: CPUID(0x2) register output, as returned by cpuid_leaf_0x2() + * @_ptr: u8 pointer, for macro internal use only + * @_desc: Pointer to the parsed CPUID(0x2) descriptor at each iteration + * + * Loop over the 1-byte descriptors in the passed CPUID(0x2) output registers + * @_regs. Provide the parsed information for each descriptor through @_desc. + * + * To handle cache-specific descriptors, switch on @_desc->c_type. For TLB + * descriptors, switch on @_desc->t_type. + * + * Example usage for cache descriptors:: + * + * const struct leaf_0x2_table *desc; + * union leaf_0x2_regs regs; + * u8 *ptr; + * + * cpuid_leaf_0x2(®s); + * for_each_cpuid_0x2_desc(regs, ptr, desc) { + * switch (desc->c_type) { + * ... + * } + * } + */ +#define for_each_cpuid_0x2_desc(_regs, _ptr, _desc) \ + for (_ptr = &(_regs).desc[1]; \ + _ptr < &(_regs).desc[16] && (_desc = &cpuid_0x2_table[*_ptr]); \ + _ptr++) + +/* + * CPUID(0x80000006) parsing: + */ + +static inline bool cpuid_amd_hygon_has_l3_cache(void) +{ + return cpuid_edx(0x80000006); +} + +#endif /* _ASM_X86_CPUID_API_H */ diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h new file mode 100644 index 000000000000..8a00364b79de --- /dev/null +++ b/arch/x86/include/asm/cpuid/types.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_TYPES_H +#define _ASM_X86_CPUID_TYPES_H + +#include <linux/build_bug.h> +#include <linux/types.h> + +/* + * Types for raw CPUID access: + */ + +struct cpuid_regs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +}; + +enum cpuid_regs_idx { + CPUID_EAX = 0, + CPUID_EBX, + CPUID_ECX, + CPUID_EDX, +}; + +#define CPUID_LEAF_MWAIT 0x05 +#define CPUID_LEAF_DCA 0x09 +#define CPUID_LEAF_XSTATE 0x0d +#define CPUID_LEAF_TSC 0x15 +#define CPUID_LEAF_FREQ 0x16 +#define CPUID_LEAF_TILE 0x1d + +/* + * Types for CPUID(0x2) parsing: + */ + +struct leaf_0x2_reg { + u32 : 31, + invalid : 1; +}; + +union leaf_0x2_regs { + struct leaf_0x2_reg reg[4]; + u32 regv[4]; + u8 desc[16]; +}; + +/* + * Leaf 0x2 1-byte descriptors' cache types + * To be used for their mappings at cpuid_0x2_table[] + * + * Start at 1 since type 0 is reserved for HW byte descriptors which are + * not recognized by the kernel; i.e., those without an explicit mapping. + */ +enum _cache_table_type { + CACHE_L1_INST = 1, + CACHE_L1_DATA, + CACHE_L2, + CACHE_L3 + /* Adjust __TLB_TABLE_TYPE_BEGIN before adding more types */ +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _cache_table_type) == 1); +#endif + +/* + * Ensure that leaf 0x2 cache and TLB type values do not intersect, + * since they share the same type field at struct cpuid_0x2_table. 
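+ * (Hence __TLB_TABLE_TYPE_BEGIN below starts the TLB enum one past the last cache type.)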
+ */ +#define __TLB_TABLE_TYPE_BEGIN (CACHE_L3 + 1) + +/* + * Leaf 0x2 1-byte descriptors' TLB types + * To be used for their mappings at cpuid_0x2_table[] + */ +enum _tlb_table_type { + TLB_INST_4K = __TLB_TABLE_TYPE_BEGIN, + TLB_INST_4M, + TLB_INST_2M_4M, + TLB_INST_ALL, + + TLB_DATA_4K, + TLB_DATA_4M, + TLB_DATA_2M_4M, + TLB_DATA_4K_4M, + TLB_DATA_1G, + TLB_DATA_1G_2M_4M, + + TLB_DATA0_4K, + TLB_DATA0_4M, + TLB_DATA0_2M_4M, + + STLB_4K, + STLB_4K_2M, +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _tlb_table_type) == 1); +#endif + +/* + * Combined parsing table for leaf 0x2 cache and TLB descriptors. + */ + +struct leaf_0x2_table { + union { + enum _cache_table_type c_type; + enum _tlb_table_type t_type; + }; + union { + short c_size; + short entries; + }; +}; + +extern const struct leaf_0x2_table cpuid_0x2_table[256]; + +/* + * All of leaf 0x2's one-byte TLB descriptors imply the same number of entries + * for their respective TLB types. TLB descriptor 0x63 is an exception: it + * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages. + * + * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry + * count for dTLB 1GB pages is already encoded in the cpuid_0x2_table[] mapping. + */ +#define TLB_0x63_2M_4M_ENTRIES 32 + +#endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 000000000000..c8b39c6716ff --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu); + +#endif diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 6722ffcef2e6..9df9e9cde670 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -1,15 +1,40 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUMASK_H #define _ASM_X86_CPUMASK_H -#ifndef __ASSEMBLY__ -#include <linux/cpumask.h> +#ifndef __ASSEMBLER__ -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; +#include <linux/compiler.h> +#include <linux/cpumask.h> extern void setup_cpu_local_masks(void); -#endif /* __ASSEMBLY__ */ +/* + * NMI and MCE exceptions need cpu_is_offline() _really_ early; + * provide an arch_ special for them to avoid instrumentation.
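+ * (arch_test_bit() and arch_clear_bit() are the non-instrumented variants of the regular bitops.)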
+ */ +#if NR_CPUS > 1 +static __always_inline bool arch_cpu_online(int cpu) +{ + return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); +} + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} +#else +static __always_inline bool arch_cpu_online(int cpu) +{ + return cpu == 0; +} + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + return; +} +#endif + +#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index 0acf5ee45a21..8b6bd63530dc 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,8 +2,9 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); -int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); void crash_smp_send_stop(void); diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h new file mode 100644 index 000000000000..7835b2cdff04 --- /dev/null +++ b/arch/x86/include/asm/crash_reserve.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_CRASH_RESERVE_H +#define _X86_CRASH_RESERVE_H + +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_16M + +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bit kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping.
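+ *
+ * For reference, SZ_64T is 1UL << 46. A placement check derived from
+ * these limits would look like (illustrative sketch only):
+ *
+ *	if (crash_base + crash_size > CRASH_ADDR_HIGH_MAX)
+ *		return -ERANGE;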
+ */ +extern unsigned long swiotlb_size_or_default(void); + +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAX SZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAX SZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + +# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default() + +static inline unsigned long crash_low_size_default(void) +{ +#ifdef CONFIG_X86_64 + return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); +#else + return 0; +#endif +} + +#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + +#endif /* _X86_CRASH_RESERVE_H */ diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h deleted file mode 100644 index c508521dd190..000000000000 --- a/arch/x86/include/asm/crypto/aes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_AES_H -#define ASM_X86_AES_H - -#include <linux/crypto.h> -#include <crypto/aes.h> - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -#endif diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h deleted file mode 100644 index a5d86fc0593f..000000000000 --- a/arch/x86/include/asm/crypto/camellia.h +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_CAMELLIA_H -#define ASM_X86_CAMELLIA_H - -#include <crypto/b128ops.h> -#include <linux/crypto.h> -#include <linux/kernel.h> - -#define CAMELLIA_MIN_KEY_SIZE 16 -#define CAMELLIA_MAX_KEY_SIZE 32 -#define CAMELLIA_BLOCK_SIZE 16 -#define CAMELLIA_TABLE_BYTE_LEN 272 -#define CAMELLIA_PARALLEL_BLOCKS 2 - -struct crypto_skcipher; - -struct camellia_ctx { - u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; - u32 key_length; -}; - -struct camellia_xts_ctx { - struct camellia_ctx tweak_ctx; - struct camellia_ctx crypt_ctx; -}; - -extern int __camellia_setkey(struct camellia_ctx *cctx, - const unsigned char *key, - unsigned int key_len, u32 *flags); - -extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); - -/* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -/* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -/* 16-way parallel cipher functions (avx/aes-ni) */ -asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, true); -} - -static 
inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, true); -} - -/* glue helpers */ -extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src); -extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, - le128 *iv); -extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); - -#endif /* ASM_X86_CAMELLIA_H */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h deleted file mode 100644 index d1818634ae7e..000000000000 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Shared glue code for 128bit block ciphers - */ - -#ifndef _CRYPTO_GLUE_HELPER_H -#define _CRYPTO_GLUE_HELPER_H - -#include <crypto/internal/skcipher.h> -#include <linux/kernel.h> -#include <asm/fpu/api.h> -#include <crypto/b128ops.h> - -typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); -typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, - le128 *iv); -typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) -#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) -#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) -#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn)) - -struct common_glue_func_entry { - unsigned int num_blocks; /* number of blocks that @fn will process */ - union { - common_glue_func_t ecb; - common_glue_cbc_func_t cbc; - common_glue_ctr_func_t ctr; - common_glue_xts_func_t xts; - } fn_u; -}; - -struct common_glue_ctx { - unsigned int num_funcs; - int fpu_blocks_limit; /* -1 means fpu not needed at all */ - - /* - * First funcs entry must have largest num_blocks and last funcs entry - * must have num_blocks == 1! - */ - struct common_glue_func_entry funcs[]; -}; - -static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. 
- */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - - /* prevent sleeping if FPU is in use */ - skcipher_walk_atomise(walk); - - kernel_fpu_begin(); - return true; -} - -static inline void glue_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static inline void le128_to_be128(be128 *dst, const le128 *src) -{ - dst->a = cpu_to_be64(le64_to_cpu(src->a)); - dst->b = cpu_to_be64(le64_to_cpu(src->b)); -} - -static inline void be128_to_le128(le128 *dst, const be128 *src) -{ - dst->a = cpu_to_le64(be64_to_cpu(src->a)); - dst->b = cpu_to_le64(be64_to_cpu(src->b)); -} - -static inline void le128_inc(le128 *i) -{ - u64 a = le64_to_cpu(i->a); - u64 b = le64_to_cpu(i->b); - - b++; - if (!b) - a++; - - i->a = cpu_to_le64(a); - i->b = cpu_to_le64(b); -} - -extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, - struct skcipher_request *req); - -extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req); - -extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, - struct skcipher_request *req, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - -extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, - le128 *iv, common_glue_func_t fn); - -#endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h deleted file mode 100644 index db7c9cc32234..000000000000 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_SERPENT_AVX_H -#define ASM_X86_SERPENT_AVX_H - -#include <crypto/b128ops.h> -#include <crypto/serpent.h> -#include <linux/types.h> - -struct crypto_skcipher; - -#define SERPENT_PARALLEL_BLOCKS 8 - -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; -}; - -asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); - -extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen); - -#endif diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h deleted file mode 100644 index 1a345e8a7496..000000000000 --- a/arch/x86/include/asm/crypto/serpent-sse2.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_SERPENT_SSE2_H -#define ASM_X86_SERPENT_SSE2_H - -#include <linux/crypto.h> -#include <crypto/serpent.h> - -#ifdef CONFIG_X86_32 - -#define SERPENT_PARALLEL_BLOCKS 4 - 
-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_4way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_4way(ctx, dst, src); -} - -#else - -#define SERPENT_PARALLEL_BLOCKS 8 - -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way(ctx, dst, src); -} - -#endif - -#endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h deleted file mode 100644 index f618bf272b90..000000000000 --- a/arch/x86/include/asm/crypto/twofish.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_TWOFISH_H -#define ASM_X86_TWOFISH_H - -#include <linux/crypto.h> -#include <crypto/twofish.h> -#include <crypto/b128ops.h> - -/* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* 3-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* helpers from twofish_x86_64-3way module */ -extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); -extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, - le128 *iv); -extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, - le128 *iv); - -#endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 3e204e6140b5..cc4a3f725b37 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -2,21 +2,31 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H +#include <linux/build_bug.h> #include <linux/compiler.h> + +#ifndef __ASSEMBLER__ + +#include <linux/cache.h> #include <asm/percpu.h> -#ifndef __ASSEMBLY__ struct task_struct; -DECLARE_PER_CPU(struct task_struct *, current_task); +DECLARE_PER_CPU_CACHE_HOT(struct task_struct *, current_task); +/* const-qualified alias provided by the linker. 
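The const
+ * qualification lets the compiler treat the value as invariant, so
+ * repeated reads of 'current' within one function can be folded into a
+ * single percpu load through this_cpu_read_const() below.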
*/ +DECLARE_PER_CPU_CACHE_HOT(struct task_struct * const __percpu_seg_override, + const_current_task); static __always_inline struct task_struct *get_current(void) { + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return this_cpu_read_const(const_current_task); + return this_cpu_read_stable(current_task); } #define current get_current() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9e5ca30738e5..a2c1f2d24b64 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -2,10 +2,21 @@ #ifndef _ASM_X86_DEBUGREG_H #define _ASM_X86_DEBUGREG_H - #include <linux/bug.h> +#include <linux/percpu.h> #include <uapi/asm/debugreg.h> +#include <asm/cpufeature.h> +#include <asm/msr.h> + +/* + * Define bits that are always set to 1 in DR7; only bit 10 is + * architecturally reserved to '1'. + * + * This is also the init/reset value for DR7. + */ +#define DR7_FIXED_1 0x00000400 + DECLARE_PER_CPU(unsigned long, cpu_dr7); #ifndef CONFIG_PARAVIRT_XXL @@ -18,9 +29,9 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); native_set_debugreg(register, value) #endif -static inline unsigned long native_get_debugreg(int regno) +static __always_inline unsigned long native_get_debugreg(int regno) { - unsigned long val = 0; /* Damn you, gcc! */ + unsigned long val; switch (regno) { case 0: @@ -39,7 +50,20 @@ static inline unsigned long native_get_debugreg(int regno) asm("mov %%db6, %0" :"=r" (val)); break; case 7: - asm("mov %%db7, %0" :"=r" (val)); + /* + * Use "asm volatile" for DR7 reads to forbid re-ordering them + * with other code. + * + * This is needed because a DR7 access can cause a #VC exception + * when running under SEV-ES. Taking a #VC exception is not a + * safe thing to do just anywhere in the entry code and + * re-ordering might place the access into an unsafe location. + * + * This happened in the NMI handler, where the DR7 read was + * re-ordered to happen before the call to sev_es_ist_enter(), + * causing stack recursion. + */ + asm volatile("mov %%db7, %0" : "=r" (val)); break; default: BUG(); @@ -47,7 +71,7 @@ static inline unsigned long native_get_debugreg(int regno) return val; } -static inline void native_set_debugreg(int regno, unsigned long value) +static __always_inline void native_set_debugreg(int regno, unsigned long value) { switch (regno) { case 0: @@ -66,7 +90,16 @@ static inline void native_set_debugreg(int regno, unsigned long value) asm("mov %0, %%db6" ::"r" (value)); break; case 7: - asm("mov %0, %%db7" ::"r" (value)); + /* + * Use "asm volatile" for DR7 writes to forbid re-ordering them + * with other code. + * + * While it didn't happen with a DR7 write (see the DR7 read + * comment above which explains where it happened), add the + * "asm volatile" here too to avoid similar problems in the + * future.
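+ *
+ * The same ordering guarantee is what makes the local_db_save() and
+ * local_db_restore() helpers below safe to pair around a #DB-sensitive
+ * critical section; a sketch of that pattern:
+ *
+ *	unsigned long dr7 = local_db_save();
+ *	...	(hardware breakpoints are disabled here)
+ *	local_db_restore(dr7);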
+ */ + asm volatile("mov %0, %%db7" ::"r" (value)); break; default: BUG(); @@ -75,8 +108,8 @@ static inline void native_set_debugreg(int regno, unsigned long value) static inline void hw_breakpoint_disable(void) { - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); + /* Reset the control register for HW Breakpoint */ + set_debugreg(DR7_FIXED_1, 7); /* Zero-out the individual HW breakpoint address registers */ set_debugreg(0UL, 0); @@ -85,40 +118,80 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } -static inline int hw_breakpoint_active(void) +static __always_inline bool hw_breakpoint_active(void) { return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } -extern void aout_dump_debugregs(struct user *dump); - extern void hw_breakpoint_restore(void); -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(int, debug_stack_usage); -static inline void debug_stack_usage_inc(void) +static __always_inline unsigned long local_db_save(void) { - __this_cpu_inc(debug_stack_usage); + unsigned long dr7; + + if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active()) + return 0; + + get_debugreg(dr7, 7); + + /* Architecturally set bit */ + dr7 &= ~DR7_FIXED_1; + if (dr7) + set_debugreg(DR7_FIXED_1, 7); + + /* + * Ensure the compiler doesn't lower the above statements into + * the critical section; disabling breakpoints late would not + * be good. + */ + barrier(); + + return dr7; } -static inline void debug_stack_usage_dec(void) + +static __always_inline void local_db_restore(unsigned long dr7) { - __this_cpu_dec(debug_stack_usage); + /* + * Ensure the compiler doesn't raise this statement into + * the critical section; enabling breakpoints early would + * not be good. + */ + barrier(); + if (dr7) + set_debugreg(dr7, 7); } -int is_debug_stack(unsigned long addr); -void debug_stack_set_zero(void); -void debug_stack_reset(void); -#else /* !X86_64 */ -static inline int is_debug_stack(unsigned long addr) { return 0; } -static inline void debug_stack_set_zero(void) { } -static inline void debug_stack_reset(void) { } -static inline void debug_stack_usage_inc(void) { } -static inline void debug_stack_usage_dec(void) { } -#endif /* X86_64 */ #ifdef CONFIG_CPU_SUP_AMD -extern void set_dr_addr_mask(unsigned long mask, int dr); +extern void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr); +extern unsigned long amd_get_dr_addr_mask(unsigned int dr); #else -static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +static inline void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) { } +static inline unsigned long amd_get_dr_addr_mask(unsigned int dr) +{ + return 0; +} +#endif + +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; #endif + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} + +static inline void update_debugctlmsr(unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctlmsr); +} #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index de9e7841f953..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -3,8 +3,10 @@ #define _ASM_X86_DELAY_H #include <asm-generic/delay.h> +#include <linux/init.h> -void use_tsc_delay(void); +void __init use_tsc_delay(void); +void __init use_tpause_delay(void); void use_mwaitx_delay(void); 
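+
+/*
+ * Which implementation runs is chosen once during boot; presumably
+ * use_tsc_delay() once the TSC is calibrated, and use_tpause_delay()
+ * only when X86_FEATURE_WAITPKG is available (the call sites live in
+ * the delay/TSC setup code, not in this header).
+ */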
#endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 68a99d2a5f33..ec95fe44fa3a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -9,6 +9,7 @@ #include <asm/irq_vectors.h> #include <asm/cpu_entry_area.h> +#include <linux/debug_locks.h> #include <linux/smp.h> #include <linux/percpu.h> @@ -40,11 +41,6 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->l = 0; } -extern struct desc_ptr idt_descr; -extern gate_desc idt_table[]; -extern const struct desc_ptr debug_idt_descr; -extern gate_desc debug_idt_table[]; - struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); @@ -214,7 +210,7 @@ static inline void native_load_gdt(const struct desc_ptr *dtr) asm volatile("lgdt %0"::"m" (*dtr)); } -static inline void native_load_idt(const struct desc_ptr *dtr) +static __always_inline void native_load_idt(const struct desc_ptr *dtr) { asm volatile("lidt %0"::"m" (*dtr)); } @@ -229,6 +225,26 @@ static inline void store_idt(struct desc_ptr *dtr) asm volatile("sidt %0":"=m" (*dtr)); } +static inline void native_gdt_invalidate(void) +{ + const struct desc_ptr invalid_gdt = { + .address = 0, + .size = 0 + }; + + native_load_gdt(&invalid_gdt); +} + +static inline void native_idt_invalidate(void) +{ + const struct desc_ptr invalid_idt = { + .address = 0, + .size = 0 + }; + + native_load_idt(&invalid_idt); +} + /* * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is * a read-only remapping. To prevent a page fault, the GDT is switched to the @@ -386,66 +402,48 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -void update_intr_gate(unsigned int n, const void *addr); -void alloc_intr_gate(unsigned int n, const void *addr); - -extern unsigned long system_vectors[]; - -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(u32, debug_idt_ctr); -static inline bool is_debug_idt_enabled(void) +static inline void init_idt_data(struct idt_data *data, unsigned int n, + const void *addr) { - if (this_cpu_read(debug_idt_ctr)) - return true; + BUG_ON(n > 0xFF); - return false; + memset(data, 0, sizeof(*data)); + data->vector = n; + data->addr = addr; + data->segment = __KERNEL_CS; + data->bits.type = GATE_INTERRUPT; + data->bits.p = 1; } -static inline void load_debug_idt(void) -{ - load_idt((const struct desc_ptr *)&debug_idt_descr); -} -#else -static inline bool is_debug_idt_enabled(void) +static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) { - return false; -} + unsigned long addr = (unsigned long) d->addr; -static inline void load_debug_idt(void) -{ -} + gate->offset_low = (u16) addr; + gate->segment = (u16) d->segment; + gate->bits = d->bits; + gate->offset_middle = (u16) (addr >> 16); +#ifdef CONFIG_X86_64 + gate->offset_high = (u32) (addr >> 32); + gate->reserved = 0; #endif - -/* - * The load_current_idt() must be called with interrupts disabled - * to avoid races. That way the IDT will always be set back to the expected - * descriptor. It's also called when a CPU is being initialized, and - * that doesn't need to disable interrupts, as nothing should be - * bothering the CPU then. 
- */ -static inline void load_current_idt(void) -{ - if (is_debug_idt_enabled()) - load_debug_idt(); - else - load_idt((const struct desc_ptr *)&idt_descr); } +extern unsigned long system_vectors[]; + +extern void load_current_idt(void); extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); extern void idt_setup_apic_and_irq_gates(void); +extern bool idt_is_f00f_address(unsigned long address); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); -extern void idt_setup_ist_traps(void); -extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } -static inline void idt_setup_ist_traps(void) { } -static inline void idt_setup_debugidt_traps(void) { } #endif -extern void idt_invalidate(void *addr); +extern void idt_invalidate(void); #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index a91f3b6e4f2a..7e6b9314758a 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -8,7 +8,57 @@ * archs. */ -#ifndef __ASSEMBLY__ +/* + * Low-level interface mapping flags/field names to bits + */ + +/* Flags for _DESC_S (non-system) descriptors */ +#define _DESC_ACCESSED 0x0001 +#define _DESC_DATA_WRITABLE 0x0002 +#define _DESC_CODE_READABLE 0x0002 +#define _DESC_DATA_EXPAND_DOWN 0x0004 +#define _DESC_CODE_CONFORMING 0x0004 +#define _DESC_CODE_EXECUTABLE 0x0008 + +/* Common flags */ +#define _DESC_S 0x0010 +#define _DESC_DPL(dpl) ((dpl) << 5) +#define _DESC_PRESENT 0x0080 + +#define _DESC_LONG_CODE 0x2000 +#define _DESC_DB 0x4000 +#define _DESC_GRANULARITY_4K 0x8000 + +/* System descriptors have a numeric "type" field instead of flags */ +#define _DESC_SYSTEM(code) (code) + +/* + * High-level interface mapping intended usage to low-level combinations + * of flags + */ + +#define _DESC_DATA (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \ + _DESC_DATA_WRITABLE) +#define _DESC_CODE (_DESC_S | _DESC_PRESENT | _DESC_ACCESSED | \ + _DESC_CODE_READABLE | _DESC_CODE_EXECUTABLE) + +#define DESC_DATA16 (_DESC_DATA) +#define DESC_CODE16 (_DESC_CODE) + +#define DESC_DATA32 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB) +#define DESC_DATA32_BIOS (_DESC_DATA | _DESC_DB) + +#define DESC_CODE32 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_DB) +#define DESC_CODE32_BIOS (_DESC_CODE | _DESC_DB) + +#define DESC_TSS32 (_DESC_SYSTEM(9) | _DESC_PRESENT) + +#define DESC_DATA64 (_DESC_DATA | _DESC_GRANULARITY_4K | _DESC_DB) +#define DESC_CODE64 (_DESC_CODE | _DESC_GRANULARITY_4K | _DESC_LONG_CODE) + +#define DESC_USER (_DESC_DPL(3)) + +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -22,19 +72,19 @@ struct desc_struct { #define GDT_ENTRY_INIT(flags, base, limit) \ { \ - .limit0 = (u16) (limit), \ - .limit1 = ((limit) >> 16) & 0x0F, \ - .base0 = (u16) (base), \ - .base1 = ((base) >> 16) & 0xFF, \ - .base2 = ((base) >> 24) & 0xFF, \ - .type = (flags & 0x0f), \ - .s = (flags >> 4) & 0x01, \ - .dpl = (flags >> 5) & 0x03, \ - .p = (flags >> 7) & 0x01, \ - .avl = (flags >> 12) & 0x01, \ - .l = (flags >> 13) & 0x01, \ - .d = (flags >> 14) & 0x01, \ - .g = (flags >> 15) & 0x01, \ + .limit0 = ((limit) >> 0) & 0xFFFF, \ + .limit1 = ((limit) >> 16) & 0x000F, \ + .base0 = ((base) >> 0) & 0xFFFF, \ + .base1 = ((base) >> 16) & 0x00FF, \ + .base2 = ((base) >> 24) & 0x00FF, \ + .type = ((flags) >> 0) & 0x000F, \ + .s = ((flags) >> 4) & 0x0001, \ + .dpl = ((flags) >> 5) & 0x0003, \ + .p = ((flags) >> 7) & 0x0001, \ + .avl = ((flags) >> 12) & 0x0001, \ + 
.l = ((flags) >> 13) & 0x0001, \ + .d = ((flags) >> 14) & 0x0001, \ + .g = ((flags) >> 15) & 0x0001, \ } enum { @@ -74,6 +124,13 @@ struct idt_bits { p : 1; } __attribute__((packed)); +struct idt_data { + unsigned int vector; + unsigned int segment; + struct idt_bits bits; + const void *addr; +}; + struct gate_struct { u16 offset_low; u16 segment; @@ -87,6 +144,7 @@ struct gate_struct { typedef struct gate_struct gate_desc; +#ifndef _SETUP static inline unsigned long gate_offset(const gate_desc *g) { #ifdef CONFIG_X86_64 @@ -101,13 +159,17 @@ static inline unsigned long gate_segment(const gate_desc *g) { return g->segment; } +#endif struct desc_ptr { unsigned short size; unsigned long address; } __attribute__((packed)) ; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ + +/* Boot IDT definitions */ +#define BOOT_IDT_ENTRIES 32 /* Access rights as returned by LAR */ #define AR_TYPE_RODATA (0 * (1 << 9)) diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index a8f6c809d9b1..7c0a52ca2f4d 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -3,24 +3,8 @@ #define _ASM_X86_DEVICE_H struct dev_archdata { -#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) - void *iommu; /* hook for IOMMU specific extension */ -#endif -#ifdef CONFIG_STA2X11 - bool is_sta2x11; -#endif }; -#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) -struct dma_domain { - struct list_head node; - const struct dma_map_ops *dma_ops; - int domain_nr; -}; -void add_dma_domain(struct dma_domain *domain); -void del_dma_domain(struct dma_domain *domain); -#endif - struct pdev_archdata { }; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h deleted file mode 100644 index a5ea841cc6d2..000000000000 --- a/arch/x86/include/asm/disabled-features.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _ASM_X86_DISABLED_FEATURES_H -#define _ASM_X86_DISABLED_FEATURES_H - -/* These features, although they might be available in a CPU - * will not be used because the compile options to support - * them are not present. - * - * This code allows them to be checked and disabled at - * compile time without an explicit #ifdef. Use - * cpu_feature_enabled(). 
- */ - -#ifdef CONFIG_X86_INTEL_MPX -# define DISABLE_MPX 0 -#else -# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) -#endif - -#ifdef CONFIG_X86_SMAP -# define DISABLE_SMAP 0 -#else -# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31)) -#endif - -#ifdef CONFIG_X86_INTEL_UMIP -# define DISABLE_UMIP 0 -#else -# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31)) -#endif - -#ifdef CONFIG_X86_64 -# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) -# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) -# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) -# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) -# define DISABLE_PCID 0 -#else -# define DISABLE_VME 0 -# define DISABLE_K6_MTRR 0 -# define DISABLE_CYRIX_ARR 0 -# define DISABLE_CENTAUR_MCR 0 -# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) -#endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -# define DISABLE_PKU 0 -# define DISABLE_OSPKE 0 -#else -# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) -# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) -#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ - -#ifdef CONFIG_X86_5LEVEL -# define DISABLE_LA57 0 -#else -# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#endif - -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define DISABLE_PTI 0 -#else -# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) -#endif - -/* - * Make sure to add features to the correct mask - */ -#define DISABLED_MASK0 (DISABLE_VME) -#define DISABLED_MASK1 0 -#define DISABLED_MASK2 0 -#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 (DISABLE_PCID) -#define DISABLED_MASK5 0 -#define DISABLED_MASK6 0 -#define DISABLED_MASK7 (DISABLE_PTI) -#define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) -#define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 -#define DISABLED_MASK12 0 -#define DISABLED_MASK13 0 -#define DISABLED_MASK14 0 -#define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) -#define DISABLED_MASK17 0 -#define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) - -#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 20a46150e0a8..30fd06ede751 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -60,6 +60,12 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +/* + * gcc tends to zero extend 32bit values and do full 64bit maths. + * Define asm functions that avoid this. + * (clang generates better code for the C versions.) + */ +#ifndef __clang__ static inline u64 mul_u32_u32(u32 a, u32 b) { u32 high, low; @@ -71,8 +77,54 @@ static inline u64 mul_u32_u32(u32 a, u32 b) } #define mul_u32_u32 mul_u32_u32 +static inline u64 add_u64_u32(u64 a, u32 b) +{ + u32 high = a >> 32, low = a; + + asm ("addl %[b], %[low]; adcl $0, %[high]" + : [low] "+r" (low), [high] "+r" (high) + : [b] "rm" (b) ); + + return low | (u64)high << 32; +} +#define add_u64_u32 add_u64_u32 +#endif + +/* + * __div64_32() is never called on x86, so prevent the + * generic definition from getting built. + */ +#define __div64_32 + #else # include <asm-generic/div64.h> + +/* + * Will generate an #DE when the result doesn't fit u64, could fix with an + * __ex_table[] entry when it becomes an issue. 
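+ *
+ * Concretely: DIVQ raises #DE when the 128-bit dividend divided by @div
+ * does not fit in 64 bits, e.g. mul_u64_add_u64_div_u64(~0ULL, 2, 0, 1)
+ * would fault because the quotient (2 * U64_MAX) overflows u64.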
+ */ +static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div) +{ + u64 rdx; + + asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul)); + + if (!statically_true(!add)) + asm ("addq %[add], %[lo]; adcq $0, %[hi]" : + [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add)); + + asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div)); + + return rax; +} +#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64 + +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) +{ + return mul_u64_add_u64_div_u64(a, mul, 0, div); +} +#define mul_u64_u32_div mul_u64_u32_div + #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_DIV64_H */ diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h deleted file mode 100644 index 1a19251eaac9..000000000000 --- a/arch/x86/include/asm/dma-direct.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_DMA_DIRECT_H -#define ASM_X86_DMA_DIRECT_H 1 - -bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); - -#endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ce4d176b3d13..d1dac96ee30b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -2,35 +2,11 @@ #ifndef _ASM_X86_DMA_MAPPING_H #define _ASM_X86_DMA_MAPPING_H -/* - * IOMMU interface. See Documentation/DMA-API-HOWTO.txt and - * Documentation/DMA-API.txt for documentation. - */ - -#include <linux/scatterlist.h> -#include <linux/dma-debug.h> -#include <asm/io.h> -#include <asm/swiotlb.h> -#include <linux/dma-contiguous.h> - -#ifdef CONFIG_ISA -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) -#else -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) -#endif - -extern int iommu_merge; -extern struct device x86_dma_fallback_dev; -extern int panic_on_overflow; - extern const struct dma_map_ops *dma_ops; -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +static inline const struct dma_map_ops *get_arch_dma_ops(void) { return dma_ops; } -bool arch_dma_alloc_attrs(struct device **dev); -#define arch_dma_alloc_attrs arch_dma_alloc_attrs - #endif diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 00f7cf45e699..8ae6e0e11b8b 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -74,7 +74,7 @@ #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) #ifdef CONFIG_X86_32 /* The maximum address that we can perform a DMA transfer to on this platform */ @@ -307,12 +307,4 @@ extern int request_dma(unsigned int dmanr, const char *device_id); extern void free_dma(unsigned int dmanr); #endif -/* From PCI */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif - #endif /* _ASM_X86_DMA_H */ diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h new file mode 100644 index 000000000000..de0e88b32207 --- /dev/null +++ b/arch/x86/include/asm/doublefault.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_DOUBLEFAULT_H +#define _ASM_X86_DOUBLEFAULT_H + +#include <linux/linkage.h> + +#ifdef CONFIG_X86_32 +extern void doublefault_init_cpu_tss(void); +#else +static inline 
void doublefault_init_cpu_tss(void) +{ +} +#endif + +asmlinkage void __noreturn doublefault_shim(void); + +#endif /* _ASM_X86_DOUBLEFAULT_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae391f609840..302e11b15da8 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -2,19 +2,10 @@ #ifndef _ASM_X86_DWARF2_H #define _ASM_X86_DWARF2_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif -/* - * Macros for dwarf2 CFI unwind table entries. - * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. - */ - -#ifdef CONFIG_AS_CFI - #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_DEF_CFA .cfi_def_cfa @@ -30,20 +21,13 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) #ifndef BUILD_VDSO /* * Emit CFI data in .debug_frame sections, not .eh_frame sections. * The latter we currently just discard since we don't do DWARF * unwinding at runtime. So only the offline DWARF information is - * useful to anyone. Note we should not use this directive if - * vmlinux.lds.S gets changed so it doesn't discard .eh_frame. + * useful to anyone. Note we should not use this directive if we + * ever decide to enable DWARF unwinding at runtime. */ .cfi_sections .debug_frame #else @@ -53,33 +37,5 @@ */ .cfi_sections .eh_frame, .debug_frame #endif -#endif - -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. 
- */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 62be73b23d5c..c83645d5b2a8 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -10,12 +10,14 @@ extern struct e820_table *e820_table_firmware; extern unsigned long pci_mem_start; +extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type); extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type); extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); extern void e820__range_add (u64 start, u64 size, enum e820_type type); extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); +extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); extern void e820__print_table(char *who); extern int e820__update_table(struct e820_table *table); @@ -27,7 +29,6 @@ extern unsigned long e820__end_of_low_ram_pfn(void); extern u64 e820__memblock_alloc_reserved(u64 size, u64 align); extern void e820__memblock_setup(void); -extern void e820__reserve_setup_data(void); extern void e820__finish_early_params(void); extern void e820__reserve_resources(void); extern void e820__reserve_resources_late(void); diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h index c3aa4b5e49e2..80c4a7266629 100644 --- a/arch/x86/include/asm/e820/types.h +++ b/arch/x86/include/asm/e820/types.h @@ -29,13 +29,12 @@ enum e820_type { E820_TYPE_PRAM = 12, /* - * Reserved RAM used by the kernel itself if - * CONFIG_INTEL_TXT=y is enabled, memory of this type - * will be included in the S3 integrity calculation - * and so should not include any memory that the BIOS - * might alter over the S3 transition: + * Special-purpose memory is indicated to the system via the + * EFI_MEMORY_SP attribute. Define an e820 translation of this + * memory type for the purpose of reserving this range and + * marking it with the IORES_DESC_SOFT_RESERVED designation. */ - E820_TYPE_RESERVED_KERN = 128, + E820_TYPE_SOFT_RESERVED = 0xefffffff, }; /* diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h index 426fc53ff803..dfbd1ebb9f10 100644 --- a/arch/x86/include/asm/edac.h +++ b/arch/x86/include/asm/edac.h @@ -13,7 +13,7 @@ static inline void edac_atomic_scrub(void *va, u32 size) * are interrupt, DMA and SMP safe. 
*/ for (i = 0; i < size / 4; i++, virt_addr++) - asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); + asm volatile("lock addl $0, %0"::"m" (*virt_addr)); } #endif /* _ASM_X86_EDAC_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 107283b1eb1e..f227a70ac91f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,11 +3,17 @@ #define _ASM_X86_EFI_H #include <asm/fpu/api.h> -#include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> +#include <asm/ibt.h> +#include <linux/build_bug.h> +#include <linux/kernel.h> +#include <linux/pgtable.h> + +extern unsigned long efi_fw_vendor, efi_config_table; +extern unsigned long efi_mixed_mode_stack_pa; /* * We map the EFI regions needed for runtime services non-contiguously, @@ -18,92 +24,93 @@ * * This is the main reason why we're doing stable VA mappings for RT * services. - * - * This flag is used in conjunction with a chicken bit called - * "efi=old_map" which can be used as a fallback to the old runtime - * services mapping method in case there's some b0rkage with a - * particular EFI implementation (haha, it is hard to hold up the - * sarcasm here...). */ -#define EFI_OLD_MEMMAP EFI_ARCH_1 #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" -#define MAX_CMDLINE_ADDRESS UINT_MAX - #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF -#ifdef CONFIG_X86_32 - -extern asmlinkage unsigned long efi_call_phys(void *, ...); - -#define arch_efi_call_virt_setup() \ -({ \ - kernel_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ -}) +#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE -#define arch_efi_call_virt_teardown() \ -({ \ - firmware_restrict_branch_speculation_end(); \ - kernel_fpu_end(); \ -}) +/* + * The EFI services are called through variadic functions in many cases. These + * functions are implemented in assembler and support only a fixed number of + * arguments. The macros below allow us to check at build time that we don't + * try to call them with too many arguments. + * + * __efi_nargs() will return the number of arguments if it is 7 or less, and + * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it + * impossible to calculate the exact number of arguments beyond some + * pre-defined limit. The maximum number of arguments currently supported by + * any of the thunks is 7, so this is good enough for now and can be extended + * in the obvious way if we ever need more. + */ +#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__) +#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \ + __efi_arg_sentinel(9), __efi_arg_sentinel(8), \ + __efi_arg_sentinel(7), __efi_arg_sentinel(6), \ + __efi_arg_sentinel(5), __efi_arg_sentinel(4), \ + __efi_arg_sentinel(3), __efi_arg_sentinel(2), \ + __efi_arg_sentinel(1), __efi_arg_sentinel(0)) +#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, ...) \ + __take_second_arg(n, \ + ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 10; })) +#define __efi_arg_sentinel(n) , n /* - * Wrap all the virtual calls in a way that forces the parameters on the stack. + * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis + * represents more than n arguments. */ -#define arch_efi_call_virt(p, f, args...) \ -({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ + +#define __efi_nargs_check(f, n, ...)
\ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ }) -#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) +static inline void efi_fpu_begin(void) +{ + /* + * The UEFI calling convention (UEFI spec 2.3.2 and 2.3.4) requires + * that FCW and MXCSR (64-bit) must be initialized prior to calling + * UEFI code. (Oddly the spec does not require that the FPU stack + * be empty.) + */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} -#else /* !CONFIG_X86_32 */ +static inline void efi_fpu_end(void) +{ + kernel_fpu_end(); +} -#define EFI_LOADER_SIGNATURE "EL64" +#ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) +#else /* !CONFIG_X86_32 */ +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT -extern asmlinkage u64 efi_call(void *fp, ...); +extern asmlinkage u64 __efi_call(void *fp, ...); -#define efi_call_phys(f, args...) efi_call((f), args) +extern bool efi_disable_ibt_for_runtime; -/* - * struct efi_scratch - Scratch space used while switching to/from efi_mm - * @phys_stack: stack used during EFI Mixed Mode - * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm - */ -struct efi_scratch { - u64 phys_stack; - struct mm_struct *prev_mm; -} __packed; - -#define arch_efi_call_virt_setup() \ -({ \ - efi_sync_low_kernel_mappings(); \ - kernel_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ - \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ - efi_switch_mm(&efi_mm); \ +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ }) -#define arch_efi_call_virt(p, f, args...) \ - efi_call((void *)p->f, args) \ - -#define arch_efi_call_virt_teardown() \ -({ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ - efi_switch_mm(efi_scratch.prev_mm); \ - \ - firmware_restrict_branch_speculation_end(); \ - kernel_fpu_end(); \ +#undef arch_efi_call_virt +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ }) -extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, - u32 type, u64 attribute); - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. 
__memset function is present @@ -118,138 +125,306 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ -extern struct efi_scratch efi_scratch; -extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern pgd_t * __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); -extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init old_map_region(efi_memory_desc_t *md); -extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); -extern void efi_switch_mm(struct mm_struct *mm); -extern void efi_recover_from_page_fault(unsigned long phys_addr); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -extern void efi_reserve_boot_services(void); -struct efi_setup_data { - u64 fw_vendor; - u64 runtime; - u64 tables; - u64 smbios; - u64 reserved[8]; -}; +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); extern u64 efi_setup; #ifdef CONFIG_EFI +extern u64 __efi64_thunk(u32, ...); -static inline bool efi_is_native(void) +#define efi64_thunk(...) 
({ \ + u64 __pad[3]; /* must have space for 3 args on the stack */ \ + __efi_nargs_check(efi64_thunk, 9, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__, __pad); \ +}) + +static inline bool efi_is_mixed(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); } static inline bool efi_runtime_supported(void) { - if (efi_is_native()) + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) - return true; - - return false; + return IS_ENABLED(CONFIG_EFI_MIXED); } -extern struct console early_efi_console; extern void parse_efi_setup(u64 phys_addr, u32 data_len); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); +extern void efi_thunk_runtime_setup(void); +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map, + unsigned long systab_phys); + +/* arch specific definitions used by the stub code */ #ifdef CONFIG_EFI_MIXED -extern void efi_thunk_runtime_setup(void); -extern efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map); -#else -static inline void efi_thunk_runtime_setup(void) {} -static inline efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) + +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + +#define ARCH_HAS_EFISTUB_WRAPPERS + +static inline bool efi_is_64bit(void) { - return EFI_SUCCESS; + extern const bool efi_is64; + + return efi_is64; } -#endif /* CONFIG_EFI_MIXED */ +static inline bool efi_is_native(void) +{ + return efi_is_64bit(); +} -/* arch specific definitions used by the stub code */ +#define efi_table_attr(inst, attr) \ + (efi_is_native() ? (inst)->attr \ + : efi_mixed_table_attr((inst), attr)) -struct efi_config { - u64 image_handle; - u64 table; - u64 runtime_services; - u64 boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; +#define efi_mixed_table_attr(inst, attr) \ + (__typeof__(inst->attr)) \ + _Generic(inst->mixed_mode.attr, \ + u32: (unsigned long)(inst->mixed_mode.attr), \ + default: (inst->mixed_mode.attr)) -__pure const struct efi_config *__efi_early(void); +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use case for this is to initialize the upper 32 bits of + * output parameters, and where the 32-bit method requires a 64-bit argument, + * which must be split up into two arguments to be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. 
For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ -static inline bool efi_is_64bit(void) +static inline void *efi64_zero_upper(void *p) { - if (!IS_ENABLED(CONFIG_X86_64)) - return false; + if (p) + ((u32 *)p)[1] = 0; + return p; +} - if (!IS_ENABLED(CONFIG_EFI_MIXED)) - return true; +static inline u32 efi64_convert_status(efi_status_t status) +{ + return (u32)(status | (u64)status >> 32); +} - return __efi_early()->is64; +#define __efi64_split(val) (val) & U32_MAX, (u64)(val) >> 32 + +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) + +#define __efi64_argmap_locate_handle_buffer(type, proto, key, num, buf) \ + ((type), (proto), (key), efi64_zero_upper(num), efi64_zero_upper(buf)) + +#define __efi64_argmap_create_event(type, tpl, f, c, event) \ + ((type), (tpl), (f), (c), efi64_zero_upper(event)) + +#define __efi64_argmap_set_timer(event, type, time) \ + ((event), (type), lower_32_bits(time), upper_32_bits(time)) + +#define __efi64_argmap_wait_for_event(num, event, index) \ + ((num), (event), efi64_zero_upper(index)) + +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_device_path(protocol, path, handle) \ + ((protocol), (path), efi64_zero_upper(handle)) + +#define __efi64_argmap_exit(handle, status, size, data) \ + ((handle), efi64_convert_status(status), (size), (data)) + +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + +/* LoadFile */ +#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \ + ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf)) + +/* Graphics Output Protocol */ +#define __efi64_argmap_query_mode(gop, mode, size, info) \ + ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info)) + +/* TCG2 protocol */ +#define __efi64_argmap_hash_log_extend_event(prot, fl, addr, size, ev) \ + ((prot), (fl), 0ULL, (u64)(addr), 0ULL, (u64)(size), 0ULL, ev) + +/* DXE services */ +#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \ + (__efi64_split(phys), (desc)) + +#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ + (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +/* file protocol */ +#define __efi64_argmap_open(prot, newh, fname, mode, attr) \ + ((prot), efi64_zero_upper(newh), (fname), __efi64_split(mode), \ + __efi64_split(attr)) + +#define __efi64_argmap_set_position(pos) (__efi64_split(pos)) + +/* file system protocol */ +#define __efi64_argmap_open_volume(prot, file) \ + ((prot), efi64_zero_upper(file)) + +/* Memory Attribute Protocol */ +#define __efi64_argmap_get_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), (flags)) + +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) 
\ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +/* EFI SMBIOS protocol */ +#define __efi64_argmap_get_next(protocol, smbioshandle, type, record, phandle) \ + ((protocol), (smbioshandle), (type), efi64_zero_upper(record), \ + efi64_zero_upper(phandle)) +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_<method name>, following the examples above. + */ + +#define __efi64_thunk_map(inst, func, ...) \ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +static inline efi_status_t __efi64_widen_efi_status(u64 status) +{ + /* use rotate to move the value of bit #31 into position #63 */ + return ror64(rol32(status, 1), 1); } -#define efi_table_attr(table, attr, instance) \ - (efi_is_64bit() ? \ - ((table##_64_t *)(unsigned long)instance)->attr : \ - ((table##_32_t *)(unsigned long)instance)->attr) +/* The macro below handles dispatching via the thunk if needed */ -#define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call(efi_table_attr(protocol, f, instance), \ - instance, ##__VA_ARGS__) +#define efi_fn_call(inst, func, ...) \ + (efi_is_native() ? (inst)->func(__VA_ARGS__) \ + : efi_mixed_call((inst), func, ##__VA_ARGS__)) -#define efi_call_early(f, ...) \ - __efi_early()->call(efi_table_attr(efi_boot_services, f, \ - __efi_early()->boot_services), __VA_ARGS__) +#define efi_mixed_call(inst, func, ...) \ + _Generic(inst->func(__VA_ARGS__), \ + efi_status_t: \ + __efi64_widen_efi_status( \ + __efi64_thunk_map(inst, func, ##__VA_ARGS__)), \ + u64: ({ BUILD_BUG(); ULONG_MAX; }), \ + default: \ + (__typeof__(inst->func(__VA_ARGS__))) \ + __efi64_thunk_map(inst, func, ##__VA_ARGS__)) -#define __efi_call_early(f, ...) \ - __efi_early()->call((unsigned long)f, __VA_ARGS__); +#else /* CONFIG_EFI_MIXED */ + +static inline bool efi_is_64bit(void) +{ + return IS_ENABLED(CONFIG_X86_64); +} -#define efi_call_runtime(f, ...) 
\ - __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ - __efi_early()->runtime_services), __VA_ARGS__) +#endif /* CONFIG_EFI_MIXED */ extern bool efi_reboot_required(void); +extern bool efi_is_table_address(unsigned long phys_addr); +extern void efi_reserve_boot_services(void); #else static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} static inline bool efi_reboot_required(void) { return false; } +static inline bool efi_is_table_address(unsigned long phys_addr) +{ + return false; +} +static inline void efi_reserve_boot_services(void) +{ +} #endif /* CONFIG_EFI */ +extern int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data); + +extern int __init efi_memmap_install(struct efi_memory_map_data *data); +extern int __init efi_memmap_split_count(efi_memory_desc_t *md, + struct range *range); +extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct efi_mem_range *mem); + +extern enum efi_secureboot_mode __x86_ima_efi_boot_mode(void); + +#define arch_ima_efi_boot_mode __x86_ima_efi_boot_mode() + +#ifdef CONFIG_EFI_RUNTIME_MAP +int efi_get_runtime_map_size(void); +int efi_get_runtime_map_desc_size(void); +int efi_runtime_map_copy(void *buf, size_t bufsz); +#else +static inline int efi_get_runtime_map_size(void) +{ + return 0; +} + +static inline int efi_get_runtime_map_desc_size(void) +{ + return 0; +} + +static inline int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + return 0; +} + +#endif + #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 69c0f892e310..6c8fdc96be7e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -7,6 +7,7 @@ */ #include <linux/thread_info.h> +#include <asm/ia32.h> #include <asm/ptrace.h> #include <asm/user.h> #include <asm/auxvec.h> @@ -21,8 +22,6 @@ typedef struct user_i387_struct elf_fpregset_t; #ifdef __i386__ -typedef struct user_fxsr_struct elf_fpxregset_t; - #define R_386_NONE 0 #define R_386_32 1 #define R_386_PC32 2 @@ -55,8 +54,9 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ #define R_X86_64_RELATIVE 8 /* Adjust by program base */ -#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative - offset to GOT */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */ +#define R_X86_64_GOTPCRELX 41 +#define R_X86_64_REX_GOTPCRELX 42 #define R_X86_64_32 10 /* Direct 32 bit zero extended */ #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ #define R_X86_64_16 12 /* Direct 16 bit zero extended */ @@ -76,12 +76,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #include <asm/vdso.h> -#ifdef CONFIG_X86_64 extern unsigned int vdso64_enabled; -#endif -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) extern unsigned int vdso32_enabled; -#endif /* * This is used to ensure we don't load something for the wrong architecture. 
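/*
 * An illustrative aside, not part of the patch itself: the R_X86_64_GOTPCRELX
 * and R_X86_64_REX_GOTPCRELX relocation numbers added above exist so the
 * linker may relax a GOT-indirect load per the x86-64 psABI, e.g.
 *
 *	movq	foo@GOTPCREL(%rip), %rax	# R_X86_64_REX_GOTPCRELX
 *
 * can be rewritten into "leaq foo(%rip), %rax" when foo is known to bind
 * locally.
 */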
@@ -118,7 +114,7 @@ extern unsigned int vdso32_enabled;
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)	\
+#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
 do {						\
 	pr_reg[0] = regs->bx;			\
 	pr_reg[1] = regs->cx;			\
@@ -130,6 +126,7 @@ do {						\
 	pr_reg[7] = regs->ds;			\
 	pr_reg[8] = regs->es;			\
 	pr_reg[9] = regs->fs;			\
+	savesegment(gs, pr_reg[10]);		\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
 	pr_reg[13] = regs->cs;			\
@@ -138,18 +135,6 @@ do {						\
 	pr_reg[16] = regs->ss;			\
 } while (0);
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)		\
-do {						\
-	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-	pr_reg[10] = get_user_gs(regs);		\
-} while (0);
-
-#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)	\
-do {						\
-	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
-	savesegment(gs, pr_reg[10]);		\
-} while (0);
-
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
 
@@ -162,13 +147,9 @@ do {						\
 	((x)->e_machine == EM_X86_64)
 
 #define compat_elf_check_arch(x)					\
-	(elf_check_arch_ia32(x) ||					\
+	((elf_check_arch_ia32(x) && ia32_enabled_verbose()) ||		\
 	 (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
 
-#if __USER32_DS != __USER_DS
-# error "The following code assumes __USER32_DS == __USER_DS"
-#endif
-
 static inline void elf_common_init(struct thread_struct *t,
 				   struct pt_regs *regs, const u16 ds)
 {
@@ -188,8 +169,9 @@ static inline void elf_common_init(struct thread_struct *t,
 #define COMPAT_ELF_PLAT_INIT(regs, load_addr)		\
 	elf_common_init(&current->thread, regs, __USER_DS)
 
-void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
-#define compat_start_thread compat_start_thread
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32);
+#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp)	\
+	compat_start_thread(regs, new_ip, new_sp, ex->e_machine == EM_X86_64)
 
 void set_personality_ia32(bool);
 #define COMPAT_SET_PERSONALITY(ex)			\
@@ -238,7 +220,6 @@ do {								\
 /* I'm not sure if we can use '-' here */
 #define ELF_PLATFORM       ("x86_64")
 extern void set_personality_64bit(void);
-extern unsigned int sysctl_vsyscall32;
 extern int force_personality32;
 
 #endif /* !CONFIG_X86_32 */
 
@@ -281,9 +262,29 @@ extern u32 elf_hwcap2;
 /*
  * An executable for which elf_read_implies_exec() returns TRUE will
  * have the READ_IMPLIES_EXEC personality flag set automatically.
+ *
+ * The decision process for determining the results is:
+ *
+ *                 CPU: | lacks NX*  | has NX, ia32     | has NX, x86_64 |
+ * ELF:                 |            |                  |                |
+ * ---------------------|------------|------------------|----------------|
+ * missing PT_GNU_STACK | exec-all   | exec-all         | exec-none      |
+ * PT_GNU_STACK == RWX  | exec-stack | exec-stack       | exec-stack     |
+ * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      |
+ *
+ *  exec-all  : all PROT_READ user mappings are executable, except when
+ *              backed by files on a noexec-filesystem.
+ *  exec-none : only PROT_EXEC user mappings are executable.
+ *  exec-stack: only the stack and PROT_EXEC user mappings are executable.
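+ *
+ * A worked example (illustrative): a 32-bit binary that lacks
+ * PT_GNU_STACK, run on NX-capable hardware, falls into the
+ * "has NX, ia32" column, first row, and therefore gets exec-all.
+ * That is exactly what the elf_read_implies_exec() definition below
+ * computes from mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT.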
+ * + * *this column has no architectural effect: NX markings are ignored by + * hardware, but may have behavioral effects when "wants X" collides with + * "cannot be X" constraints in memory permission flags, as in + * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com + * */ #define elf_read_implies_exec(ex, executable_stack) \ - (executable_stack != EXSTACK_DISABLE_X) + (mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT) struct task_struct; @@ -293,6 +294,7 @@ do { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* @@ -309,6 +311,7 @@ extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 @@ -330,6 +333,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */ @@ -338,14 +342,15 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) #define AT_SYSINFO 32 #define COMPAT_ARCH_DLINFO \ -if (test_thread_flag(TIF_X32)) \ +if (exec->e_machine == EM_X86_64) \ ARCH_DLINFO_X32; \ -else \ +else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \ ARCH_DLINFO_IA32 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) @@ -364,8 +369,12 @@ struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp); -#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages + int uses_interp, bool x32); +#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ + compat_arch_setup_additional_pages(bprm, interpreter, \ + (ex->e_machine == EM_X86_64)) + +extern bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs); /* Do not change the values. See get_align_mask() */ enum align_flags { @@ -380,5 +389,4 @@ struct va_alignment { } ____cacheline_aligned; extern struct va_alignment va_align; -extern unsigned long align_vdso_addr(unsigned long); #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/elfcore-compat.h b/arch/x86/include/asm/elfcore-compat.h new file mode 100644 index 000000000000..f1b6c7a8d8fc --- /dev/null +++ b/arch/x86/include/asm/elfcore-compat.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_ELFCORE_COMPAT_H +#define _ASM_X86_ELFCORE_COMPAT_H + +#include <asm/user32.h> + +/* + * On amd64 we have two 32bit ABIs - i386 and x32. The latter + * has bigger registers, so we use it for compat_elf_regset_t. + * The former uses i386_elf_prstatus and PRSTATUS_SIZE/SET_PR_FPVALID + * are used to choose the size and location of ->pr_fpvalid of + * the layout actually used. + */ +typedef struct user_regs_struct compat_elf_gregset_t; + +struct i386_elf_prstatus +{ + struct compat_elf_prstatus_common common; + struct user_regs_struct32 pr_reg; + compat_int_t pr_fpvalid; +}; + +#define PRSTATUS_SIZE \ + (user_64bit_mode(task_pt_regs(current)) \ + ? 
sizeof(struct compat_elf_prstatus) \ + : sizeof(struct i386_elf_prstatus)) +#define SET_PR_FPVALID(S) \ + (*(user_64bit_mode(task_pt_regs(current)) \ + ? &(S)->pr_fpvalid \ + : &((struct i386_elf_prstatus *)(S))->pr_fpvalid) = 1) + +#endif diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 000000000000..70f5b98a5286 --- /dev/null +++ b/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */ + +#endif diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h new file mode 100644 index 000000000000..b1314e41a744 --- /dev/null +++ b/arch/x86/include/asm/enclu.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ENCLU_H +#define _ASM_X86_ENCLU_H + +#define EENTER 0x02 +#define ERESUME 0x03 +#define EEXIT 0x04 + +#endif /* _ASM_X86_ENCLU_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h new file mode 100644 index 000000000000..ce3eb6d5fdf9 --- /dev/null +++ b/arch/x86/include/asm/entry-common.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_ENTRY_COMMON_H +#define _ASM_X86_ENTRY_COMMON_H + +#include <linux/randomize_kstack.h> +#include <linux/user-return-notifier.h> + +#include <asm/nospec-branch.h> +#include <asm/io_bitmap.h> +#include <asm/fpu/api.h> +#include <asm/fred.h> + +/* Check that the stack and regs on entry from user mode are sane. */ +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { + /* + * Make sure that the entry code gave us a sensible EFLAGS + * register. Native because we want to check the actual CPU + * state, not the interrupt state as imagined by Xen. + */ + unsigned long flags = native_save_fl(); + unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; + + /* + * For !SMAP hardware we patch out CLAC on entry. + */ + if (cpu_feature_enabled(X86_FEATURE_SMAP) || + cpu_feature_enabled(X86_FEATURE_XENPV)) + mask |= X86_EFLAGS_AC; + + WARN_ON_ONCE(flags & mask); + + /* We think we came from user mode. Make sure pt_regs agrees. */ + WARN_ON_ONCE(!user_mode(regs)); + + /* + * All entries from user mode (except #DF) should be on the + * normal thread stack and should have user pt_regs in the + * correct location. + */ + WARN_ON_ONCE(!on_thread_stack()); + WARN_ON_ONCE(regs != task_pt_regs(current)); + } +} +#define arch_enter_from_user_mode arch_enter_from_user_mode + +static inline void arch_exit_work(unsigned long ti_work) +{ + if (ti_work & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); + + if (unlikely(ti_work & _TIF_IO_BITMAP)) + tss_update_io_bitmap(); + + if (unlikely(ti_work & _TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + fpregs_assert_state_consistent(); + + if (unlikely(ti_work)) + arch_exit_work(ti_work); + + fred_update_rsp0(); + +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. 
Make sure we clear it before + * returning to user mode. We need to clear it *after* signal + * handling, because syscall restart has a fixup for compat + * syscalls. The fixup is exercised by the ptrace_syscall_32 + * selftest. + * + * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer + * special case only applies after poking regs and before the + * very next return to user mode. + */ + current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED); +#endif + + /* + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints (see cc_stack_align4/8 in + * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) + * low bits from any entropy chosen here. + * + * Therefore, final stack offset entropy will be 7 (x86_64) or + * 8 (ia32) bits. + */ + choose_random_kstack_offset(rdtsc()); + + /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) && + this_cpu_read(x86_ibpb_exit_to_user)) { + indirect_branch_prediction_barrier(); + this_cpu_write(x86_ibpb_exit_to_user, false); + } +} +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare + +static __always_inline void arch_exit_to_user_mode(void) +{ + amd_clear_divider(); +} +#define arch_exit_to_user_mode arch_exit_to_user_mode + +#endif diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h deleted file mode 100644 index 416422762845..000000000000 --- a/arch/x86/include/asm/entry_arch.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is designed to contain the BUILD_INTERRUPT specifications for - * all of the extra named interrupt vectors used by the architecture. - * Usually this is the Inter Process Interrupts (IPIs) - */ - -/* - * The following vectors are part of the Linux architecture, there - * is no hardware IRQ pin equivalent for them, they are triggered - * through the ICC by us (IPIs) - */ -#ifdef CONFIG_SMP -BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) -BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) -BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) -#endif - -#ifdef CONFIG_HAVE_KVM -BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) -#endif - -/* - * every pentium local APIC has two 'local interrupts', with a - * soft-definable vector attached to both interrupts, one of - * which is a timer interrupt, the other one is error counter - * overflow. 
Linux uses the local APIC timer interrupt to get - * a much simpler SMP time architecture: - */ -#ifdef CONFIG_X86_LOCAL_APIC - -BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) -BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) -BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) - -#ifdef CONFIG_IRQ_WORK -BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) -#endif - -#ifdef CONFIG_X86_THERMAL_VECTOR -BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_THRESHOLD -BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_AMD -BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) -#endif -#endif diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h deleted file mode 100644 index 47b7a1296245..000000000000 --- a/arch/x86/include/asm/error-injection.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ERROR_INJECTION_H -#define _ASM_ERROR_INJECTION_H - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm-generic/error-injection.h> - -asmlinkage void just_return_func(void); -void override_function_with_return(struct pt_regs *regs); - -#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index d8c2198d543b..a0e0c6b50155 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_EXTABLE_H #define _ASM_X86_EXTABLE_H + +#include <asm/extable_fixup_types.h> + /* - * The exception table consists of triples of addresses relative to the - * exception table entry itself. The first address is of an instruction - * that is allowed to fault, the second is the target at which the program - * should continue. The third is a handler function to deal with the fault - * caused by the instruction in the first field. + * The exception table consists of two addresses relative to the + * exception table entry itself and a type selector field. + * + * The first address is of an instruction that is allowed to fault, the + * second is the target at which the program should continue. + * + * The type entry is used by fixup_exception() to select the handler to + * deal with the fault caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. 
This means when everything is well, @@ -15,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, handler; + int insn, fixup, data; }; struct pt_regs; @@ -25,14 +31,30 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->handler = (b)->handler + (delta); \ - (b)->handler = (tmp).handler - (delta); \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ } while (0) extern int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr); -extern int fixup_bug(struct pt_regs *regs, int trapnr); -extern bool ex_has_fault_handler(unsigned long ip); +extern int ex_get_fixup_type(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); +#ifdef CONFIG_X86_MCE +extern void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr); +#else +static inline void __noreturn ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) +{ + for (;;) + cpu_relax(); +} +#endif + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs); +#else +static inline bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) { return false; } +#endif + #endif diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h new file mode 100644 index 000000000000..906b0d5541e8 --- /dev/null +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H +#define _ASM_X86_EXTABLE_FIXUP_TYPES_H + +/* + * Our IMM is signed, as such it must live at the top end of the word. Also, + * since C99 hex constants are of ambiguous type, force cast the mask to 'int' + * so that FIELD_GET() will DTRT and sign extend the value when it extracts it. 
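+ *
+ * A minimal decode sketch (assumes <linux/bitfield.h>; "e" stands for a
+ * hypothetical struct exception_table_entry pointer):
+ *
+ *	int type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
+ *	int imm  = FIELD_GET(EX_DATA_IMM_MASK, e->data);
+ *
+ * where "imm" comes back sign-extended because of the 'int' casts below.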
+ */ +#define EX_DATA_TYPE_MASK ((int)0x000000FF) +#define EX_DATA_REG_MASK ((int)0x00000F00) +#define EX_DATA_FLAG_MASK ((int)0x0000F000) +#define EX_DATA_IMM_MASK ((int)0xFFFF0000) + +#define EX_DATA_REG_SHIFT 8 +#define EX_DATA_FLAG_SHIFT 12 +#define EX_DATA_IMM_SHIFT 16 + +#define EX_DATA_REG(reg) ((reg) << EX_DATA_REG_SHIFT) +#define EX_DATA_FLAG(flag) ((flag) << EX_DATA_FLAG_SHIFT) +#define EX_DATA_IMM(imm) ((imm) << EX_DATA_IMM_SHIFT) + +/* segment regs */ +#define EX_REG_DS EX_DATA_REG(8) +#define EX_REG_ES EX_DATA_REG(9) +#define EX_REG_FS EX_DATA_REG(10) +#define EX_REG_GS EX_DATA_REG(11) + +/* flags */ +#define EX_FLAG_CLEAR_AX EX_DATA_FLAG(1) +#define EX_FLAG_CLEAR_DX EX_DATA_FLAG(2) +#define EX_FLAG_CLEAR_AX_DX EX_DATA_FLAG(3) + +/* types */ +#define EX_TYPE_NONE 0 +#define EX_TYPE_DEFAULT 1 +#define EX_TYPE_FAULT 2 +#define EX_TYPE_UACCESS 3 +/* unused, was: #define EX_TYPE_COPY 4 */ +#define EX_TYPE_CLEAR_FS 5 +#define EX_TYPE_FPU_RESTORE 6 +#define EX_TYPE_BPF 7 +#define EX_TYPE_WRMSR 8 +#define EX_TYPE_RDMSR 9 +#define EX_TYPE_WRMSR_SAFE 10 /* reg := -EIO */ +#define EX_TYPE_RDMSR_SAFE 11 /* reg := -EIO */ +#define EX_TYPE_WRMSR_IN_MCE 12 +#define EX_TYPE_RDMSR_IN_MCE 13 +#define EX_TYPE_DEFAULT_MCE_SAFE 14 +#define EX_TYPE_FAULT_MCE_SAFE 15 + +#define EX_TYPE_POP_REG 16 /* sp += sizeof(long) */ +#define EX_TYPE_POP_ZERO (EX_TYPE_POP_REG | EX_DATA_IMM(0)) + +#define EX_TYPE_IMM_REG 17 /* reg := (long)imm */ +#define EX_TYPE_EFAULT_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(-EFAULT)) +#define EX_TYPE_ZERO_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(0)) +#define EX_TYPE_ONE_REG (EX_TYPE_IMM_REG | EX_DATA_IMM(1)) + +#define EX_TYPE_FAULT_SGX 18 + +#define EX_TYPE_UCOPY_LEN 19 /* cx := reg + imm*cx */ +#define EX_TYPE_UCOPY_LEN1 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(1)) +#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4)) +#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8)) + +#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */ + +#define EX_TYPE_ERETU 21 + +#endif diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h deleted file mode 100644 index ab4c960146e3..000000000000 --- a/arch/x86/include/asm/fb.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_FB_H -#define _ASM_X86_FB_H - -#include <linux/fb.h> -#include <linux/fs.h> -#include <asm/page.h> - -static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, - unsigned long off) -{ - unsigned long prot; - - prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK; - if (boot_cpu_data.x86 > 3) - pgprot_val(vma->vm_page_prot) = - prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS); -} - -extern int fb_is_primary_device(struct fb_info *info); - -#endif /* _ASM_X86_FB_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 50ba74a34a37..4519c9f35ba0 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,24 +14,30 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +#include <asm/kmap_size.h> + /* * Exposed to assembly code for setting up initial page tables. Cannot be * calculated in assembly code (fixmap entries are an enum), but is sanity * checked in the actual fixmap C code to make sure that the fixmap is * covered fully. 
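 *
 * (Illustrative arithmetic for the CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP case
 * below: one PMD covers 512 ptes, so KM_PMDS reserves KM_MAX_IDX PMD
 * entries per group of 512 possible CPUs, rounded up.)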
*/ -#define FIXMAP_PMD_NUM 2 +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define FIXMAP_PMD_NUM 2 +#else +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) +# define FIXMAP_PMD_NUM (KM_PMDS + 2) +#endif /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ #define FIXMAP_PMD_TOP 507 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> -#include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> +#include <asm/pgtable_types.h> #ifdef CONFIG_X86_32 #include <linux/threads.h> -#include <asm/kmap_types.h> #else #include <uapi/asm/vsyscall.h> #endif @@ -42,8 +48,7 @@ * Because of this, FIXADDR_TOP x86 integration was left as later work. */ #ifdef CONFIG_X86_32 -/* used by vmalloc.c, vsyscall.lds.S. - * +/* * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ @@ -93,21 +98,16 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif -#ifdef CONFIG_X86_32 +#ifdef CONFIG_KMAP_LOCAL FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL FIX_PARAVIRT_BOOTMAP, #endif - FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ - FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ -#ifdef CONFIG_X86_INTEL_MID - FIX_LNW_VRTC, -#endif #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -122,7 +122,7 @@ enum fixed_addresses { * before ioremap() is functional. * * If necessary we round it up to the next 512 pages boundary so - * that we can have a single pgd entry and a single pte table: + * that we can have a single pmd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 #define FIX_BTMAPS_SLOTS 8 @@ -154,12 +154,10 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; -extern pte_t *kmap_pte; -#define kmap_prot PAGE_KERNEL extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT_XXL @@ -198,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index 7ec59edde154..e7a244051c62 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -10,6 +10,7 @@ #ifndef _ASM_X86_FLOPPY_H #define _ASM_X86_FLOPPY_H +#include <linux/sizes.h> #include <linux/vmalloc.h> /* @@ -22,17 +23,14 @@ */ #define _CROSS_64KB(a, s, vdma) \ (!(vdma) && \ - ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) - -#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1) - + ((unsigned long)(a) / SZ_64K != ((unsigned long)(a) + (s) - 1) / SZ_64K)) #define SW fd_routine[use_virtual_dma & 1] #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) inb_p(port) -#define fd_outb(value, port) outb_p(value, port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_request_dma() 
CSW._request_dma(FLOPPY_DMA, "floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) @@ -74,28 +72,28 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) int lcount; char *lptr; - st = 1; for (lcount = virtual_dma_count, lptr = virtual_dma_addr; lcount; lcount--, lptr++) { - st = inb(virtual_dma_port + 4) & 0xa0; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port + 5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port + 5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port + 4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT @@ -206,7 +204,7 @@ static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) { #ifdef FLOPPY_SANITY_CHECK - if (CROSS_64KB(addr, size)) { + if (_CROSS_64KB(addr, size, use_virtual_dma & 1)) { printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); return -1; } diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h new file mode 100644 index 000000000000..b2743fe19339 --- /dev/null +++ b/arch/x86/include/asm/fpu.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +#include <asm/fpu/api.h> + +#define kernel_fpu_available() true + +#endif /* ! _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b56d504af654..cd6f194a912b 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,17 +10,94 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H +#include <linux/bottom_half.h> + +#include <asm/fpu/types.h> /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It - * disables preemption so be careful if you intend to use it for long periods - * of time. - * If you intend to use the FPU in softirq you need to check first with - * irq_fpu_usable() if it is possible. + * disables preemption and softirq processing, so be careful if you intend to + * use it for long periods of time. Kernel-mode FPU cannot be used in all + * contexts -- see irq_fpu_usable() for details. */ -extern void kernel_fpu_begin(void); + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); +extern void fpregs_mark_activate(void); + +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ +#ifdef CONFIG_X86_64 + /* + * Any 64-bit code that uses 387 instructions must explicitly request + * KFPU_387. + */ + kernel_fpu_begin_mask(KFPU_MXCSR); +#else + /* + * 32-bit kernel code may use 387 operations as well as SSE2, etc, + * as long as it checks that the CPU has the required capability. 
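+	 *
+	 * A minimal usage sketch (illustrative; do_sse2_work() is a
+	 * made-up helper):
+	 *
+	 *	if (boot_cpu_has(X86_FEATURE_XMM2)) {
+	 *		kernel_fpu_begin();
+	 *		do_sse2_work();
+	 *		kernel_fpu_end();
+	 *	}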
+ */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +#endif +} + +/* + * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate, or while + * using the FPU in kernel mode. A context switch will (and softirq might) save + * CPU's FPU registers to fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving + * CPU's FPU registers in a random state. + * + * local_bh_disable() protects against both preemption and soft interrupts + * on !RT kernels. + * + * On RT kernels local_bh_disable() is not sufficient because it only + * serializes soft interrupt related sections via a local lock, but stays + * preemptible. Disabling preemption is the right choice here as bottom + * half processing is always in thread context on RT kernels so it + * implicitly prevents bottom half processing as well. + */ +static inline void fpregs_lock(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +/* + * FPU state gets lazily restored before returning to userspace. So when in the + * kernel, the valid FPU state may be kept in the buffer. This function will force + * restore all the fpu state to the registers early if needed, and lock them from + * being automatically saved/restored. Then FPU state can be modified safely in the + * registers, before unlocking with fpregs_unlock(). + */ +void fpregs_lock_and_load(void); + +#ifdef CONFIG_X86_DEBUG_FPU +extern void fpregs_assert_state_consistent(void); +#else +static inline void fpregs_assert_state_consistent(void) { } +#endif + +/* + * Load the task FPU state before returning to userspace. + */ +extern void switch_fpu_return(void); /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. 
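 * (For example, cpu_has_xfeatures(XFEATURE_MASK_FPSSE, NULL) -- an
 * illustrative call -- checks that FP and SSE state are both supported.)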
@@ -31,4 +108,73 @@ extern bool irq_fpu_usable(void); */ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); +/* Trap handling */ +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); +extern void fpu_reset_from_exception_fixup(void); + +/* Boot, hotplug and resume */ +extern void fpu__init_cpu(void); +extern void fpu__init_system(void); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif + +/* State tracking */ +DECLARE_PER_CPU(bool, kernel_fpu_allowed); +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* Process cleanup */ +#ifdef CONFIG_X86_64 +extern void fpstate_free(struct fpu *fpu); +#else +static inline void fpstate_free(struct fpu *fpu) { } +#endif + +/* fpstate-related functions which are exported to KVM */ +extern void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature); + +extern u64 xstate_get_guest_group_perm(void); + +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + + +/* KVM specific functions */ +extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); +extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); +extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); +extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures); + +#ifdef CONFIG_X86_64 +extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd); +extern void fpu_sync_guest_vmexit_xfd_state(void); +#else +static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { } +static inline void fpu_sync_guest_vmexit_xfd_state(void) { } +#endif + +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); +extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); + +static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +{ + gfpu->fpstate->is_confidential = true; +} + +static inline bool fpstate_is_confidential(struct fpu_guest *gfpu) +{ + return gfpu->fpstate->is_confidential; +} + +/* prctl */ +extern long fpu_xstate_prctl(int option, unsigned long arg2); + +extern void fpu_idle_fpregs(void); + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h deleted file mode 100644 index fa2c93cb42a2..000000000000 --- a/arch/x86/include/asm/fpu/internal.h +++ /dev/null @@ -1,610 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_FPU_INTERNAL_H -#define _ASM_X86_FPU_INTERNAL_H - -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include <asm/user.h> -#include <asm/fpu/api.h> -#include <asm/fpu/xstate.h> -#include <asm/cpufeature.h> -#include <asm/trace/fpu.h> - -/* - * High level FPU state handling functions: - */ -extern void fpu__initialize(struct fpu *fpu); -extern void fpu__prepare_read(struct fpu *fpu); -extern void fpu__prepare_write(struct fpu *fpu); -extern void fpu__save(struct fpu *fpu); -extern void fpu__restore(struct fpu *fpu); -extern int 
fpu__restore_sig(void __user *buf, int ia32_frame); -extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); -extern void fpu__clear(struct fpu *fpu); -extern int fpu__exception_code(struct fpu *fpu, int trap_nr); -extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); - -/* - * Boot time FPU initialization functions: - */ -extern void fpu__init_cpu(void); -extern void fpu__init_system_xstate(void); -extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpu__init_check_bugs(void); -extern void fpu__resume_cpu(void); -extern u64 fpu__get_supported_xfeatures_mask(void); - -/* - * Debugging facility: - */ -#ifdef CONFIG_X86_DEBUG_FPU -# define WARN_ON_FPU(x) WARN_ON_ONCE(x) -#else -# define WARN_ON_FPU(x) ({ (void)(x); 0; }) -#endif - -/* - * FPU related CPU feature flag helper routines: - */ -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has(X86_FEATURE_FXSR); -} - -/* - * fpstate handling functions: - */ - -extern union fpregs_state init_fpstate; - -extern void fpstate_init(union fpregs_state *state); -#ifdef CONFIG_MATH_EMULATION -extern void fpstate_init_soft(struct swregs_state *soft); -#else -static inline void fpstate_init_soft(struct swregs_state *soft) {} -#endif - -static inline void fpstate_init_xstate(struct xregs_state *xsave) -{ - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; -} - -static inline void fpstate_init_fxstate(struct fxregs_state *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} -extern void fpstate_sanitize_xstate(struct fpu *fpu); - -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - \ - might_fault(); \ - \ - asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ - "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define kernel_insn(insn, output, input...) \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ - : output : input) - -static inline int copy_fregs_to_user(struct fregs_state __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); -} - -static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) { - kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - } else { - /* See comment in copy_fxregs_to_kernel() below. 
*/ - kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); - } - } -} - -static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in copy_fxregs_to_kernel() below. */ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline void copy_kernel_to_fregs(struct fregs_state *fx) -{ - kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int copy_user_to_fregs(struct fregs_state __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void copy_fxregs_to_kernel(struct fpu *fpu) -{ - if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state.fxsave) - : [fx] "R" (&fpu->state.fxsave)); - } -} - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -#define XSTATE_OP(op, st, lmask, hmask, err) \ - asm volatile("1:" op "\n\t" \ - "xor %[err], %[err]\n" \ - "2:\n\t" \ - ".pushsection .fixup,\"ax\"\n\t" \ - "3: movl $-2,%[err]\n\t" \ - "jmp 2b\n\t" \ - ".popsection\n\t" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact - * format and supervisor states in addition to modified optimization in - * XSAVEOPT. - * - * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT - * supports modified optimization which is not supported by XSAVE. - * - * We use XSAVE as a fallback. - * - * The 661 label is defined in the ALTERNATIVE* macros as the address of the - * original instruction which gets replaced. We need to use it here as the - * address of the instruction where we might get an exception at. 
- */ -#define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_2(XSAVE, \ - XSAVEOPT, X86_FEATURE_XSAVEOPT, \ - XSAVES, X86_FEATURE_XSAVES) \ - "\n" \ - "xor %[err], %[err]\n" \ - "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact - * XSAVE area format. - */ -#define XSTATE_XRESTORE(st, lmask, hmask) \ - asm volatile(ALTERNATIVE(XRSTOR, \ - XRSTORS, X86_FEATURE_XSAVES) \ - "\n" \ - "3:\n" \ - _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ - : \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (static_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (static_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - else - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - - /* - * We should never fault when copying from a kernel buffer, and the FPU - * state we set at boot time should be valid. - */ - WARN_ON_FPU(err); -} - -/* - * Save processor xstate to xsave area. - */ -static inline void copy_xregs_to_kernel(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON_FPU(!alternatives_patched); - - XSTATE_XSAVE(xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * Restore processor xstate from xsave area. - */ -static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - XSTATE_XRESTORE(xstate, lmask, hmask); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int copy_xregs_to_user(struct xregs_state __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. - */ - err = __clear_user(&buf->header, sizeof(buf->header)); - if (unlikely(err)) - return -EFAULT; - - stac(); - XSTATE_OP(XSAVE, buf, -1, -1, err); - clac(); - - return err; -} - -/* - * Restore xstate from user space xsave area. 
- */ -static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) -{ - struct xregs_state *xstate = ((__force struct xregs_state *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - stac(); - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - clac(); - - return err; -} - -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact and we can - * keep registers active. - * - * The legacy FNSAVE instruction cleared all FPU state - * unconditionally, so registers are essentially destroyed. - * Modern FPU state can be kept in registers, if there are - * no pending FP exceptions. - */ -static inline int copy_fpregs_to_fpstate(struct fpu *fpu) -{ - if (likely(use_xsave())) { - copy_xregs_to_kernel(&fpu->state.xsave); - return 1; - } - - if (likely(use_fxsr())) { - copy_fxregs_to_kernel(fpu); - return 1; - } - - /* - * Legacy FPU register saving, FNSAVE always clears FPU registers, - * so we have to mark them inactive: - */ - asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); - - return 0; -} - -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) -{ - if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, mask); - } else { - if (use_fxsr()) - copy_kernel_to_fxregs(&fpstate->fxsave); - else - copy_kernel_to_fregs(&fpstate->fsave); - } -} - -static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpstate)); - } - - __copy_kernel_to_fpregs(fpstate, -1); -} - -extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); - -/* - * FPU context switch related helper methods: - */ - -DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); - -/* - * The in-register FPU state for an FPU context on a CPU is assumed to be - * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx - * matches the FPU. - * - * If the FPU register state is valid, the kernel can skip restoring the - * FPU state from memory. - * - * Any code that clobbers the FPU registers or updates the in-memory - * FPU state for a task MUST let the rest of the kernel know that the - * FPU registers are no longer valid for this task. - * - * Either one of these invalidation functions is enough. Invalidate - * a resource you control: CPU if using the CPU for something else - * (with preemption disabled), FPU for the current task, or a task that - * is prevented from running by the current task. 
- */ -static inline void __cpu_invalidate_fpregs_state(void) -{ - __this_cpu_write(fpu_fpregs_owner_ctx, NULL); -} - -static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) -{ - fpu->last_cpu = -1; -} - -static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) -{ - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; -} - -/* - * These generally need preemption protection to work, - * do try to avoid using these on their own: - */ -static inline void fpregs_deactivate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, NULL); - trace_x86_fpu_regs_deactivated(fpu); -} - -static inline void fpregs_activate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, fpu); - trace_x86_fpu_regs_activated(fpu); -} - -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state. - * This is done within the context of the old process. - * - * - switch_fpu_finish() restores the new state as - * necessary. - */ -static inline void -switch_fpu_prepare(struct fpu *old_fpu, int cpu) -{ - if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { - if (!copy_fpregs_to_fpstate(old_fpu)) - old_fpu->last_cpu = -1; - else - old_fpu->last_cpu = cpu; - - /* But leave fpu_fpregs_owner_ctx! */ - trace_x86_fpu_regs_deactivated(old_fpu); - } else - old_fpu->last_cpu = -1; -} - -/* - * Misc helper functions: - */ - -/* - * Set up the userspace FPU context for the new task, if the task - * has used the FPU. - */ -static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) -{ - bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->initialized; - - if (preload) { - if (!fpregs_state_valid(new_fpu, cpu)) - copy_kernel_to_fpregs(&new_fpu->state); - fpregs_activate(new_fpu); - } -} - -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). 
- */
-static inline void user_fpu_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	preempt_disable();
-	fpregs_activate(fpu);
-	preempt_enable();
-}
-
-/*
- * MXCSR and XCR definitions:
- */
-
-extern unsigned int mxcsr_feature_mask;
-
-#define XCR_XFEATURE_ENABLED_MASK	0x00000000
-
-static inline u64 xgetbv(u32 index)
-{
-	u32 eax, edx;
-
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
-	return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
-	u32 eax = value;
-	u32 edx = value >> 32;
-
-	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-		     : : "a" (eax), "d" (edx), "c" (index));
-}
-
-#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
index d5bdffb9d27f..697b77e96025 100644
--- a/arch/x86/include/asm/fpu/regset.h
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -7,11 +7,12 @@
 
 #include <linux/regset.h>
 
-extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
-				xstateregs_get;
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active,
+				ssp_active;
+extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+				xstateregs_get, ssp_get;
 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
-				xstateregs_set;
+				xstateregs_set, ssp_set;
 
 /*
  * xstateregs_active == regset_fpregs_active. Please refer to the comment
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
new file mode 100644
index 000000000000..89004f4ca208
--- /dev/null
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_FPU_SCHED_H
+#define _ASM_X86_FPU_SCHED_H
+
+#include <linux/sched.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/types.h>
+
+#include <asm/trace/fpu.h>
+
+extern void save_fpregs_to_fpstate(struct fpu *fpu);
+extern void fpu__drop(struct task_struct *tsk);
+extern int  fpu_clone(struct task_struct *dst, u64 clone_flags, bool minimal,
+		      unsigned long shstk_addr);
+extern void fpu_flush_thread(void);
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * switch_fpu() saves the old state and sets TIF_NEED_FPU_LOAD if
+ * TIF_NEED_FPU_LOAD is not set.  This is done within the context
+ * of the old process.
+ *
+ * Once TIF_NEED_FPU_LOAD is set, the registers must be reloaded
+ * before returning to userland and before the saved content is
+ * used in any other way.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
+ */
+static inline void switch_fpu(struct task_struct *old, int cpu)
+{
+	if (!test_tsk_thread_flag(old, TIF_NEED_FPU_LOAD) &&
+	    cpu_feature_enabled(X86_FEATURE_FPU) &&
+	    !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+		struct fpu *old_fpu = x86_task_fpu(old);
+
+		set_tsk_thread_flag(old, TIF_NEED_FPU_LOAD);
+		save_fpregs_to_fpstate(old_fpu);
+		/*
+		 * The save operation preserved register state, so the
+		 * fpu_fpregs_owner_ctx is still @old_fpu.  Store the
+		 * current CPU number in @old_fpu, so the next return
+		 * to user space can avoid the FPU register restore
+		 * when it returns on the same CPU and still owns the
+		 * context.  See fpregs_restore_userregs().
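+		 *
+		 * (Expected call site, an assumption rather than part of
+		 * this hunk: __switch_to() invokes switch_fpu(prev_p, cpu)
+		 * for the outgoing task.)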
+ */ + old_fpu->last_cpu = cpu; + + trace_x86_fpu_regs_deactivated(old_fpu); + } +} + +#endif /* _ASM_X86_FPU_SCHED_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 44bbc39a57b3..eccc75bc9c4f 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -5,30 +5,33 @@ #ifndef _ASM_X86_FPU_SIGNAL_H #define _ASM_X86_FPU_SIGNAL_H +#include <linux/compat.h> +#include <linux/user.h> + +#include <asm/fpu/types.h> + #ifdef CONFIG_X86_64 # include <uapi/asm/sigcontext.h> # include <asm/user32.h> -struct ksignal; -int ia32_setup_rt_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); #else # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct -# define ia32_setup_frame __setup_frame -# define ia32_setup_rt_frame __setup_rt_frame #endif extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, +extern void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env); unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); -extern void fpu__init_prepare_fx_sw_frame(void); +unsigned long fpu__get_fpstate_size(void); + +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru); +extern void fpu__clear_user_states(struct fpu *fpu); +extern bool fpu__restore_sig(void __user *buf, int ia32_frame); +extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 202c53918ecf..93e99d2583d6 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -2,8 +2,10 @@ /* * FPU data structures: */ -#ifndef _ASM_X86_FPU_H -#define _ASM_X86_FPU_H +#ifndef _ASM_X86_FPU_TYPES_H +#define _ASM_X86_FPU_TYPES_H + +#include <asm/page_types.h> /* * The legacy x87 FPU state format, as saved by FSAVE and @@ -114,6 +116,16 @@ enum xfeature { XFEATURE_Hi16_ZMM, XFEATURE_PT_UNIMPLEMENTED_SO_FAR, XFEATURE_PKRU, + XFEATURE_PASID, + XFEATURE_CET_USER, + XFEATURE_CET_KERNEL, + XFEATURE_RSRVD_COMP_13, + XFEATURE_RSRVD_COMP_14, + XFEATURE_LBR, + XFEATURE_RSRVD_COMP_16, + XFEATURE_XTILE_CFG, + XFEATURE_XTILE_DATA, + XFEATURE_APX, XFEATURE_MAX, }; @@ -128,12 +140,26 @@ enum xfeature { #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) #define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) +#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) +#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER) +#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL) +#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) +#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) +#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) +#define XFEATURE_MASK_APX (1 << XFEATURE_APX) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ | XFEATURE_MASK_ZMM_Hi256 \ | XFEATURE_MASK_Hi16_ZMM) +#ifdef CONFIG_X86_64 +# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \ + | XFEATURE_MASK_XTILE_CFG) +#else +# define XFEATURE_MASK_XTILE (0) +#endif + #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM struct reg_128_bit { @@ -145,6 +171,9 @@ 
struct reg_256_bit { struct reg_512_bit { u8 regbytes[512/8]; }; +struct reg_1024_byte { + u8 regbytes[1024]; +}; /* * State component 2: @@ -229,6 +258,78 @@ struct pkru_state { u32 pad; } __packed; +/* + * State component 11 is Control-flow Enforcement user states + */ +struct cet_user_state { + /* user control-flow settings */ + u64 user_cet; + /* user shadow stack pointer */ + u64 user_ssp; +}; + +/* + * State component 12 is Control-flow Enforcement supervisor states. + * This state includes SSP pointers for privilege levels 0 through 2. + */ +struct cet_supervisor_state { + u64 pl0_ssp; + u64 pl1_ssp; + u64 pl2_ssp; +} __packed; + +/* + * State component 15: Architectural LBR configuration state. + * The size of Arch LBR state depends on the number of LBRs (lbr_depth). + */ + +struct lbr_entry { + u64 from; + u64 to; + u64 info; +}; + +struct arch_lbr_state { + u64 lbr_ctl; + u64 lbr_depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; + struct lbr_entry entries[]; +}; + +/* + * State component 17: 64-byte tile configuration register. + */ +struct xtile_cfg { + u64 tcfg[8]; +} __packed; + +/* + * State component 18: 1KB tile data register. + * Each register represents 16 64-byte rows of the matrix + * data. But the number of registers depends on the actual + * implementation. + */ +struct xtile_data { + struct reg_1024_byte tmm; +} __packed; + +/* + * State component 19: 8B extended general purpose register. + */ +struct apx_state { + u64 egpr[16]; +} __packed; + +/* + * State component 10 is supervisor state used for context-switching the + * PASID state. + */ +struct ia32_pasid_state { + u64 pasid; +} __packed; + struct xstate_header { u64 xfeatures; u64 xcomp_bv; @@ -253,7 +354,7 @@ struct xstate_header { struct xregs_state { struct fxregs_state i387; struct xstate_header header; - u8 extended_state_area[0]; + u8 extended_state_area[]; } __attribute__ ((packed, aligned (64))); /* @@ -273,6 +374,95 @@ union fpregs_state { u8 __padding[PAGE_SIZE]; }; +struct fpstate { + /* @kernel_size: The size of the kernel register image */ + unsigned int size; + + /* @user_size: The size in non-compacted UABI format */ + unsigned int user_size; + + /* @xfeatures: xfeatures for which the storage is sized */ + u64 xfeatures; + + /* @user_xfeatures: xfeatures valid in UABI buffers */ + u64 user_xfeatures; + + /* @xfd: xfeatures disabled to trap userspace use. */ + u64 xfd; + + /* @is_valloc: Indicator for dynamically allocated state */ + unsigned int is_valloc : 1; + + /* @is_guest: Indicator for guest state (KVM) */ + unsigned int is_guest : 1; + + /* + * @is_confidential: Indicator for KVM confidential mode. + * The FPU registers are restored by the + * vmentry firmware from encrypted guest + * memory. On vmexit the FPU registers are + * saved by firmware to encrypted guest memory + * and the registers are scrubbed before + * returning to the host. So there is no + * content which is worth saving and restoring. + * The fpstate has to be there so that + * preemption and softirq FPU usage works + * without special casing. + */ + unsigned int is_confidential : 1; + + /* @in_use: State is in use */ + unsigned int in_use : 1; + + /* @regs: The register state union for all supported formats */ + union fpregs_state regs; + + /* @regs is dynamically sized! Don't add anything after @regs! 
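+	 *
+	 * Because of this, a reallocated fpstate is sized from the
+	 * offset of @regs plus the required register image size. A
+	 * minimal allocation sketch, with @newsz as a hypothetical
+	 * precomputed image size (fpstate_realloc() does the real work):
+	 *
+	 *	newfps = vzalloc(offsetof(struct fpstate, regs) + newsz);
+	 *	newfps->size	  = newsz;
+	 *	newfps->is_valloc = 1;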
+	 */
+} __aligned(64);
+
+#define FPU_GUEST_PERM_LOCKED		BIT_ULL(63)
+
+struct fpu_state_perm {
+	/*
+	 * @__state_perm:
+	 *
+	 * This bitmap indicates the permission for state components
+	 * available to a thread group, including both user and supervisor
+	 * components and software-defined bits like FPU_GUEST_PERM_LOCKED.
+	 * The permission prctl() sets the enabled state bits in
+	 * thread_group_leader()->thread.fpu.
+	 *
+	 * All run time operations use the per thread information in the
+	 * currently active fpu.fpstate which contains the xfeature masks
+	 * and sizes for kernel and user space.
+	 *
+	 * This master permission field is only to be used when
+	 * task.fpu.fpstate based checks fail to validate whether the task
+	 * is allowed to expand its xfeatures set which requires to
+	 * allocate a larger sized fpstate buffer.
+	 *
+	 * Do not access this field directly. Use the provided helper
+	 * function. Unlocked access is possible for quick checks.
+	 */
+	u64				__state_perm;
+
+	/*
+	 * @__state_size:
+	 *
+	 * The size required for @__state_perm. Only valid to access
+	 * with sighand locked.
+	 */
+	unsigned int			__state_size;
+
+	/*
+	 * @__user_state_size:
+	 *
+	 * The size required for @__state_perm user part. Only valid to
+	 * access with sighand locked.
+	 */
+	unsigned int			__user_state_size;
+};
 
 /*
  * Highest level per task FPU state data structure that
  * contains the FPU register state plus various FPU
@@ -294,28 +484,164 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @initialized:
+	 * @avx512_timestamp:
 	 *
-	 * This flag indicates whether this context is initialized: if the task
-	 * is not running then we can restore from this context, if the task
-	 * is running then we should save into this context.
+	 * Records the timestamp of AVX512 use during last context switch.
 	 */
-	unsigned char			initialized;
+	unsigned long			avx512_timestamp;
 
 	/*
-	 * @state:
+	 * @fpstate:
 	 *
-	 * In-memory copy of all FPU registers that we save/restore
-	 * over context switches. If the task is using the FPU then
-	 * the registers in the FPU are more recent than this state
-	 * copy. If the task context-switches away then they get
-	 * saved here and represent the FPU state.
+	 * Pointer to the active struct fpstate. Initialized to
+	 * point at @__fpstate below.
 	 */
-	union fpregs_state		state;
+	struct fpstate			*fpstate;
+
 	/*
-	 * WARNING: 'state' is dynamically-sized. Do not put
+	 * @__task_fpstate:
+	 *
+	 * Pointer to an inactive struct fpstate. Initialized to NULL. Is
+	 * used only for KVM support to swap out the regular task fpstate.
+	 */
+	struct fpstate			*__task_fpstate;
+
+	/*
+	 * @perm:
+	 *
+	 * Permission related information
+	 */
+	struct fpu_state_perm		perm;
+
+	/*
+	 * @guest_perm:
+	 *
+	 * Permission related information for guest pseudo FPUs
+	 */
+	struct fpu_state_perm		guest_perm;
+
+	/*
+	 * @__fpstate:
+	 *
+	 * Initial in-memory storage for FPU registers which are saved in
+	 * context switch and when the kernel uses the FPU. The registers
+	 * are restored from this storage on return to user space if they
+	 * no longer contain the task's FPU register state.
+	 */
+	struct fpstate			__fpstate;
+	/*
+	 * WARNING: '__fpstate' is dynamically-sized. Do not put
 	 * anything after it here.
 	 */
 };
 
-#endif /* _ASM_X86_FPU_H */
+/*
+ * Guest pseudo FPU container
+ */
+struct fpu_guest {
+	/*
+	 * @xfeatures:			xfeature bitmap of features which are
+	 *				currently enabled for the guest vCPU.
+	 */
+	u64				xfeatures;
+
+	/*
+	 * @xfd_err:			Save the guest value.
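+	 *
+	 * Illustrative sketch (not the exact KVM code): when the guest
+	 * touches an XFD-armed feature, the resulting #NM exit leaves
+	 * the faulting feature bit in MSR_IA32_XFD_ERR, which is
+	 * preserved here across the exit, roughly:
+	 *
+	 *	rdmsrl(MSR_IA32_XFD_ERR, guest_fpu->xfd_err);
+	 *	if (guest_fpu->xfd_err)
+	 *		wrmsrl(MSR_IA32_XFD_ERR, 0);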
+ */ + u64 xfd_err; + + /* + * @uabi_size: Size required for save/restore + */ + unsigned int uabi_size; + + /* + * @fpstate: Pointer to the allocated guest fpstate + */ + struct fpstate *fpstate; +}; + +/* + * FPU state configuration data for fpu_guest. + * Initialized at boot time. Read only after init. + */ +struct vcpu_fpu_config { + /* + * @size: + * + * The default size of the register state buffer in guest FPUs. + * Includes all supported features except independent managed + * features and features which have to be requested by user space + * before usage. + */ + unsigned int size; + + /* + * @features: + * + * The default supported features bitmap in guest FPUs. Does not + * include independent managed features and features which have to + * be requested by user space before usage. + */ + u64 features; +}; + +/* + * FPU state configuration data. Initialized at boot time. Read only after init. + */ +struct fpu_state_config { + /* + * @max_size: + * + * The maximum size of the register state buffer. Includes all + * supported features except independent managed features. + */ + unsigned int max_size; + + /* + * @default_size: + * + * The default size of the register state buffer. Includes all + * supported features except independent managed features, + * guest-only features and features which have to be requested by + * user space before usage. + */ + unsigned int default_size; + + /* + * @max_features: + * + * The maximum supported features bitmap. Does not include + * independent managed features. + */ + u64 max_features; + + /* + * @default_features: + * + * The default supported features bitmap. Does not include + * independent managed features, guest-only features and features + * which have to be requested by user space before usage. + */ + u64 default_features; + /* + * @legacy_features: + * + * Features which can be reported back to user space + * even without XSAVE support, i.e. legacy features FP + SSE + */ + u64 legacy_features; + /* + * @independent_features: + * + * Features that are supported by XSAVES, but not managed as part of + * the FPU core, such as LBR + */ + u64 independent_features; +}; + +/* FPU state configuration information */ +extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; +extern struct vcpu_fpu_config guest_default_cfg; + +#endif /* _ASM_X86_FPU_TYPES_H */ diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h new file mode 100644 index 000000000000..9a710c060445 --- /dev/null +++ b/arch/x86/include/asm/fpu/xcr.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FPU_XCR_H +#define _ASM_X86_FPU_XCR_H + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 +#define XCR_XFEATURE_IN_USE_MASK 0x00000001 + +static __always_inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); +} + +/* + * Return a mask of xfeatures which are currently being tracked + * by the processor as being in the initial configuration. + * + * Callers should check X86_FEATURE_XGETBV1. 
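+ *
+ * A hypothetical caller (amx_in_use being an illustrative variable)
+ * would gate on the feature bit before trusting the result:
+ *
+ *	if (cpu_feature_enabled(X86_FEATURE_XGETBV1))
+ *		amx_in_use = xfeatures_in_use() & XFEATURE_MASK_XTILE_DATA;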
+ */ +static __always_inline u64 xfeatures_in_use(void) +{ + return xgetbv(XCR_XFEATURE_IN_USE_MASK); +} + +#endif /* _ASM_X86_FPU_XCR_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 48581988d78c..7a7dc9d56027 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -2,15 +2,16 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include <linux/uaccess.h> #include <linux/types.h> + #include <asm/processor.h> -#include <linux/uaccess.h> +#include <asm/fpu/api.h> +#include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) -#define XSTATE_CPUID 0x0000000d - #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 @@ -19,42 +20,115 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) -/* Supervisor features */ -#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) +#define XSAVE_ALIGNMENT 64 -/* All currently supported features */ -#define XCNTXT_MASK (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ - XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ - XFEATURE_MASK_ZMM_Hi256 | \ - XFEATURE_MASK_Hi16_ZMM | \ - XFEATURE_MASK_PKRU | \ - XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR) +/* All currently supported user features */ +#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_XTILE | \ + XFEATURE_MASK_APX) -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif +/* + * Features which are restored when returning to user space. + * PKRU is not restored on return to user space because PKRU + * is switched eagerly in switch_to() and flush_thread() + */ +#define XFEATURE_MASK_USER_RESTORE \ + (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) + +/* Features which are dynamically enabled for a process on request */ +#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA + +/* Supervisor features which are enabled only in guest FPUs */ +#define XFEATURE_MASK_GUEST_SUPERVISOR XFEATURE_MASK_CET_KERNEL + +/* All currently supported supervisor features */ +#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \ + XFEATURE_MASK_CET_USER | \ + XFEATURE_MASK_GUEST_SUPERVISOR) + +/* + * A supervisor state component may not always contain valuable information, + * and its size may be huge. Saving/restoring such supervisor state components + * at each context switch can cause high CPU and space overhead, which should + * be avoided. Such supervisor state components should only be saved/restored + * on demand. The on-demand supervisor features are set in this mask. + * + * Unlike the existing supported supervisor features, an independent supervisor + * feature does not allocate a buffer in task->fpu, and the corresponding + * supervisor state component cannot be saved/restored at each context switch. + * + * To support an independent supervisor feature, a developer should follow the + * dos and don'ts as below: + * - Do dynamically allocate a buffer for the supervisor state component. + * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the + * state component to/from the buffer. + * - Don't set the bit corresponding to the independent supervisor feature in + * IA32_XSS at run time, since it has been set at boot time. 
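+ *
+ * Illustrative shape of such code (sketch only; the arch LBR support
+ * is the in-tree example, and it also takes care of the 64-byte
+ * alignment and XSTATE header that XSAVES/XRSTORS require):
+ *
+ *	save on sched-out:	xsaves(lbr_buf, XFEATURE_MASK_LBR);
+ *	restore on sched-in:	xrstors(lbr_buf, XFEATURE_MASK_LBR);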
+ */
+#define XFEATURE_MASK_INDEPENDENT	(XFEATURE_MASK_LBR)
+
+/*
+ * Unsupported supervisor features. When a supervisor feature in this mask is
+ * supported in the future, move it to the supported supervisor feature mask.
+ */
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED	(XFEATURE_MASK_PT)
+
+/* All supervisor states including supported and unsupported states. */
+#define XFEATURE_MASK_SUPERVISOR_ALL	(XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
+					 XFEATURE_MASK_INDEPENDENT | \
+					 XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
+
+/*
+ * The feature mask required to restore FPU state:
+ * - All user states which are not eagerly switched in switch_to()/exec()
+ * - The supervisor states
+ */
+#define XFEATURE_MASK_FPSTATE	(XFEATURE_MASK_USER_RESTORE | \
+				 XFEATURE_MASK_SUPERVISOR_SUPPORTED)
+
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT	(XFEATURE_MASK_XTILE | \
+					 XFEATURE_MASK_USER_DYNAMIC)
 
-extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
-void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
-int using_compacted_format(void);
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
-int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
-
-/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-extern int validate_xstate_header(const struct xstate_header *hdr);
+int xfeature_size(int xfeature_nr);
+
+void xsaves(struct xregs_state *xsave, u64 mask);
+void xrstors(struct xregs_state *xsave, u64 mask);
+
+int xfd_enable_feature(u64 xfd_err);
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+	return static_branch_unlikely(&__fpu_state_size_dynamic);
+}
+#else
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+	return false;
+}
+#endif
 
 #endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..0ab65073c1cc 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -11,7 +11,7 @@
 
 #ifdef CONFIG_FRAME_POINTER
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 .macro FRAME_BEGIN
 	push %_ASM_BP
@@ -22,7 +22,36 @@
 	pop %_ASM_BP
 .endm
 
-#else /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+	leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.
The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just clearing the MSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the + * original ebp. + */ +.macro ENCODE_FRAME_POINTER + mov %esp, %ebp + andl $0x7fffffff, %ebp +.endm +#endif /* CONFIG_X86_64 */ + +#else /* !__ASSEMBLER__ */ #define FRAME_BEGIN \ "push %" _ASM_BP "\n" \ @@ -30,12 +59,51 @@ #define FRAME_END "pop %" _ASM_BP "\n" -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_X86_64 + +#define ENCODE_FRAME_POINTER \ + "lea 1(%rsp), %rbp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs + 1; +} + +#else /* !CONFIG_X86_64 */ + +#define ENCODE_FRAME_POINTER \ + "movl %esp, %ebp\n\t" \ + "andl $0x7fffffff, %ebp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs & 0x7fffffff; +} + +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLER__ */ #define FRAME_OFFSET __ASM_SEL(4, 8) #else /* !CONFIG_FRAME_POINTER */ +#ifdef __ASSEMBLER__ + +.macro ENCODE_FRAME_POINTER ptregs_offset=0 +.endm + +#else /* !__ASSEMBLER__ */ + +#define ENCODE_FRAME_POINTER + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return 0; +} + +#endif + #define FRAME_BEGIN #define FRAME_END #define FRAME_OFFSET 0 diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h new file mode 100644 index 000000000000..2bb65677c079 --- /dev/null +++ b/arch/x86/include/asm/fred.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macros for Flexible Return and Event Delivery (FRED) + */ + +#ifndef ASM_X86_FRED_H +#define ASM_X86_FRED_H + +#include <linux/const.h> + +#include <asm/asm.h> +#include <asm/msr.h> +#include <asm/trapnr.h> + +/* + * FRED event return instruction opcodes for ERET{S,U}; supported in + * binutils >= 2.41. + */ +#define ERETS _ASM_BYTES(0xf2,0x0f,0x01,0xca) +#define ERETU _ASM_BYTES(0xf3,0x0f,0x01,0xca) + +/* + * RSP is aligned to a 64-byte boundary before used to push a new stack frame + */ +#define FRED_STACK_FRAME_RSP_MASK _AT(unsigned long, (~0x3f)) + +/* + * Used for the return address for call emulation during code patching, + * and measured in 64-byte cache lines. + */ +#define FRED_CONFIG_REDZONE_AMOUNT 1 +#define FRED_CONFIG_REDZONE (_AT(unsigned long, FRED_CONFIG_REDZONE_AMOUNT) << 6) +#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9) +#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p)) + +#ifndef __ASSEMBLER__ + +#ifdef CONFIG_X86_FRED +#include <linux/kernel.h> +#include <linux/sched/task_stack.h> + +#include <asm/ptrace.h> + +struct fred_info { + /* Event data: CR2, DR6, ... 
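+	 *
+	 * e.g. a FRED page fault handler reads CR2 from here instead
+	 * of from the CR2 register (illustrative):
+	 *
+	 *	unsigned long address = fred_event_data(regs);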
*/ + unsigned long edata; + unsigned long resv; +}; + +/* Full format of the FRED stack frame */ +struct fred_frame { + struct pt_regs regs; + struct fred_info info; +}; + +static __always_inline struct fred_info *fred_info(struct pt_regs *regs) +{ + return &container_of(regs, struct fred_frame, regs)->info; +} + +static __always_inline unsigned long fred_event_data(struct pt_regs *regs) +{ + return fred_info(regs)->edata; +} + +void asm_fred_entrypoint_user(void); +void asm_fred_entrypoint_kernel(void); +void asm_fred_entry_from_kvm(struct fred_ss); + +__visible void fred_entry_from_user(struct pt_regs *regs); +__visible void fred_entry_from_kernel(struct pt_regs *regs); +__visible void __fred_entry_from_kvm(struct pt_regs *regs); + +/* Can be called from noinstr code, thus __always_inline */ +static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) +{ + struct fred_ss ss = { + .ss =__KERNEL_DS, + .type = type, + .vector = vector, + .nmi = type == EVENT_TYPE_NMI, + .l = 1, + }; + + asm_fred_entry_from_kvm(ss); +} + +void cpu_init_fred_exceptions(void); +void cpu_init_fred_rsps(void); +void fred_complete_exception_setup(void); + +DECLARE_PER_CPU(unsigned long, fred_rsp0); + +static __always_inline void fred_sync_rsp0(unsigned long rsp0) +{ + __this_cpu_write(fred_rsp0, rsp0); +} + +static __always_inline void fred_update_rsp0(void) +{ + unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE; + + if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) { + wrmsrns(MSR_IA32_FRED_RSP0, rsp0); + __this_cpu_write(fred_rsp0, rsp0); + } +} +#else /* CONFIG_X86_FRED */ +static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; } +static inline void cpu_init_fred_exceptions(void) { } +static inline void cpu_init_fred_rsps(void) { } +static inline void fred_complete_exception_setup(void) { } +static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } +static inline void fred_sync_rsp0(unsigned long rsp0) { } +static inline void fred_update_rsp0(void) { } +#endif /* CONFIG_X86_FRED */ +#endif /* !__ASSEMBLER__ */ + +#endif /* ASM_X86_FRED_H */ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h index bca4c743de77..ab2547f97c2c 100644 --- a/arch/x86/include/asm/fsgsbase.h +++ b/arch/x86/include/asm/fsgsbase.h @@ -2,11 +2,11 @@ #ifndef _ASM_FSGSBASE_H #define _ASM_FSGSBASE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_X86_64 -#include <asm/msr-index.h> +#include <asm/msr.h> /* * Read/write a task's FSBASE or GSBASE. This returns the value that @@ -19,38 +19,67 @@ extern unsigned long x86_gsbase_read_task(struct task_struct *task); extern void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); extern void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); -/* Helper functions for reading/writing FS/GS base */ +/* Must be protected by X86_FEATURE_FSGSBASE check. 
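+ *
+ * i.e. a raw use looks like this (illustrative; the helpers below
+ * fall back to the MSRs when FSGSBASE is not available):
+ *
+ *	if (boot_cpu_has(X86_FEATURE_FSGSBASE))
+ *		fsbase = rdfsbase();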
*/ -static inline unsigned long x86_fsbase_read_cpu(void) +static __always_inline unsigned long rdfsbase(void) { unsigned long fsbase; - rdmsrl(MSR_FS_BASE, fsbase); + asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory"); return fsbase; } -static inline unsigned long x86_gsbase_read_cpu_inactive(void) +static __always_inline unsigned long rdgsbase(void) { unsigned long gsbase; - rdmsrl(MSR_KERNEL_GS_BASE, gsbase); + asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory"); return gsbase; } -static inline void x86_fsbase_write_cpu(unsigned long fsbase) +static __always_inline void wrfsbase(unsigned long fsbase) { - wrmsrl(MSR_FS_BASE, fsbase); + asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); } -static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase) +static __always_inline void wrgsbase(unsigned long gsbase) { - wrmsrl(MSR_KERNEL_GS_BASE, gsbase); + asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); } +#include <asm/cpufeature.h> + +/* Helper functions for reading/writing FS/GS base */ + +static inline unsigned long x86_fsbase_read_cpu(void) +{ + unsigned long fsbase; + + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) + fsbase = rdfsbase(); + else + rdmsrq(MSR_FS_BASE, fsbase); + + return fsbase; +} + +static inline void x86_fsbase_write_cpu(unsigned long fsbase) +{ + if (boot_cpu_has(X86_FEATURE_FSGSBASE)) + wrfsbase(fsbase); + else + wrmsrq(MSR_FS_BASE, fsbase); +} + +extern unsigned long x86_gsbase_read_cpu_inactive(void); +extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); +extern unsigned long x86_fsgsbase_read_task(struct task_struct *task, + unsigned short selector); + #endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index cf350639e76d..b08c95872eed 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -2,24 +2,27 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H +#include <asm/ptrace.h> + #ifdef CONFIG_FUNCTION_TRACER -#ifdef CC_USING_FENTRY -# define MCOUNT_ADDR ((unsigned long)(__fentry__)) -#else -# define MCOUNT_ADDR ((unsigned long)(mcount)) -# define HAVE_FUNCTION_GRAPH_FP_TEST +#ifndef CC_USING_FENTRY +# error Compiler does not support fentry? 
#endif +# define MCOUNT_ADDR ((unsigned long)(__fentry__)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +/* Ignore unused weak functions which will have non zero offsets */ +#ifdef CONFIG_HAVE_FENTRY +# include <asm/ibt.h> +/* Add offset for endbr64 if IBT enabled */ +# define FTRACE_MCOUNT_MAX_OFFSET ENDBR_INSN_SIZE +#endif + #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - -#ifndef __ASSEMBLY__ -extern void mcount(void); -extern atomic_t modifying_ftrace_code; +#ifndef __ASSEMBLER__ extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -31,33 +34,109 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) +{ + if (is_endbr((void*)(fentry_ip - ENDBR_INSN_SIZE))) + fentry_ip -= ENDBR_INSN_SIZE; + + return fentry_ip; +} +#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +#include <linux/ftrace_regs.h> + +static __always_inline struct pt_regs * +arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + /* Only when FL_SAVE_REGS is set, cs will be non zero */ + if (!arch_ftrace_regs(fregs)->regs.cs) + return NULL; + return &arch_ftrace_regs(fregs)->regs; +} + +#define arch_ftrace_partial_regs(regs) do { \ + regs->flags &= ~X86_EFLAGS_FIXED; \ + regs->cs = __KERNEL_CS; \ +} while (0) + +#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ + (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ + (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \ + (_regs)->cs = __KERNEL_CS; \ + (_regs)->flags = 0; \ + } while (0) + +#define ftrace_regs_set_instruction_pointer(fregs, _ip) \ + do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) + + +static __always_inline unsigned long +ftrace_regs_get_return_address(struct ftrace_regs *fregs) +{ + return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs); +} + +struct ftrace_ops; +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else +#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When a ftrace registered caller is tracing a function that is + * also set by a register_ftrace_direct() call, it needs to be + * differentiated in the ftrace_caller trampoline. To do this, we + * place the direct caller in the ORIG_AX part of pt_regs. This + * tells the ftrace_caller that there's a direct caller. 
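+ *
+ * Illustrative usage from a direct-call client, where my_ops and
+ * my_tramp are hypothetical (an ftrace_ops and an assembly
+ * trampoline respectively):
+ *
+ *	ftrace_set_filter_ip(&my_ops, (unsigned long)target, 0, 0);
+ *	register_ftrace_direct(&my_ops, (unsigned long)my_tramp);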
+ */ +static inline void +__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + /* Emulate a call */ + regs->orig_ax = addr; +} +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -int ftrace_int3_handler(struct pt_regs *regs); - -#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR - #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ + +void prepare_ftrace_return(unsigned long ip, unsigned long *parent, + unsigned long frame_pointer); + +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +extern void set_ftrace_ops_ro(void); +#else +static inline void set_ftrace_ops_ro(void) { } +#endif #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { /* * Compare the symbol name with the system call name. Skip the - * "__x64_sys", "__ia32_sys" or simple "sys" prefix. + * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix. */ return !strcmp(sym + 3, name + 3) || (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) || - (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)); + (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) || + (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3)); } #ifndef COMPILE_OFFSETS @@ -80,6 +159,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ #endif /* !COMPILE_OFFSETS */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 13c83fe97988..fe5d9a10d900 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -12,76 +12,86 @@ #include <asm/processor.h> #include <asm/smap.h> -#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - asm volatile("\t" ASM_STAC "\n" \ - "1:\t" insn "\n" \ - "2:\t" ASM_CLAC "\n" \ - "\t.section .fixup,\"ax\"\n" \ - "3:\tmov\t%3, %1\n" \ - "\tjmp\t2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ +#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret; \ + asm volatile("1:\t" insn "\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %1) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)) + : "0" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) -#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - asm volatile("\t" ASM_STAC "\n" \ - "1:\tmovl %2, %0\n" \ - "\tmovl\t%0, %3\n" \ + +#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret, tem; \ + asm volatile("1:\tmovl %2, %0\n" \ + "2:\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ - "\tjnz\t1b\n" \ - "3:\t" ASM_CLAC "\n" \ - "\t.section .fixup,\"ax\"\n" \ - "4:\tmov\t%5, %1\n" \ - "\tjmp\t3b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 4b) \ - _ASM_EXTABLE_UA(2b, 4b) \ + "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "\tjnz\t2b\n" \ + "4:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 4b, EX_TYPE_EFAULT_REG, %1) \ + _ASM_EXTABLE_TYPE_REG(3b, 4b, EX_TYPE_EFAULT_REG, %1) \ : "=&a" (oldval), "=&r" 
(ret), \ "+m" (*uaddr), "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)) + : "r" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) +static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) { - int oldval = 0, ret, tem; - - pagefault_disable(); - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, - uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); - break; - default: - ret = -ENOSYS; + scoped_user_rw_access(uaddr, Efault) { + switch (op) { + case FUTEX_OP_SET: + unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ADD: + unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_OR: + unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); + break; + case FUTEX_OP_ANDN: + unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); + break; + case FUTEX_OP_XOR: + unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); + break; + default: + return -ENOSYS; + } } - - pagefault_enable(); - - if (!ret) - *oval = oldval; - - return ret; + return 0; +Efault: + return -EFAULT; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval); + int ret = 0; + + scoped_user_rw_access(uaddr, Efault) { + asm_inline volatile("\n" + "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" + "2:\n" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "r" (newval), "1" (oldval) + : "memory"); + *uval = oldval; + } + return ret; +Efault: + return -EFAULT; } #endif diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 318556574345..5af8088a10df 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -38,7 +38,7 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); -extern int gart_iommu_hole_init(void); +void gart_iommu_hole_init(void); #else #define gart_iommu_aperture 0 @@ -51,9 +51,8 @@ static inline void early_gart_iommu_check(void) static inline void gart_parse_options(char *options) { } -static inline int gart_iommu_hole_init(void) +static inline void gart_iommu_hole_init(void) { - return -ENODEV; } #endif diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index 7cd73552a4e8..3c7267ef4a9e 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Geode definitions * Copyright (C) 2006, Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. 
*/ #ifndef _ASM_X86_GEODE_H diff --git a/arch/x86/include/asm/gsseg.h b/arch/x86/include/asm/gsseg.h new file mode 100644 index 000000000000..ab6a595cea70 --- /dev/null +++ b/arch/x86/include/asm/gsseg.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_GSSEG_H +#define _ASM_X86_GSSEG_H + +#include <linux/types.h> + +#include <asm/asm.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> +#include <asm/processor.h> +#include <asm/nops.h> + +#ifdef CONFIG_X86_64 + +extern asmlinkage void asm_load_gs_index(u16 selector); + +/* Replace with "lkgs %di" once binutils support LKGS instruction */ +#define LKGS_DI _ASM_BYTES(0xf2,0x0f,0x00,0xf7) + +static inline void native_lkgs(unsigned int selector) +{ + u16 sel = selector; + asm_inline volatile("1: " LKGS_DI + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k[sel]) + : [sel] "+D" (sel)); +} + +static inline void native_load_gs_index(unsigned int selector) +{ + if (cpu_feature_enabled(X86_FEATURE_LKGS)) { + native_lkgs(selector); + } else { + unsigned long flags; + + local_irq_save(flags); + asm_load_gs_index(selector); + local_irq_restore(flags); + } +} + +#endif /* CONFIG_X86_64 */ + +static inline void __init lkgs_init(void) +{ +#ifdef CONFIG_PARAVIRT_XXL +#ifdef CONFIG_X86_64 + if (cpu_feature_enabled(X86_FEATURE_LKGS)) + pv_ops.cpu.load_gs_index = native_lkgs; +#endif +#endif +} + +#ifndef CONFIG_PARAVIRT_XXL + +static inline void load_gs_index(unsigned int selector) +{ +#ifdef CONFIG_X86_64 + native_load_gs_index(selector); +#else + loadsegment(gs, selector); +#endif +} + +#endif /* CONFIG_PARAVIRT_XXL */ + +#endif /* _ASM_X86_GSSEG_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index d9069bb26c7f..6b6d472baa0b 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -5,8 +5,7 @@ #include <linux/threads.h> typedef struct { - u16 __softirq_pending; -#if IS_ENABLED(CONFIG_KVM_INTEL) +#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL) u8 kvm_cpu_l1tf_flush_l1d; #endif unsigned int __nmi_count; /* arch dependent */ @@ -15,7 +14,7 @@ typedef struct { unsigned int irq_spurious_count; unsigned int icr_read_retry_count; #endif -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) unsigned int kvm_posted_intr_ipis; unsigned int kvm_posted_intr_wakeup_ipis; unsigned int kvm_posted_intr_nested_ipis; @@ -37,17 +36,23 @@ typedef struct { #ifdef CONFIG_X86_MCE_AMD unsigned int irq_deferred_error_count; #endif -#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR unsigned int irq_hv_callback_count; #endif #if IS_ENABLED(CONFIG_HYPERV) unsigned int irq_hv_reenlightenment_count; unsigned int hyperv_stimer0_count; #endif +#ifdef CONFIG_X86_POSTED_MSI + unsigned int posted_msi_notification_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +#ifdef CONFIG_X86_POSTED_MSI +DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); +#endif #define __ARCH_IRQ_STAT #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) @@ -60,24 +65,30 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat +DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending); +#define local_softirq_pending_ref __softirq_pending -#if IS_ENABLED(CONFIG_KVM_INTEL) -static inline void kvm_set_cpu_l1tf_flush_l1d(void) +#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL) +/* + * This function is called 
from noinstr interrupt contexts + * and must be inlined to not get instrumentation. + */ +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); } -static inline void kvm_clear_cpu_l1tf_flush_l1d(void) +static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void) { __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0); } -static inline bool kvm_get_cpu_l1tf_flush_l1d(void) +static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void) { return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); } #else /* !IS_ENABLED(CONFIG_KVM_INTEL) */ -static inline void kvm_set_cpu_l1tf_flush_l1d(void) { } +static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { } #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */ #endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a8059930056d..585bdadba47d 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -23,10 +23,10 @@ #include <linux/interrupt.h> #include <linux/threads.h> -#include <asm/kmap_types.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> #include <asm/fixmap.h> +#include <asm/pgtable_areas.h> /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; @@ -58,22 +58,16 @@ extern unsigned long highstart_pfn, highend_pfn; #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern void *kmap_high(struct page *page); -extern void kunmap_high(struct page *page); - -void *kmap(struct page *page); -void kunmap(struct page *page); - -void *kmap_atomic_prot(struct page *page, pgprot_t prot); -void *kmap_atomic(struct page *page); -void __kunmap_atomic(void *kvaddr); -void *kmap_atomic_pfn(unsigned long pfn); -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); - #define flush_cache_kmaps() do { } while (0) -extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); +#define arch_kmap_local_post_map(vaddr, pteval) \ + arch_flush_lazy_mmu_mode() + +#define arch_kmap_local_post_unmap(vaddr) \ + do { \ + flush_tlb_one_kernel((vaddr)); \ + arch_flush_lazy_mmu_mode(); \ + } while (0) #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 67385d56d4f4..ab0c78855ecb 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -74,18 +74,6 @@ extern void hpet_disable(void); extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); -struct irq_data; -struct hpet_dev; -struct irq_domain; - -extern void hpet_msi_unmask(struct irq_data *data); -extern void hpet_msi_mask(struct irq_data *data); -extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); -extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); -extern struct irq_domain *hpet_create_irq_domain(int hpet_id); -extern int hpet_assign_irq(struct irq_domain *domain, - struct hpet_dev *dev, int dev_num); - #ifdef CONFIG_HPET_EMULATE_RTC #include <linux/interrupt.h> @@ -96,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec); extern int hpet_set_periodic_freq(unsigned long freq); -extern int hpet_rtc_dropped_irq(void); extern int hpet_rtc_timer_init(void); extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); extern int hpet_register_irq_handler(rtc_irq_handler handler); diff --git 
a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 7469d321f072..1721b1aadeb1 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -7,18 +7,4 @@ #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index a1f0e90d0818..0bc931cd0698 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -44,10 +44,7 @@ struct arch_hw_breakpoint { /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -static inline int hw_breakpoint_slots(int type) -{ - return HBP_NUM; -} +#define hw_breakpoint_slots(type) (HBP_NUM) struct perf_event_attr; struct perf_event; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 32e666e1231e..cbe19e669080 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,9 +16,7 @@ #include <asm/irq_vectors.h> -#define IRQ_MATRIX_BITS NR_VECTORS - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/percpu.h> #include <linux/profile.h> @@ -28,29 +26,7 @@ #include <asm/irq.h> #include <asm/sections.h> -/* Interrupt handlers registered during init_IRQ */ -extern asmlinkage void apic_timer_interrupt(void); -extern asmlinkage void x86_platform_ipi(void); -extern asmlinkage void kvm_posted_intr_ipi(void); -extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); -extern asmlinkage void kvm_posted_intr_nested_ipi(void); -extern asmlinkage void error_interrupt(void); -extern asmlinkage void irq_work_interrupt(void); -extern asmlinkage void uv_bau_message_intr1(void); - -extern asmlinkage void spurious_interrupt(void); -extern asmlinkage void thermal_interrupt(void); -extern asmlinkage void reschedule_interrupt(void); - -extern asmlinkage void irq_move_cleanup_interrupt(void); -extern asmlinkage void reboot_interrupt(void); -extern asmlinkage void threshold_interrupt(void); -extern asmlinkage void deferred_error_interrupt(void); - -extern asmlinkage void call_function_interrupt(void); -extern asmlinkage void call_function_single_interrupt(void); - -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data; struct pci_dev; struct msi_desc; @@ -58,61 +34,54 @@ struct msi_desc; enum irq_alloc_type { X86_IRQ_ALLOC_TYPE_IOAPIC = 1, X86_IRQ_ALLOC_TYPE_HPET, - X86_IRQ_ALLOC_TYPE_MSI, - X86_IRQ_ALLOC_TYPE_MSIX, + X86_IRQ_ALLOC_TYPE_PCI_MSI, + X86_IRQ_ALLOC_TYPE_PCI_MSIX, X86_IRQ_ALLOC_TYPE_DMAR, + X86_IRQ_ALLOC_TYPE_AMDVI, X86_IRQ_ALLOC_TYPE_UV, }; +struct ioapic_alloc_info { + int pin; + int node; + u32 is_level : 1; + u32 active_low : 1; + u32 valid : 1; +}; + +struct uv_alloc_info { + int limit; + int blade; + unsigned long offset; + char *name; + +}; + +/** + * irq_alloc_info - X86 specific interrupt allocation info + * @type: X86 specific allocation type + * @flags: Flags for allocation tweaks + * @devid: Device ID for allocations + * @hwirq: Associated hw interrupt number in the domain + * @mask: CPU mask for vector allocation + * @desc: Pointer to msi descriptor + * @data: Allocation specific data + * + * @ioapic: IOAPIC specific allocation data + * @uv: UV specific 
allocation data +*/ struct irq_alloc_info { enum irq_alloc_type type; u32 flags; - const struct cpumask *mask; /* CPU mask for vector allocation */ + u32 devid; + irq_hw_number_t hwirq; + const struct cpumask *mask; + struct msi_desc *desc; + void *data; + union { - int unused; -#ifdef CONFIG_HPET_TIMER - struct { - int hpet_id; - int hpet_index; - void *hpet_data; - }; -#endif -#ifdef CONFIG_PCI_MSI - struct { - struct pci_dev *msi_dev; - irq_hw_number_t msi_hwirq; - }; -#endif -#ifdef CONFIG_X86_IO_APIC - struct { - int ioapic_id; - int ioapic_pin; - int ioapic_node; - u32 ioapic_trigger : 1; - u32 ioapic_polarity : 1; - u32 ioapic_valid : 1; - struct IO_APIC_route_entry *ioapic_entry; - }; -#endif -#ifdef CONFIG_DMAR_TABLE - struct { - int dmar_id; - void *dmar_data; - }; -#endif -#ifdef CONFIG_X86_UV - struct { - int uv_limit; - int uv_blade; - unsigned long uv_offset; - char *uv_name; - }; -#endif -#if IS_ENABLED(CONFIG_VMD) - struct { - struct msi_desc *desc; - }; -#endif + struct ioapic_alloc_info ioapic; + struct uv_alloc_info uv; }; }; @@ -123,21 +92,23 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); #ifdef CONFIG_SMP -extern void send_cleanup_vector(struct irq_cfg *); +extern void vector_schedule_cleanup(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); #else -static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void vector_schedule_cleanup(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif - extern void apic_ack_edge(struct irq_data *data); -#else /* CONFIG_X86_LOCAL_APIC */ +#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ + +#ifdef CONFIG_X86_LOCAL_APIC +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +#else static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} -#endif /* CONFIG_X86_LOCAL_APIC */ +#endif /* Statistics */ extern atomic_t irq_err_count; @@ -150,12 +121,15 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif +extern char spurious_entries_start[]; + #define VECTOR_UNUSED NULL -#define VECTOR_RETRIGGERED ((void *)~0UL) +#define VECTOR_SHUTDOWN ((void *)-1L) +#define VECTOR_RETRIGGERED ((void *)-2L) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -#endif /* !ASSEMBLY_ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_HW_IRQ_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h deleted file mode 100644 index 705dafc2d11a..000000000000 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ /dev/null @@ -1,869 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - -/* - * This file contains definitions from Hyper-V Hypervisor Top-Level Functional - * Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - */ - -#ifndef _ASM_X86_HYPERV_TLFS_H -#define _ASM_X86_HYPERV_TLFS_H - -#include <linux/types.h> -#include <asm/page.h> - -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). 
- */ -#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 -#define HYPERV_CPUID_INTERFACE 0x40000001 -#define HYPERV_CPUID_VERSION 0x40000002 -#define HYPERV_CPUID_FEATURES 0x40000003 -#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 -#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 -#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A - -#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 -#define HYPERV_CPUID_MIN 0x40000005 -#define HYPERV_CPUID_MAX 0x4000ffff - -/* - * Feature identification. EAX indicates which features are available - * to the partition based upon the current partition privileges. - * These are HYPERV_CPUID_FEATURES.EAX bits. - */ - -/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ -#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* - * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM - * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available - */ -#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2) -/* - * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through - * HV_X64_MSR_STIMER3_COUNT) available - */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* - * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) - * are available - */ -#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ -#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ -#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ -#define HV_X64_MSR_RESET_AVAILABLE BIT(7) -/* - * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, - * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, - * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available - */ -#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC MSR is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE MSR is available */ -#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* - * There is a single feature flag that signifies if the partition has access - * to MSRs with local APIC and TSC frequencies. - */ -#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) - -/* - * Feature identification: indicates which flags were specified at partition - * creation. The format is the same as the partition creation flag structure - * defined in section Partition Creation Flags. - * These are HYPERV_CPUID_FEATURES.EBX bits. - */ -#define HV_X64_CREATE_PARTITIONS BIT(0) -#define HV_X64_ACCESS_PARTITION_ID BIT(1) -#define HV_X64_ACCESS_MEMORY_POOL BIT(2) -#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_X64_POST_MESSAGES BIT(4) -#define HV_X64_SIGNAL_EVENTS BIT(5) -#define HV_X64_CREATE_PORT BIT(6) -#define HV_X64_CONNECT_PORT BIT(7) -#define HV_X64_ACCESS_STATS BIT(8) -#define HV_X64_DEBUGGING BIT(11) -#define HV_X64_CPU_POWER_MANAGEMENT BIT(12) - -/* - * Feature identification. EDX indicates which miscellaneous features - * are available to the partition. - * These are HYPERV_CPUID_FEATURES.EDX bits. 
- */ -/* The MWAIT instruction is available (per section MONITOR / MWAIT) */ -#define HV_X64_MWAIT_AVAILABLE BIT(0) -/* Guest debugging support is available */ -#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) -/* Performance Monitor support is available*/ -#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) -/* Support for physical CPU dynamic partitioning events is available*/ -#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -/* - * Support for passing hypercall input parameter block via XMM - * registers is available - */ -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) -/* Support for a virtual guest idle state is available */ -#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) -/* Frequency MSRs available */ -#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) -/* Crash MSR available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) -/* stimer Direct Mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) - -/* - * Implementation recommendations. Indicates which behaviors the hypervisor - * recommends the OS implement for optimal performance. - * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits. - */ -/* - * Recommend using hypercall for address space switches rather - * than MOV to CR3 instruction - */ -#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) -/* Recommend using hypercall for local TLB flushes rather - * than INVLPG or MOV to CR3 instructions */ -#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) -/* - * Recommend using hypercall for remote TLB flushes rather - * than inter-processor interrupts - */ -#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) -/* - * Recommend using MSRs for accessing APIC registers - * EOI, ICR and TPR rather than their memory-mapped counterparts - */ -#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) -/* Recommend using the hypervisor-provided MSR to initiate a system RESET */ -#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) -/* - * Recommend using relaxed timing for this partition. If used, - * the VM should disable any watchdog timeouts that rely on the - * timely delivery of external interrupts - */ -#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) - -/* - * Recommend not using Auto End-Of-Interrupt feature - */ -#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) - -/* - * Recommend using cluster IPI hypercalls. - */ -#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) - -/* Recommend using the newer ExProcessorMasks interface */ -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) - -/* Recommend using enlightened VMCS */ -#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) - -/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ -#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) -#define HV_X64_NESTED_MSR_BITMAP BIT(19) - -/* Hyper-V specific model specific registers (MSRs) */ - -/* MSR used to identify the guest OS. */ -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - -/* MSR used to setup pages used to communicate with the hypervisor. */ -#define HV_X64_MSR_HYPERCALL 0x40000001 - -/* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX 0x40000002 - -/* MSR used to reset the guest OS. 
*/ -#define HV_X64_MSR_RESET 0x40000003 - -/* MSR used to provide vcpu runtime in 100ns units */ -#define HV_X64_MSR_VP_RUNTIME 0x40000010 - -/* MSR used to read the per-partition time reference counter */ -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 - -/* A partition's reference time stamp counter (TSC) page */ -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 - -/* MSR used to retrieve the TSC frequency */ -#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 - -/* MSR used to retrieve the local APIC timer frequency */ -#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 - -/* Define the virtual APIC registers */ -#define HV_X64_MSR_EOI 0x40000070 -#define HV_X64_MSR_ICR 0x40000071 -#define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 - -/* Define synthetic interrupt controller model specific registers. */ -#define HV_X64_MSR_SCONTROL 0x40000080 -#define HV_X64_MSR_SVERSION 0x40000081 -#define HV_X64_MSR_SIEFP 0x40000082 -#define HV_X64_MSR_SIMP 0x40000083 -#define HV_X64_MSR_EOM 0x40000084 -#define HV_X64_MSR_SINT0 0x40000090 -#define HV_X64_MSR_SINT1 0x40000091 -#define HV_X64_MSR_SINT2 0x40000092 -#define HV_X64_MSR_SINT3 0x40000093 -#define HV_X64_MSR_SINT4 0x40000094 -#define HV_X64_MSR_SINT5 0x40000095 -#define HV_X64_MSR_SINT6 0x40000096 -#define HV_X64_MSR_SINT7 0x40000097 -#define HV_X64_MSR_SINT8 0x40000098 -#define HV_X64_MSR_SINT9 0x40000099 -#define HV_X64_MSR_SINT10 0x4000009A -#define HV_X64_MSR_SINT11 0x4000009B -#define HV_X64_MSR_SINT12 0x4000009C -#define HV_X64_MSR_SINT13 0x4000009D -#define HV_X64_MSR_SINT14 0x4000009E -#define HV_X64_MSR_SINT15 0x4000009F - -/* - * Synthetic Timer MSRs. Four timers per vcpu. - */ -#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 -#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 -#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 -#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 -#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 -#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 -#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 -#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 - -/* Hyper-V guest idle MSR */ -#define HV_X64_MSR_GUEST_IDLE 0x400000F0 - -/* Hyper-V guest crash notification MSR's */ -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 - -/* TSC emulation after migration */ -#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 -#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 -#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 - -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - } __packed; -}; - -/* - * TSC page layout. - */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -} __packed; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. 
The implementation here conforms to these
- * yet-unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
-struct hv_reenlightenment_control {
- __u64 vector:8;
- __u64 reserved1:8;
- __u64 enabled:1;
- __u64 reserved2:15;
- __u64 target_vp:32;
-} __packed;
-
-struct hv_tsc_emulation_control {
- __u64 enabled:1;
- __u64 reserved:63;
-} __packed;
-
-struct hv_tsc_emulation_status {
- __u64 inprogress:1;
- __u64 reserved:63;
-} __packed;
-
-#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-
-/*
- * Crash notification (HV_X64_MSR_CRASH_CTL) flags.
- */
-#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
-#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
-#define HV_X64_MSR_CRASH_PARAMS \
- (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
-
-#define HV_IPI_LOW_VECTOR 0x10
-#define HV_IPI_HIGH_VECTOR 0xff
-
-/* Declare the various hypercall operations. */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
-#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
-
-#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
-#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-
-#define HV_PROCESSOR_POWER_STATE_C0 0
-#define HV_PROCESSOR_POWER_STATE_C1 1
-#define HV_PROCESSOR_POWER_STATE_C2 2
-#define HV_PROCESSOR_POWER_STATE_C3 3
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-
-typedef struct _HV_REFERENCE_TSC_PAGE {
- __u32 tsc_sequence;
- __u32 res1;
- __u64 tsc_scale;
- __s64 tsc_offset;
-} __packed HV_REFERENCE_TSC_PAGE,
*PHV_REFERENCE_TSC_PAGE; - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) - -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - -/* Define virtual processor assist page structure. 
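Stepping back to the SynIC message page defined above: consumers such as VMBus drain a SINT slot with a read, clear, EOM sequence so the hypervisor can deliver any pended message. A minimal sketch modeled on the VMBus interrupt path; msg_page, sint and handle_payload() are placeholders of this example, not part of the header:

	struct hv_message *msg = &msg_page->sint_message[sint];

	if (READ_ONCE(msg->header.message_type) != HVMSG_NONE) {
		handle_payload(msg->u.payload, msg->header.payload_size);

		WRITE_ONCE(msg->header.message_type, HVMSG_NONE);
		mb();	/* empty the slot before checking msg_pending */
		if (msg->header.message_flags.msg_pending)
			wrmsrl(HV_X64_MSR_EOM, 0);
	}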
*/ -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u32 padding; - __u64 current_nested_vmcs; -} __packed; - -struct hv_enlightened_vmcs { - u32 revision_id; - u32 abort; - - u16 host_es_selector; - u16 host_cs_selector; - u16 host_ss_selector; - u16 host_ds_selector; - u16 host_fs_selector; - u16 host_gs_selector; - u16 host_tr_selector; - - u16 padding16_1; - - u64 host_ia32_pat; - u64 host_ia32_efer; - - u64 host_cr0; - u64 host_cr3; - u64 host_cr4; - - u64 host_ia32_sysenter_esp; - u64 host_ia32_sysenter_eip; - u64 host_rip; - u32 host_ia32_sysenter_cs; - - u32 pin_based_vm_exec_control; - u32 vm_exit_controls; - u32 secondary_vm_exec_control; - - u64 io_bitmap_a; - u64 io_bitmap_b; - u64 msr_bitmap; - - u16 guest_es_selector; - u16 guest_cs_selector; - u16 guest_ss_selector; - u16 guest_ds_selector; - u16 guest_fs_selector; - u16 guest_gs_selector; - u16 guest_ldtr_selector; - u16 guest_tr_selector; - - u32 guest_es_limit; - u32 guest_cs_limit; - u32 guest_ss_limit; - u32 guest_ds_limit; - u32 guest_fs_limit; - u32 guest_gs_limit; - u32 guest_ldtr_limit; - u32 guest_tr_limit; - u32 guest_gdtr_limit; - u32 guest_idtr_limit; - - u32 guest_es_ar_bytes; - u32 guest_cs_ar_bytes; - u32 guest_ss_ar_bytes; - u32 guest_ds_ar_bytes; - u32 guest_fs_ar_bytes; - u32 guest_gs_ar_bytes; - u32 guest_ldtr_ar_bytes; - u32 guest_tr_ar_bytes; - - u64 guest_es_base; - u64 guest_cs_base; - u64 guest_ss_base; - u64 guest_ds_base; - u64 guest_fs_base; - u64 guest_gs_base; - u64 guest_ldtr_base; - u64 guest_tr_base; - u64 guest_gdtr_base; - u64 guest_idtr_base; - - u64 padding64_1[3]; - - u64 vm_exit_msr_store_addr; - u64 vm_exit_msr_load_addr; - u64 vm_entry_msr_load_addr; - - u64 cr3_target_value0; - u64 cr3_target_value1; - u64 cr3_target_value2; - u64 cr3_target_value3; - - u32 page_fault_error_code_mask; - u32 page_fault_error_code_match; - - u32 cr3_target_count; - u32 vm_exit_msr_store_count; - u32 vm_exit_msr_load_count; - u32 vm_entry_msr_load_count; - - u64 tsc_offset; - u64 virtual_apic_page_addr; - u64 vmcs_link_pointer; - - u64 guest_ia32_debugctl; - u64 guest_ia32_pat; - u64 guest_ia32_efer; - - u64 guest_pdptr0; - u64 guest_pdptr1; - u64 guest_pdptr2; - u64 guest_pdptr3; - - u64 guest_pending_dbg_exceptions; - u64 guest_sysenter_esp; - u64 guest_sysenter_eip; - - u32 guest_activity_state; - u32 guest_sysenter_cs; - - u64 cr0_guest_host_mask; - u64 cr4_guest_host_mask; - u64 cr0_read_shadow; - u64 cr4_read_shadow; - u64 guest_cr0; - u64 guest_cr3; - u64 guest_cr4; - u64 guest_dr7; - - u64 host_fs_base; - u64 host_gs_base; - u64 host_tr_base; - u64 host_gdtr_base; - u64 host_idtr_base; - u64 host_rsp; - - u64 ept_pointer; - - u16 virtual_processor_id; - u16 padding16_2[3]; - - u64 padding64_2[5]; - u64 guest_physical_address; - - u32 vm_instruction_error; - u32 vm_exit_reason; - u32 vm_exit_intr_info; - u32 vm_exit_intr_error_code; - u32 idt_vectoring_info_field; - u32 idt_vectoring_error_code; - u32 vm_exit_instruction_len; - u32 vmx_instruction_info; - - u64 exit_qualification; - u64 exit_io_instruction_ecx; - u64 exit_io_instruction_esi; - u64 exit_io_instruction_edi; - u64 exit_io_instruction_eip; - - u64 guest_linear_address; - u64 guest_rsp; - u64 guest_rflags; - - u32 guest_interruptibility_info; - u32 cpu_based_vm_exec_control; - u32 exception_bitmap; - u32 vm_entry_controls; - u32 vm_entry_intr_info_field; - u32 vm_entry_exception_error_code; - u32 vm_entry_instruction_len; 
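	/*
	 * Usage note (see the HV_VMX_ENLIGHTENED_CLEAN_FIELD_* bits below):
	 * after software writes a field it must clear the clean bit for that
	 * field's group, e.g. for the CR/DR group
	 *
	 *	evmcs->guest_cr0 = val;
	 *	evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
	 *
	 * otherwise the hypervisor is free to reuse its cached copy on the
	 * next VM entry.
	 */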
- u32 tpr_threshold; - - u64 guest_rip; - - u32 hv_clean_fields; - u32 hv_padding_32; - u32 hv_synthetic_controls; - struct { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 reserved:30; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - - u64 hv_vm_id; - u64 partition_assist_page; - u64 padding64_4[4]; - u64 guest_bndcfgs; - u64 padding64_5[7]; - u64 xss_exit_bitmap; - u64 padding64_6[7]; -} __packed; - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF - -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. - */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. */ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 reserved2:46; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. 
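 *
 * Concretely: additional_pages is 11 bits wide, so one entry covers at
 * most 1 + 2047 = 2048 pages, i.e. 8M of address space at 4K page size.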
- */
-#define HV_MAX_FLUSH_PAGES (2048)
-
-/* HvFlushGuestPhysicalAddressList hypercall */
-union hv_gpa_page_range {
- u64 address_space;
- struct {
- u64 additional_pages:11;
- u64 largepage:1;
- u64 basepfn:52;
- } page;
-};
-
-/*
- * All input flush parameters should be in a single page. The max flush
- * count is equal to how many entries of union hv_gpa_page_range can
- * be populated into the input parameter page.
- */
-#define HV_MAX_FLUSH_REP_COUNT ((PAGE_SIZE - 2 * sizeof(u64)) / \
- sizeof(union hv_gpa_page_range))
-
-struct hv_guest_mapping_flush_list {
- u64 address_space;
- u64 flags;
- union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
-};
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
-#endif
diff --git a/arch/x86/include/asm/hyperv_timer.h b/arch/x86/include/asm/hyperv_timer.h
new file mode 100644
index 000000000000..388fa81b8f38
--- /dev/null
+++ b/arch/x86/include/asm/hyperv_timer.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_HYPERV_TIMER_H
+#define _ASM_X86_HYPERV_TIMER_H
+
+#include <asm/msr.h>
+
+#define hv_get_raw_timer() rdtsc_ordered()
+
+#endif
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 8c5aaba6633f..9ad86a7d13f6 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -29,6 +29,8 @@ enum x86_hypervisor_type {
 X86_HYPER_XEN_HVM,
 X86_HYPER_KVM,
 X86_HYPER_JAILHOUSE,
+ X86_HYPER_ACRN,
+ X86_HYPER_BHYVE,
 };
 #ifdef CONFIG_HYPERVISOR_GUEST
@@ -52,8 +54,21 @@ struct hypervisor_x86 {
 /* runtime callbacks */
 struct x86_hyper_runtime runtime;
+
+ /* ignore nopv parameter */
+ bool ignore_nopv;
 };
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+extern const struct hypervisor_x86 x86_hyper_jailhouse;
+extern const struct hypervisor_x86 x86_hyper_acrn;
+extern const struct hypervisor_x86 x86_hyper_bhyve;
+extern struct hypervisor_x86 x86_hyper_xen_hvm;
+
+extern bool nopv;
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
 static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 89789e8c80f6..c715097e92fd 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -19,6 +19,8 @@ extern unsigned int cached_irq_mask;
 #define PIC_MASTER_OCW3 PIC_MASTER_ISR
 #define PIC_SLAVE_CMD 0xa0
 #define PIC_SLAVE_IMR 0xa1
+#define PIC_ELCR1 0x4d0
+#define PIC_ELCR2 0x4d1
 /* i8259A PIC related value */
 #define PIC_CASCADE_IR 2
@@ -67,6 +69,8 @@ struct legacy_pic {
 void (*make_irq)(unsigned int irq);
 };
+void legacy_pic_pcat_compat(void);
+
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 2c5f7861d373..9d69f3f8dbab 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -2,7 +2,6 @@
 #ifndef _ASM_X86_IA32_H
 #define _ASM_X86_IA32_H
-
 #ifdef CONFIG_IA32_EMULATION
 #include <linux/compat.h>
@@ -57,17 +56,37 @@
struct stat64 { unsigned long long st_ino; } __attribute__((packed)); -#define IA32_STACK_TOP IA32_PAGE_OFFSET +extern bool __ia32_enabled; + +static __always_inline bool ia32_enabled(void) +{ + return __ia32_enabled; +} + +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + +#else /* !CONFIG_IA32_EMULATION */ -#ifdef __KERNEL__ -struct linux_binprm; -extern int ia32_setup_arg_pages(struct linux_binprm *bprm, - unsigned long stack_top, int exec_stack); -struct mm_struct; -extern void ia32_pick_mmap_layout(struct mm_struct *mm); +static __always_inline bool ia32_enabled(void) +{ + return IS_ENABLED(CONFIG_X86_32); +} + +static inline void ia32_disable(void) {} #endif -#endif /* !CONFIG_IA32_SUPPORT */ +static inline bool ia32_enabled_verbose(void) +{ + bool enabled = ia32_enabled(); + + if (IS_ENABLED(CONFIG_IA32_EMULATION) && !enabled) + pr_notice_once("32-bit emulation disabled. You can reenable with ia32_emulation=on\n"); + + return enabled; +} #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h deleted file mode 100644 index aa065c94ccf5..000000000000 --- a/arch/x86/include/asm/ia32_unistd.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_IA32_UNISTD_H -#define _ASM_X86_IA32_UNISTD_H - -/* - * This file contains the system call numbers of the ia32 compat ABI, - * this is for the kernel only. - */ -#define __SYSCALL_ia32_NR(x) (x) -#include <asm/unistd_32_ia32.h> - -#endif /* _ASM_X86_IA32_UNISTD_H */ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000000..5e45d6424722 --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include <linux/types.h> + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. + */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLER__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +/* + * Create a dummy function pointer reference to prevent objtool from marking + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by + * apply_seal_endbr()). + */ +#define IBT_NOSEAL(fname) \ + ".pushsection .discard.ibt_endbr_noseal\n\t" \ + _ASM_PTR fname "\n\t" \ + ".popsection\n\t" + +static __always_inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static __always_inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4, such that it will be unique to (former) + * ENDBR sites. Additionally it carries UDB as immediate. 
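 *
 * (Byte-wise this is 0f 1f 40 d6: NOPL with ModRM 0x40, the disp8 form
 * of (%rax), whose displacement byte 0xd6 is both -42 and the UDB
 * opcode mentioned above.)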
+ */ + return 0xd6401f0f; /* nopl -42(%rax) */ +} + +static inline bool __is_endbr(u32 val) +{ + if (val == gen_endbr_poison()) + return true; + + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +extern __noendbr bool is_endbr(u32 *val); +extern __noendbr u64 ibt_save(bool disable); +extern __noendbr void ibt_restore(u64 save); + +#else /* __ASSEMBLER__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLER__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLER__ + +#define ASM_ENDBR +#define IBT_NOSEAL(name) + +#define __noendbr + +static inline bool is_endbr(u32 *val) { return false; } + +static inline u64 ibt_save(bool disable) { return 0; } +static inline void ibt_restore(u64 save) { } + +#else /* __ASSEMBLER__ */ + +#define ENDBR + +#endif /* __ASSEMBLER__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h new file mode 100644 index 000000000000..3218770670d3 --- /dev/null +++ b/arch/x86/include/asm/idtentry.h @@ -0,0 +1,775 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IDTENTRY_H +#define _ASM_X86_IDTENTRY_H + +/* Interrupts/Exceptions */ +#include <asm/trapnr.h> + +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + +#ifndef __ASSEMBLER__ +#include <linux/entry-common.h> +#include <linux/hardirq.h> + +#include <asm/irq_stack.h> + +typedef void (*idtentry_t)(struct pt_regs *regs); + +/** + * DECLARE_IDTENTRY - Declare functions for simple IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares four functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the FRED event dispatcher (maybe unused) + * - The C handler called from the ASM entry point + * + * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it + * declares the entry points for usage in C code. There is an ASM variant + * as well which is used to emit the entry stubs in entry_32/64.S. + */ +#define DECLARE_IDTENTRY(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + void fred_##func(struct pt_regs *regs); \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY - Emit code for simple IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * irqentry_enter() contains common code which has to be invoked before + * arbitrary code in the body. irqentry_exit() contains common code + * which has to run before returning to the low level assembly code. 
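 *
 * For example, the #DE handler in traps.c is essentially just:
 *
 *	DEFINE_IDTENTRY(exc_divide_error)
 *	{
 *		do_error_trap(regs, 0, "divide error", X86_TRAP_DE,
 *			      SIGFPE, FPE_INTDIV, error_get_trap_addr(regs));
 *	}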
+ */ +#define DEFINE_IDTENTRY(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/* Special case for 32bit IRET 'trap' */ +#define DECLARE_IDTENTRY_SW DECLARE_IDTENTRY +#define DEFINE_IDTENTRY_SW DEFINE_IDTENTRY + +/** + * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Same as DECLARE_IDTENTRY, but has an extra error_code argument for the + * C-handler. + */ +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_ERRORCODE - Emit code for simple IDT entry points + * Error code pushed by hardware + * @func: Function name of the entry point + * + * Same as DEFINE_IDTENTRY, but has an extra error_code argument + */ +#define DEFINE_IDTENTRY_ERRORCODE(func) \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + instrumentation_begin(); \ + __##func (regs, error_code); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code) + +/** + * DECLARE_IDTENTRY_RAW - Declare functions for raw IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_RAW - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW(func) \ +__visible noinstr void func(struct pt_regs *regs) + +/** + * DEFINE_FREDENTRY_RAW - Emit code for raw FRED entry points + * @func: Function name of the entry point + * + * @func is called from the FRED event dispatcher with interrupts disabled. + * + * See @DEFINE_IDTENTRY_RAW for further details. 
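 *
 * A representative DEFINE_IDTENTRY_RAW() user is #BP: poke_int3_handler()
 * must run before any instrumentable code, so the handler starts out
 * (abridged; the irqentry bookkeeping happens manually in the omitted
 * tail):
 *
 *	DEFINE_IDTENTRY_RAW(exc_int3)
 *	{
 *		if (poke_int3_handler(regs))
 *			return;
 *		...
 *	}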
+ */ +#define DEFINE_FREDENTRY_RAW(func) \ +noinstr void fred_##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_RAW_ERRORCODE - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the + * irqentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ +__visible noinstr void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry + * points (common/spurious) + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points + * @func: Function name of the entry point + * + * The vector number is pushed by the low level entry stub and handed + * to the function as error_code argument which needs to be truncated + * to an u8 because the push is sign extending. + * + * irq_enter/exit_rcu() are invoked before the function body and the + * KVM L1D flush request is set. Stack switching to the interrupt stack + * has to be done in the function body if necessary. + */ +#define DEFINE_IDTENTRY_IRQ(func) \ +static void __##func(struct pt_regs *regs, u32 vector); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + u32 vector = (u32)(u8)error_code; \ + \ + kvm_set_cpu_l1tf_flush_l1d(); \ + instrumentation_begin(); \ + run_irq_on_irqstack_cond(__##func, regs, vector); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs, u32 vector) + +/** + * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points + * @func: Function name of the entry point + * + * irqentry_enter/exit() and irq_enter/exit_rcu() are invoked before the + * function body. KVM L1D flush request is set. 
+ * + * Runs the function on the interrupt stack if the entry hit kernel mode + */ +#define DEFINE_IDTENTRY_SYSVEC(func) \ +static void __##func(struct pt_regs *regs); \ + \ +static __always_inline void instr_##func(struct pt_regs *regs) \ +{ \ + run_sysvec_on_irqstack_cond(__##func, regs); \ +} \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + kvm_set_cpu_l1tf_flush_l1d(); \ + instrumentation_begin(); \ + instr_##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +void fred_##func(struct pt_regs *regs) \ +{ \ + instr_##func (regs); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY_SYSVEC_SIMPLE - Emit code for simple system vector IDT + * entry points + * @func: Function name of the entry point + * + * Runs the function on the interrupted stack. No switch to IRQ stack and + * only the minimal __irq_enter/exit() handling. + * + * Only use for 'empty' vectors like reschedule IPI and KVM posted + * interrupt vectors. + */ +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +static __always_inline void instr_##func(struct pt_regs *regs) \ +{ \ + __irq_enter_raw(); \ + __##func (regs); \ + __irq_exit_raw(); \ +} \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + irqentry_state_t state = irqentry_enter(regs); \ + \ + kvm_set_cpu_l1tf_flush_l1d(); \ + instrumentation_begin(); \ + instr_##func (regs); \ + instrumentation_end(); \ + irqentry_exit(regs, state); \ +} \ + \ +void fred_##func(struct pt_regs *regs) \ +{ \ + instr_##func (regs); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit + * difference + */ +#define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#ifdef CONFIG_X86_64 +/** + * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW, but declares also the NOIST C handler + * which is called from the ASM entry point on user mode entry + */ +#define DECLARE_IDTENTRY_IST(vector, func) \ + DECLARE_IDTENTRY_RAW(vector, func); \ + __visible void noist_##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_VC - Declare functions for the VC entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE, but declares also the + * safe_stack C handler. 
+ */ +#define DECLARE_IDTENTRY_VC(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_IST - Emit code for IST entry points + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW + */ +#define DEFINE_IDTENTRY_IST(func) \ + DEFINE_IDTENTRY_RAW(func) + +/** + * DEFINE_IDTENTRY_NOIST - Emit code for NOIST entry points which + * belong to a IST entry point (MCE, DB) + * @func: Function name of the entry point. Must be the same as + * the function name of the corresponding IST variant + * + * Maps to DEFINE_IDTENTRY_RAW(). + */ +#define DEFINE_IDTENTRY_NOIST(func) \ + DEFINE_IDTENTRY_RAW(noist_##func) + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_DF(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(func) + +/** + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler + * when raised from kernel mode + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) + +/** + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler + * when raised from user mode + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_VC_USER(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) + +#else /* CONFIG_X86_64 */ + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares two functions: + * - The ASM entry point: asm_##func + * - The C handler called from the C shim + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + asmlinkage void asm_##func(void); \ + __visible void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault on 32bit + * @func: Function name of the entry point + * + * This is called through the doublefault shim which already provides + * cr2 in the address argument. 
+ */
+#define DEFINE_IDTENTRY_DF(func) \
+__visible noinstr void func(struct pt_regs *regs, \
+ unsigned long error_code, \
+ unsigned long address)
+
+#endif /* !CONFIG_X86_64 */
+
+/* C-Code mapping */
+#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
+#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
+#define DEFINE_FREDENTRY_NMI DEFINE_FREDENTRY_RAW
+
+#ifdef CONFIG_X86_64
+#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_MCE DEFINE_FREDENTRY_RAW
+
+#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST
+#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW
+#endif
+
+void idt_install_sysvec(unsigned int n, const void *function);
+void fred_install_sysvec(unsigned int vector, const idtentry_t function);
+
+#define sysvec_install(vector, function) { \
+ if (IS_ENABLED(CONFIG_X86_FRED)) \
+ fred_install_sysvec(vector, function); \
+ if (!cpu_feature_enabled(X86_FEATURE_FRED)) \
+ idt_install_sysvec(vector, asm_##function); \
+}
+
+#else /* !__ASSEMBLER__ */
+
+/*
+ * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs.
+ */
+#define DECLARE_IDTENTRY(vector, func) \
+ idtentry vector asm_##func func has_error_code=0
+
+#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \
+ idtentry vector asm_##func func has_error_code=1
+
+/* Special case for 32bit IRET 'trap'. Do not emit ASM code */
+#define DECLARE_IDTENTRY_SW(vector, func)
+
+#define DECLARE_IDTENTRY_RAW(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \
+ DECLARE_IDTENTRY_ERRORCODE(vector, func)
+
+/* Entries for common/spurious (device) interrupts */
+#define DECLARE_IDTENTRY_IRQ(vector, func) \
+ idtentry_irq vector func
+
+/* System vector entries */
+#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+#ifdef CONFIG_X86_64
+# define DECLARE_IDTENTRY_MCE(vector, func) \
+ idtentry_mce_db vector asm_##func func
+
+# define DECLARE_IDTENTRY_DEBUG(vector, func) \
+ idtentry_mce_db vector asm_##func func
+
+# define DECLARE_IDTENTRY_DF(vector, func) \
+ idtentry_df vector asm_##func func
+
+# define DECLARE_IDTENTRY_XENCB(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+# define DECLARE_IDTENTRY_VC(vector, func) \
+ idtentry_vc vector asm_##func func
+
+#else
+# define DECLARE_IDTENTRY_MCE(vector, func) \
+ DECLARE_IDTENTRY(vector, func)
+
+/* No ASM emitted for DF as this goes through a C shim */
+# define DECLARE_IDTENTRY_DF(vector, func)
+
+/* No ASM emitted for XEN hypervisor callback */
+# define DECLARE_IDTENTRY_XENCB(vector, func)
+
+#endif
+
+/* No ASM code emitted for NMI */
+#define DECLARE_IDTENTRY_NMI(vector, func)
+
+/*
+ * ASM code to emit the common vector entry stubs where each stub is
+ * packed into IDT_ALIGN bytes.
+ *
+ * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
+ * GCC treats the local vector variable as unsigned int and would expand
+ * all vectors above 0x7F to a 5 byte push. The original code did an
+ * adjustment of the vector number to be in the signed byte range to avoid
+ * this. While clever, it's mindbogglingly counterintuitive and requires the
+ * odd conversion back to a real vector number in the C entry points.
Using + * .byte achieves the same thing and the only fixup needed in the C entry + * point is to mask off the bits above bit 7 because the push is sign + * extending. + */ + .align IDT_ALIGN +SYM_CODE_START(irq_entries_start) + vector=FIRST_EXTERNAL_VECTOR + .rept NR_EXTERNAL_VECTORS + UNWIND_HINT_IRET_REGS +0 : + ENDBR + .byte 0x6a, vector + jmp asm_common_interrupt + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc + vector = vector+1 + .endr +SYM_CODE_END(irq_entries_start) + +#ifdef CONFIG_X86_LOCAL_APIC + .align IDT_ALIGN +SYM_CODE_START(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept NR_SYSTEM_VECTORS + UNWIND_HINT_IRET_REGS +0 : + ENDBR + .byte 0x6a, vector + jmp asm_spurious_interrupt + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc + vector = vector+1 + .endr +SYM_CODE_END(spurious_entries_start) +#endif + +#endif /* __ASSEMBLER__ */ + +/* + * The actual entry points. Note that DECLARE_IDTENTRY*() serves two + * purposes: + * - provide the function declarations when included from C-Code + * - emit the ASM stubs when included from entry_32/64.S + * + * This avoids duplicate defines and ensures that everything is consistent. + */ + +/* + * Dummy trap number so the low level ASM macro vector number checks do not + * match which results in emitting plain IDTENTRY stubs without bells and + * whistles. + */ +#define X86_TRAP_OTHER 0xFFFF + +/* Simple exception entry points. No hardware error code */ +DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); +DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); +DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); +DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); +DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); +DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); +DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); +DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error); + +/* 32bit software IRET trap. Do not emit ASM code */ +DECLARE_IDTENTRY_SW(X86_TRAP_IRET, iret_error); + +/* Simple exception entries with error code pushed by hardware */ +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); + +/* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); +DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); + +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + +#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_64 +DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); +#endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check); +#endif +#endif + +/* NMI */ + +#if IS_ENABLED(CONFIG_KVM_INTEL) +/* + * Special entry point for VMX which invokes this on the kernel stack, even for + * 64-bit, i.e. without using an IST. asm_exc_nmi() requires an IST to work + * correctly vs. the NMI 'executing' marker. Used for 32-bit kernels as well + * to avoid more ifdeffery. 
+ */ +DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_kvm_vmx); +#endif + +DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi); +#endif + +/* #DB */ +#ifdef CONFIG_X86_64 +DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); +#else +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug); +#endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); +#endif + +/* #DF */ +DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); +#endif + +/* #CP */ +#ifdef CONFIG_X86_CET +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + +/* #VC */ +#ifdef CONFIG_AMD_MEM_ENCRYPT +DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); +#endif + +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap); +#endif + +#ifdef CONFIG_INTEL_TDX_GUEST +DECLARE_IDTENTRY(X86_TRAP_VE, exc_virtualization_exception); +#endif + +/* Device interrupts common/spurious */ +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt); +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt); +#endif + +/* System vector entry points */ +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt); +DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt); +DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); +DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); +#endif + +#ifdef CONFIG_SMP +DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); +DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +#else +# define fred_sysvec_reschedule_ipi NULL +# define fred_sysvec_reboot NULL +# define fred_sysvec_call_function_single NULL +# define fred_sysvec_call_function NULL +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +# ifdef CONFIG_X86_MCE_THRESHOLD +DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +# else +# define fred_sysvec_threshold NULL +# endif + +# ifdef CONFIG_X86_MCE_AMD +DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error); +# else +# define fred_sysvec_deferred_error NULL +# endif + +# ifdef CONFIG_X86_THERMAL_VECTOR +DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal); +# else +# define fred_sysvec_thermal NULL +# endif + +# ifdef CONFIG_IRQ_WORK +DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +# else +# define fred_sysvec_irq_work NULL +# endif +#endif + +#if IS_ENABLED(CONFIG_KVM) +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +#else +# define fred_sysvec_kvm_posted_intr_ipi NULL +# define fred_sysvec_kvm_posted_intr_wakeup_ipi NULL +# define fred_sysvec_kvm_posted_intr_nested_ipi NULL +#endif + +# ifdef CONFIG_X86_POSTED_MSI +DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification); +#else +# define fred_sysvec_posted_msi_notification NULL +# endif + +#if IS_ENABLED(CONFIG_HYPERV) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); 
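Each DECLARE_IDTENTRY_SYSVEC() above is paired with a DEFINE_IDTENTRY_SYSVEC() at the implementation site. For a concrete shape, the local APIC timer handler in apic.c reads roughly as follows (the EOI helper is ack_APIC_irq() or apic_eoi() depending on kernel version):

	DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
	{
		struct pt_regs *old_regs = set_irq_regs(regs);

		ack_APIC_irq();
		local_apic_timer_interrupt();
		set_irq_regs(old_regs);
	}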
+DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); +#endif + +#if IS_ENABLED(CONFIG_ACRN_GUEST) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); +#endif + +#ifdef CONFIG_XEN_PVHVM +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +#endif + +#ifdef CONFIG_KVM_GUEST +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt); +#endif + +#undef X86_TRAP_OTHER + +#endif diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h index ebea2c9d2cdc..0d1dbf235679 100644 --- a/arch/x86/include/asm/imr.h +++ b/arch/x86/include/asm/imr.h @@ -1,13 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * imr.h: Isolated Memory Region API * * Copyright(c) 2013 Intel Corporation. * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #ifndef _IMR_H #define _IMR_H diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 1c78580e58be..1b3060a3425c 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -1,26 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_H #define _ASM_X86_INAT_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ -#include <asm/inat_types.h> +#include <asm/inat_types.h> /* __ignore_sync_check__ */ /* * Internal bits. 
Don't use bitmasks directly, because these bits are @@ -49,6 +35,10 @@ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_PFX_EVEX 15 /* EVEX prefix */ +/* x86-64 REX2 prefix */ +#define INAT_PFX_REX2 16 /* 0xD5 */ +/* AMD XOP prefix */ +#define INAT_PFX_XOP 17 /* 0x8F */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -64,7 +54,7 @@ /* Legacy prefix */ #define INAT_PFX_OFFS 0 -#define INAT_PFX_BITS 4 +#define INAT_PFX_BITS 5 #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) /* Escape opcodes */ @@ -89,8 +79,13 @@ #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_XOPOK INAT_VEXOK #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) #define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) +#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8)) +#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9)) +#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10)) +#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -119,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, insn_byte_t vex_pp); +extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, + insn_byte_t map_select); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) @@ -142,6 +139,11 @@ static inline int inat_is_rex_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_REX; } +static inline int inat_is_rex2_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX2; +} + static inline int inat_last_prefix_id(insn_attr_t attr) { if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) @@ -167,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; } +static inline int inat_is_xop_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_XOP; +} + static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; @@ -232,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr) return attr & INAT_VEXOK; } +static inline int inat_accept_xop(insn_attr_t attr) +{ + return attr & INAT_XOPOK; +} + static inline int inat_must_vex(insn_attr_t attr) { return attr & (INAT_VEXONLY | INAT_EVEXONLY); @@ -241,4 +253,14 @@ static inline int inat_must_evex(insn_attr_t attr) { return attr & INAT_EVEXONLY; } + +static inline int inat_evex_scalable(insn_attr_t attr) +{ + return attr & INAT_EVEX_SCALABLE; +} + +static inline int inat_is_invalid64(insn_attr_t attr) +{ + return attr & INAT_INV64; +} #endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h index cb3c20ce39cf..b047efa9ddc2 100644 --- a/arch/x86/include/asm/inat_types.h +++ b/arch/x86/include/asm/inat_types.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INAT_TYPES_H #define _ASM_X86_INAT_TYPES_H /* * x86 instruction attributes * * Written by Masami Hiramatsu <mhiramat@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at 
your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ /* Instruction attributes */ diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 5f1d3c421f68..01ccdd168df0 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -4,6 +4,7 @@ struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void (*free_pgt_page)(void *, void *); /* free buf for page table */ void *context; /* context for alloc_pgt_page */ unsigned long page_flag; /* page flag for PMD or PUD entry */ unsigned long offset; /* ident mapping offset */ @@ -14,4 +15,6 @@ struct x86_mapping_info { int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, unsigned long pstart, unsigned long pend); +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd); + #endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 2b6ccf2c49f1..4733e9064ee5 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -15,9 +15,35 @@ #define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf) #define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4)) +int pt_regs_offset(struct pt_regs *regs, int regno); + +bool insn_has_rep_prefix(struct insn *insn); void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); +int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs); +unsigned long *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); +int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip); +int insn_fetch_from_user(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); +bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE], int buf_size); + +enum insn_mmio_type { + INSN_MMIO_DECODE_FAILED, + INSN_MMIO_WRITE, + INSN_MMIO_WRITE_IMM, + INSN_MMIO_READ, + INSN_MMIO_READ_ZERO_EXTEND, + INSN_MMIO_READ_SIGN_EXTEND, + INSN_MMIO_MOVS, +}; + +enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); + +bool insn_is_nop(struct insn *insn); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index c2c01f84df75..846d21c1a7f8 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -1,27 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_INSN_H #define _ASM_X86_INSN_H /* * x86 instruction analysis * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
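Returning to the insn-eval declarations above: insn_decode_mmio() and insn_get_modrm_reg_ptr() are the building blocks of the TDX/SEV-style MMIO emulation paths. A minimal sketch of the plain-store case (the calling convention and the memcpy_toio() I/O helper are assumptions of this example, not part of the header):

	static int emulate_mmio_write(struct insn *insn, struct pt_regs *regs,
				      void __iomem *target)
	{
		unsigned long *reg;
		int bytes = 0;

		if (insn_decode_mmio(insn, &bytes) != INSN_MMIO_WRITE)
			return -EINVAL;		/* only plain register stores here */

		reg = insn_get_modrm_reg_ptr(insn, regs);
		if (!reg)
			return -EINVAL;

		memcpy_toio(target, reg, bytes);	/* low 'bytes' bytes of *reg */
		return 0;
	}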
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2009 */ +#include <asm/byteorder.h> /* insn_attr_t is defined in inat.h */ -#include <asm/inat.h> +#include <asm/inat.h> /* __ignore_sync_check__ */ + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) struct insn_field { union { @@ -33,13 +23,58 @@ struct insn_field { unsigned char nbytes; }; +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} +#endif + struct insn { struct insn_field prefixes; /* * Prefixes * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ - struct insn_field vex_prefix; /* VEX prefix */ + union { + struct insn_field vex_prefix; /* VEX prefix */ + struct insn_field xop_prefix; /* XOP prefix */ + }; struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 @@ -58,6 +93,7 @@ struct insn { struct insn_field immediate2; /* for 64bit imm or seg16 */ }; + int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; @@ -79,10 +115,15 @@ struct insn { #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) #define X86_SIB_BASE(sib) ((sib) & 0x07) -#define X86_REX_W(rex) ((rex) & 8) -#define X86_REX_R(rex) ((rex) & 4) -#define X86_REX_X(rex) ((rex) & 2) -#define X86_REX_B(rex) ((rex) & 1) +#define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */ +#define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */ +#define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */ +#define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */ + +#define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */ +#define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */ +#define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */ +#define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */ /* VEX bit flags */ #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ @@ -91,21 +132,44 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 
0x1f /* VEX3.M Maximum value */ +/* XOP bit fields */ +#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */ +#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */ +#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */ +#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */ +#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */ +#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */ +#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */ +#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */ +#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */ +#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -extern void insn_get_prefixes(struct insn *insn); -extern void insn_get_opcode(struct insn *insn); -extern void insn_get_modrm(struct insn *insn); -extern void insn_get_sib(struct insn *insn); -extern void insn_get_displacement(struct insn *insn); -extern void insn_get_immediate(struct insn *insn); -extern void insn_get_length(struct insn *insn); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) @@ -116,18 +180,19 @@ static inline void insn_get_attribute(struct insn *insn) /* Instruction uses RIP-relative addressing */ extern int insn_rip_relative(struct insn *insn); -/* Init insn for kernel text */ -static inline void kernel_insn_init(struct insn *insn, - const void *kaddr, int buf_len) +static inline int insn_is_rex2(struct insn *insn) { -#ifdef CONFIG_X86_64 - insn_init(insn, kaddr, buf_len, 1); -#else /* CONFIG_X86_32 */ - insn_init(insn, kaddr, buf_len, 0); -#endif + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return insn->rex_prefix.nbytes == 2; } -static inline int insn_is_avx(struct insn *insn) +static inline insn_byte_t insn_rex2_m_bit(struct insn *insn) +{ + return X86_REX2_M(insn->rex_prefix.bytes[1]); +} + +static inline int insn_is_avx_or_xop(struct insn *insn) { if (!insn->prefixes.got) insn_get_prefixes(insn); @@ -141,11 +206,25 @@ static inline int insn_is_evex(struct insn *insn) return (insn->vex_prefix.nbytes == 4); } -/* Ensure this instruction is decoded completely */ -static inline int insn_complete(struct insn *insn) +/* If we already know this is AVX/XOP encoded */ +static inline int avx_insn_is_xop(struct insn *insn) +{ + insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]); + + return inat_is_xop_prefix(attr); +} + +static inline int insn_is_xop(struct insn *insn) +{ + if (!insn_is_avx_or_xop(insn)) + return 0; + + return avx_insn_is_xop(insn); +} + +static inline int insn_has_emulate_prefix(struct insn *insn) { - return insn->opcode.got && insn->modrm.got && insn->sib.got && - insn->displacement.got && insn->immediate.got; + return !!insn->emulate_prefix_size; } static inline insn_byte_t insn_vex_m_bits(struct insn 
*insn)
@@ -166,11 +245,33 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
 	return X86_VEX_P(insn->vex_prefix.bytes[2]);
 }
 
+static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes < 3)
+		return 0;
+	return X86_VEX_W(insn->vex_prefix.bytes[2]);
+}
+
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+	if (insn->xop_prefix.nbytes < 3)	/* XOP is 3 bytes */
+		return 0;
+	return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+	return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
 /* Get the last prefix id from last prefix or VEX prefix */
 static inline int insn_last_prefix_id(struct insn *insn)
 {
-	if (insn_is_avx(insn))
+	if (insn_is_avx_or_xop(insn)) {
+		if (avx_insn_is_xop(insn))
+			return insn_xop_p_bits(insn);
 		return insn_vex_p_bits(insn);	/* VEX_p is a SIMD prefix id */
+	}
 
 	if (insn->prefixes.bytes[3])
 		return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
@@ -208,6 +309,20 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn:	Pointer to struct insn.
+ * @prefix:	Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * in turn; the cursor variable idx is declared inside the macro itself, so
+ * it is not passed in and must not be touched by the loop body.
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, prefix)	\
+	for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index f5a796da07f8..e48a00b3311d 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -6,13 +6,12 @@
 #ifndef X86_ASM_INST_H
 #define X86_ASM_INST_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 #define REG_NUM_INVALID		100
 
 #define REG_TYPE_R32		0
 #define REG_TYPE_R64		1
-#define REG_TYPE_XMM		2
 #define REG_TYPE_INVALID	100
 
 	.macro R32_NUM opd r32
@@ -123,77 +122,18 @@
 #endif
 	.endm
 
-	.macro XMM_NUM opd xmm
-	\opd = REG_NUM_INVALID
-	.ifc \xmm,%xmm0
-	\opd = 0
-	.endif
-	.ifc \xmm,%xmm1
-	\opd = 1
-	.endif
-	.ifc \xmm,%xmm2
-	\opd = 2
-	.endif
-	.ifc \xmm,%xmm3
-	\opd = 3
-	.endif
-	.ifc \xmm,%xmm4
-	\opd = 4
-	.endif
-	.ifc \xmm,%xmm5
-	\opd = 5
-	.endif
-	.ifc \xmm,%xmm6
-	\opd = 6
-	.endif
-	.ifc \xmm,%xmm7
-	\opd = 7
-	.endif
-	.ifc \xmm,%xmm8
-	\opd = 8
-	.endif
-	.ifc \xmm,%xmm9
-	\opd = 9
-	.endif
-	.ifc \xmm,%xmm10
-	\opd = 10
-	.endif
-	.ifc \xmm,%xmm11
-	\opd = 11
-	.endif
-	.ifc \xmm,%xmm12
-	\opd = 12
-	.endif
-	.ifc \xmm,%xmm13
-	\opd = 13
-	.endif
-	.ifc \xmm,%xmm14
-	\opd = 14
-	.endif
-	.ifc \xmm,%xmm15
-	\opd = 15
-	.endif
-	.endm
-
 	.macro REG_TYPE type reg
 	R32_NUM reg_type_r32 \reg
 	R64_NUM reg_type_r64 \reg
-	XMM_NUM reg_type_xmm \reg
 	.if reg_type_r64 <> REG_NUM_INVALID
 	\type = REG_TYPE_R64
 	.elseif reg_type_r32 <> REG_NUM_INVALID
 	\type = REG_TYPE_R32
-	.elseif reg_type_xmm <> REG_NUM_INVALID
-	\type = REG_TYPE_XMM
 	.else
 	\type = REG_TYPE_INVALID
 	.endif
 	.endm
 
-	.macro PFX_OPD_SIZE
-	.byte 0x66
-	.endm
-
 	.macro PFX_REX opd1 opd2 W=0
 	.if ((\opd1 | \opd2) & 8) || \W
 	.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
@@ -203,109 +143,6 @@
 	.macro MODRM mod opd1 opd2
 	.byte \mod | (\opd1 &
7) << 3) .endm - - .macro PSHUFB_XMM xmm1 xmm2 - XMM_NUM pshufb_opd1 \xmm1 - XMM_NUM pshufb_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX pshufb_opd1 pshufb_opd2 - .byte 0x0f, 0x38, 0x00 - MODRM 0xc0 pshufb_opd1 pshufb_opd2 - .endm - - .macro PCLMULQDQ imm8 xmm1 xmm2 - XMM_NUM clmul_opd1 \xmm1 - XMM_NUM clmul_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX clmul_opd1 clmul_opd2 - .byte 0x0f, 0x3a, 0x44 - MODRM 0xc0 clmul_opd1 clmul_opd2 - .byte \imm8 - .endm - - .macro PEXTRD imm8 xmm gpr - R32_NUM extrd_opd1 \gpr - XMM_NUM extrd_opd2 \xmm - PFX_OPD_SIZE - PFX_REX extrd_opd1 extrd_opd2 - .byte 0x0f, 0x3a, 0x16 - MODRM 0xc0 extrd_opd1 extrd_opd2 - .byte \imm8 - .endm - - .macro AESKEYGENASSIST rcon xmm1 xmm2 - XMM_NUM aeskeygen_opd1 \xmm1 - XMM_NUM aeskeygen_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aeskeygen_opd1 aeskeygen_opd2 - .byte 0x0f, 0x3a, 0xdf - MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 - .byte \rcon - .endm - - .macro AESIMC xmm1 xmm2 - XMM_NUM aesimc_opd1 \xmm1 - XMM_NUM aesimc_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesimc_opd1 aesimc_opd2 - .byte 0x0f, 0x38, 0xdb - MODRM 0xc0 aesimc_opd1 aesimc_opd2 - .endm - - .macro AESENC xmm1 xmm2 - XMM_NUM aesenc_opd1 \xmm1 - XMM_NUM aesenc_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesenc_opd1 aesenc_opd2 - .byte 0x0f, 0x38, 0xdc - MODRM 0xc0 aesenc_opd1 aesenc_opd2 - .endm - - .macro AESENCLAST xmm1 xmm2 - XMM_NUM aesenclast_opd1 \xmm1 - XMM_NUM aesenclast_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesenclast_opd1 aesenclast_opd2 - .byte 0x0f, 0x38, 0xdd - MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 - .endm - - .macro AESDEC xmm1 xmm2 - XMM_NUM aesdec_opd1 \xmm1 - XMM_NUM aesdec_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesdec_opd1 aesdec_opd2 - .byte 0x0f, 0x38, 0xde - MODRM 0xc0 aesdec_opd1 aesdec_opd2 - .endm - - .macro AESDECLAST xmm1 xmm2 - XMM_NUM aesdeclast_opd1 \xmm1 - XMM_NUM aesdeclast_opd2 \xmm2 - PFX_OPD_SIZE - PFX_REX aesdeclast_opd1 aesdeclast_opd2 - .byte 0x0f, 0x38, 0xdf - MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 - .endm - - .macro MOVQ_R64_XMM opd1 opd2 - REG_TYPE movq_r64_xmm_opd1_type \opd1 - .if movq_r64_xmm_opd1_type == REG_TYPE_XMM - XMM_NUM movq_r64_xmm_opd1 \opd1 - R64_NUM movq_r64_xmm_opd2 \opd2 - .else - R64_NUM movq_r64_xmm_opd1 \opd1 - XMM_NUM movq_r64_xmm_opd2 \opd2 - .endif - PFX_OPD_SIZE - PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 - .if movq_r64_xmm_opd1_type == REG_TYPE_XMM - .byte 0x0f, 0x7e - .else - .byte 0x0f, 0x6e - .endif - MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 - .endm #endif #endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0dd6b0f4000e..950bfd006905 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -5,89 +5,223 @@ /* * "Big Core" Processors (Branded as Core, Xeon, etc...) * - * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. - * * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within * that group keep the CPUID for the variants sorted by model number. + * + * The defined symbol names have the following form: + * INTEL_{OPTFAMILY}_{MICROARCH}{OPTDIFF} + * where: + * OPTFAMILY Describes the family of CPUs that this belongs to. Default + * is assumed to be "_CORE" (and should be omitted). Other values + * currently in use are _ATOM and _XEON_PHI + * MICROARCH Is the code name for the micro-architecture for this core. + * N.B. Not the platform name. + * OPTDIFF If needed, a short string to differentiate by market segment. 
+ * + * Common OPTDIFFs: + * + * - regular client parts + * _L - regular mobile parts + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts + * _N,_P - other mobile parts + * _H - premium mobile parts + * _S - other client parts + * + * Historical OPTDIFFs: + * + * _EP - 2 socket server parts + * _EX - 4+ socket server parts + * + * The #define line may optionally include a comment including platform or core + * names. An exception is made for skylake/kabylake where steppings seem to have gotten + * their own names :-( */ -#define INTEL_FAM6_CORE_YONAH 0x0E +#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model) + +/* Wildcard match so X86_MATCH_VFM(ANY) works */ +#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) + +/* Family 5 */ +#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */ +#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */ +#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */ +#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ + +/* Family 6, 18, 19 */ +#define INTEL_PENTIUM_PRO IFM(6, 0x01) +#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03) +#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05) +#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B) +#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D) + +#define INTEL_CORE_YONAH IFM(6, 0x0E) + +#define INTEL_CORE2_MEROM IFM(6, 0x0F) +#define INTEL_CORE2_MEROM_L IFM(6, 0x16) +#define INTEL_CORE2_PENRYN IFM(6, 0x17) +#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D) + +#define INTEL_NEHALEM IFM(6, 0x1E) +#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */ +#define INTEL_NEHALEM_EP IFM(6, 0x1A) +#define INTEL_NEHALEM_EX IFM(6, 0x2E) + +#define INTEL_WESTMERE IFM(6, 0x25) +#define INTEL_WESTMERE_EP IFM(6, 0x2C) +#define INTEL_WESTMERE_EX IFM(6, 0x2F) + +#define INTEL_SANDYBRIDGE IFM(6, 0x2A) +#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D) +#define INTEL_IVYBRIDGE IFM(6, 0x3A) +#define INTEL_IVYBRIDGE_X IFM(6, 0x3E) + +#define INTEL_HASWELL IFM(6, 0x3C) +#define INTEL_HASWELL_X IFM(6, 0x3F) +#define INTEL_HASWELL_L IFM(6, 0x45) +#define INTEL_HASWELL_G IFM(6, 0x46) + +#define INTEL_BROADWELL IFM(6, 0x3D) +#define INTEL_BROADWELL_G IFM(6, 0x47) +#define INTEL_BROADWELL_X IFM(6, 0x4F) +#define INTEL_BROADWELL_D IFM(6, 0x56) + +#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */ +#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */ +#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */ +/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */ +/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */ + +#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */ +/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ +/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ +/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ + +#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */ +/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ + +#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */ +#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */ + +#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */ -#define INTEL_FAM6_CORE2_MEROM 0x0F -#define INTEL_FAM6_CORE2_MEROM_L 0x16 -#define INTEL_FAM6_CORE2_PENRYN 0x17 -#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D +#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */ +#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */ +#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */ +#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */ +#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */ -#define INTEL_FAM6_NEHALEM 0x1E -#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ -#define INTEL_FAM6_NEHALEM_EP 0x1A 
-#define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */ -#define INTEL_FAM6_WESTMERE 0x25 -#define INTEL_FAM6_WESTMERE_EP 0x2C -#define INTEL_FAM6_WESTMERE_EX 0x2F +#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */ +#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */ -#define INTEL_FAM6_SANDYBRIDGE 0x2A -#define INTEL_FAM6_SANDYBRIDGE_X 0x2D -#define INTEL_FAM6_IVYBRIDGE 0x3A -#define INTEL_FAM6_IVYBRIDGE_X 0x3E +#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */ -#define INTEL_FAM6_HASWELL_CORE 0x3C -#define INTEL_FAM6_HASWELL_X 0x3F -#define INTEL_FAM6_HASWELL_ULT 0x45 -#define INTEL_FAM6_HASWELL_GT3E 0x46 +#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) /* Raptor Cove */ -#define INTEL_FAM6_BROADWELL_CORE 0x3D -#define INTEL_FAM6_BROADWELL_GT3E 0x47 -#define INTEL_FAM6_BROADWELL_X 0x4F -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */ +#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) -#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E -#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E -#define INTEL_FAM6_SKYLAKE_X 0x55 -#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E -#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E +#define INTEL_DIAMONDRAPIDS_X IFM(19, 0x01) /* Panther Cove */ -#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 +#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */ -/* "Small Core" Processors (Atom) */ +/* "Hybrid" Processors (P-Core/E-Core) */ -#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ -#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ -#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ -#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ -#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ +#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */ -#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ -#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ -#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ +#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */ +#define INTEL_RAPTORLAKE_P IFM(6, 0xBA) +#define INTEL_RAPTORLAKE_S IFM(6, 0xBF) -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ -#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_METEORLAKE IFM(6, 0xAC) /* Redwood Cove / Crestmont */ +#define INTEL_METEORLAKE_L IFM(6, 0xAA) -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ -#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ -#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_ARROWLAKE_H IFM(6, 0xC5) /* Lion Cove / Skymont */ +#define INTEL_ARROWLAKE IFM(6, 0xC6) +#define INTEL_ARROWLAKE_U IFM(6, 0xB5) + +#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */ + +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */ + +#define INTEL_WILDCATLAKE_L IFM(6, 0xD5) + +#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */ +#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */ + +/* "Small Core" Processors (Atom/E-Core) */ + +#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ +#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */ + +#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */ +#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */ +#define 
INTEL_ATOM_SALTWELL_TABLET	IFM(6, 0x35) /* Cloverview */
+
+#define INTEL_ATOM_SILVERMONT		IFM(6, 0x37) /* Bay Trail, Valleyview */
+#define INTEL_ATOM_SILVERMONT_D		IFM(6, 0x4D) /* Avaton, Rangely */
+#define INTEL_ATOM_SILVERMONT_MID	IFM(6, 0x4A) /* Merrifield */
+#define INTEL_ATOM_SILVERMONT_MID2	IFM(6, 0x5A) /* Anniedale */
+
+#define INTEL_ATOM_AIRMONT		IFM(6, 0x4C) /* Cherry Trail, Braswell */
+#define INTEL_ATOM_AIRMONT_NP		IFM(6, 0x75) /* Lightning Mountain */
+
+#define INTEL_ATOM_GOLDMONT		IFM(6, 0x5C) /* Apollo Lake */
+#define INTEL_ATOM_GOLDMONT_D		IFM(6, 0x5F) /* Denverton */
+
+/* Note: the micro-architecture is "Goldmont Plus" */
+#define INTEL_ATOM_GOLDMONT_PLUS	IFM(6, 0x7A) /* Gemini Lake */
+
+#define INTEL_ATOM_TREMONT_D		IFM(6, 0x86) /* Jacobsville */
+#define INTEL_ATOM_TREMONT		IFM(6, 0x96) /* Elkhart Lake */
+#define INTEL_ATOM_TREMONT_L		IFM(6, 0x9C) /* Jasper Lake */
+
+#define INTEL_ATOM_GRACEMONT		IFM(6, 0xBE) /* Alderlake N */
+
+#define INTEL_ATOM_CRESTMONT_X		IFM(6, 0xAF) /* Sierra Forest */
+#define INTEL_ATOM_CRESTMONT		IFM(6, 0xB6) /* Grand Ridge */
+
+#define INTEL_ATOM_DARKMONT_X		IFM(6, 0xDD) /* Clearwater Forest */
 
 /* Xeon Phi */
 
-#define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
-#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */
-
-/* Useful macros */
-#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
-{ \
-	.vendor		= X86_VENDOR_INTEL, \
-	.family		= _family, \
-	.model		= _model, \
-	.feature	= X86_FEATURE_ANY, \
-	.driver_data	= (kernel_ulong_t)&_driver_data \
-}
-
-#define INTEL_CPU_FAM6(_model, _driver_data) \
-	INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
+#define INTEL_XEON_PHI_KNL		IFM(6, 0x57) /* Knights Landing */
+#define INTEL_XEON_PHI_KNM		IFM(6, 0x85) /* Knights Mill */
+
+/* Notational marker denoting the last Family 6 model */
+#define INTEL_FAM6_LAST			IFM(6, 0xFF)
+
+/* Family 15 - NetBurst */
+#define INTEL_P4_WILLAMETTE		IFM(15, 0x01) /* Also Xeon Foster */
+#define INTEL_P4_PRESCOTT		IFM(15, 0x03)
+#define INTEL_P4_PRESCOTT_2M		IFM(15, 0x04)
+#define INTEL_P4_CEDARMILL		IFM(15, 0x06) /* Also Xeon Dempsey */
+
+/*
+ * Intel CPU core types
+ *
+ * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
+ * of the core. Bits [31:24] indicate its core type (Core or Atom)
+ * and bits [23:0] indicate the native model ID of the core.
+ * Core type and native model ID are defined in the enumerations below.
+ */
+enum intel_cpu_type {
+	INTEL_CPU_TYPE_UNKNOWN,
+	INTEL_CPU_TYPE_ATOM = 0x20,
+	INTEL_CPU_TYPE_CORE = 0x40,
+};
+
+enum intel_native_id {
+	INTEL_ATOM_CMT_NATIVE_ID = 0x2,	/* Crestmont */
+	INTEL_ATOM_SKT_NATIVE_ID = 0x3,	/* Skymont */
+};
 
 #endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 52f815a80539..a3abdcd89a32 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -1,19 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * intel-mid.h: Intel MID specific setup code
+ * Intel MID specific setup code
  *
- * (C) Copyright 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
+ * (C) Copyright 2009, 2021 Intel Corporation */ #ifndef _ASM_X86_INTEL_MID_H #define _ASM_X86_INTEL_MID_H -#include <linux/sfi.h> #include <linux/pci.h> -#include <linux/platform_device.h> extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); @@ -26,109 +20,4 @@ extern void intel_mid_pwr_power_off(void); extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); -extern int get_gpio_by_name(const char *name); -extern int __init sfi_parse_mrtc(struct sfi_table_header *table); -extern int __init sfi_parse_mtmr(struct sfi_table_header *table); -extern int sfi_mrtc_num; -extern struct sfi_rtc_table_entry sfi_mrtc_array[]; - -/* - * Here defines the array of devices platform data that IAFW would export - * through SFI "DEVS" table, we use name and type to match the device and - * its platform data. - */ -struct devs_id { - char name[SFI_NAME_LEN + 1]; - u8 type; - u8 delay; - u8 msic; - void *(*get_platform_data)(void *info); -}; - -#define sfi_device(i) \ - static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ - __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i - -/** -* struct mid_sd_board_info - template for SD device creation -* @name: identifies the driver -* @bus_num: board-specific identifier for a given SD controller -* @max_clk: the maximum frequency device supports -* @platform_data: the particular data stored there is driver-specific -*/ -struct mid_sd_board_info { - char name[SFI_NAME_LEN]; - int bus_num; - unsigned short addr; - u32 max_clk; - void *platform_data; -}; - -/* - * Medfield is the follow-up of Moorestown, it combines two chip solution into - * one. Other than that it also added always-on and constant tsc and lapic - * timers. Medfield is the platform name, and the chip name is called Penwell - * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be - * identified via MSRs. 
- */
-enum intel_mid_cpu_type {
-	/* 1 was Moorestown */
-	INTEL_MID_CPU_CHIP_PENWELL = 2,
-	INTEL_MID_CPU_CHIP_CLOVERVIEW,
-	INTEL_MID_CPU_CHIP_TANGIER,
-};
-
-extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
-
-#ifdef CONFIG_X86_INTEL_MID
-
-static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
-{
-	return __intel_mid_cpu_chip;
-}
-
-static inline bool intel_mid_has_msic(void)
-{
-	return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
-}
-
-#else /* !CONFIG_X86_INTEL_MID */
-
-#define intel_mid_identify_cpu()	0
-#define intel_mid_has_msic()	0
-
-#endif /* !CONFIG_X86_INTEL_MID */
-
-enum intel_mid_timer_options {
-	INTEL_MID_TIMER_DEFAULT,
-	INTEL_MID_TIMER_APBT_ONLY,
-	INTEL_MID_TIMER_LAPIC_APBT,
-};
-
-extern enum intel_mid_timer_options intel_mid_timer_options;
-
-/* Bus Select SoC Fuse value */
-#define BSEL_SOC_FUSE_MASK	0x7
-/* FSB 133MHz */
-#define BSEL_SOC_FUSE_001	0x1
-/* FSB 100MHz */
-#define BSEL_SOC_FUSE_101	0x5
-/* FSB 83MHz */
-#define BSEL_SOC_FUSE_111	0x7
-
-#define SFI_MTMR_MAX_NUM	8
-#define SFI_MRTC_MAX	8
-
-extern void intel_scu_devices_create(void);
-extern void intel_scu_devices_destroy(void);
-
-/* VRTC timer */
-#define MRST_VRTC_MAP_SZ	1024
-/* #define MRST_VRTC_PGOFFSET	0xc00 */
-
-extern void intel_mid_rtc_init(void);
-
-/* The offset for the mapping of global gpio pin to irq */
-#define INTEL_MID_IRQ_OFFSET 0x100
-
 #endif /* _ASM_X86_INTEL_MID_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index ae26df1c2789..695f87efbeb8 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -4,11 +4,21 @@
 #include <linux/percpu-defs.h>
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT	4
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)
+
+/*
+ * The largest PEBS record could consume a page, so make sure that
+ * at least one record can still be written after the PMI triggers.
+ */
+#define ARCH_PEBS_THRESH_MULTI	((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE	1
 
 /* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
-#define MAX_FIXED_PEBS_EVENTS	3
+#define MAX_PEBS_EVENTS_FMT4	8
+#define MAX_PEBS_EVENTS		32
+#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
+#define MAX_FIXED_PEBS_EVENTS	16
 
 /*
  * A debug store configuration.
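A usage note on the insn.h changes earlier in this diff: the insn_get_*() helpers now return an error code, and insn_decode() subsumes the old kernel_insn_init()/insn_get_length() pairing. Below is a minimal sketch of a caller, assuming the patched <asm/insn.h> is in scope; dump_insn() and its pr_info() output are illustrative only, not part of the patch:

	/* Hypothetical helper: decode one instruction and dump a few fields. */
	#include <linux/printk.h>
	#include <asm/insn.h>

	static int dump_insn(const void *kaddr, int buf_len)
	{
		struct insn insn, *ip = &insn;
		insn_byte_t prefix;
		int ret;

		/* INSN_MODE_KERN picks 32- vs 64-bit from the kernel build. */
		ret = insn_decode(ip, kaddr, buf_len, INSN_MODE_KERN);
		if (ret < 0)
			return ret;	/* malformed or truncated instruction */

		/* Walk legacy prefixes; the cursor lives inside the macro. */
		for_each_insn_prefix(ip, prefix)
			pr_info("legacy prefix 0x%02x\n", prefix);

		pr_info("opcode1 0x%02x, immediate at offset %d\n",
			ip->opcode.bytes[0], insn_offset_immediate(ip));

		return 0;
	}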
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
deleted file mode 100644
index 0b44b1abe4d9..000000000000
--- a/arch/x86/include/asm/intel_mid_vrtc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _INTEL_MID_VRTC_H
-#define _INTEL_MID_VRTC_H
-
-extern unsigned char vrtc_cmos_read(unsigned char reg);
-extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec64 *now);
-extern int vrtc_set_mmss(const struct timespec64 *now);
-
-#endif
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
deleted file mode 100644
index 3cb002b1d0f9..000000000000
--- a/arch/x86/include/asm/intel_pconfig.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_X86_INTEL_PCONFIG_H
-#define _ASM_X86_INTEL_PCONFIG_H
-
-#include <asm/asm.h>
-#include <asm/processor.h>
-
-enum pconfig_target {
-	INVALID_TARGET = 0,
-	MKTME_TARGET = 1,
-	PCONFIG_TARGET_NR
-};
-
-int pconfig_target_supported(enum pconfig_target target);
-
-enum pconfig_leaf {
-	MKTME_KEY_PROGRAM = 0,
-	PCONFIG_LEAF_INVALID,
-};
-
-#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
-
-/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
-
-/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
-#define MKTME_KEYID_SET_KEY_DIRECT	0
-#define MKTME_KEYID_SET_KEY_RANDOM	1
-#define MKTME_KEYID_CLEAR_KEY		2
-#define MKTME_KEYID_NO_ENCRYPT		3
-
-/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
-#define MKTME_AES_XTS_128	(1 << 8)
-
-/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
-#define MKTME_PROG_SUCCESS	0
-#define MKTME_INVALID_PROG_CMD	1
-#define MKTME_ENTROPY_ERROR	2
-#define MKTME_INVALID_KEYID	3
-#define MKTME_INVALID_ENC_ALG	4
-#define MKTME_DEVICE_BUSY	5
-
-/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0).
*/ -struct mktme_key_program { - u16 keyid; - u32 keyid_ctrl; - u8 __rsvd[58]; - u8 key_field_1[64]; - u8 key_field_2[64]; -} __packed __aligned(256); - -static inline int mktme_key_program(struct mktme_key_program *key_program) -{ - unsigned long rax = MKTME_KEY_PROGRAM; - - if (!pconfig_target_supported(MKTME_TARGET)) - return -ENXIO; - - asm volatile(PCONFIG - : "=a" (rax), "=b" (key_program) - : "0" (rax), "1" (key_program) - : "memory", "cc"); - - return rax; -} - -#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h deleted file mode 100644 index 9e7adcdbe031..000000000000 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_PMC_IPC_H_ -#define _ASM_X86_INTEL_PMC_IPC_H_ - -/* Commands */ -#define PMC_IPC_PMIC_ACCESS 0xFF -#define PMC_IPC_PMIC_ACCESS_READ 0x0 -#define PMC_IPC_PMIC_ACCESS_WRITE 0x1 -#define PMC_IPC_USB_PWR_CTRL 0xF0 -#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF -#define PMC_IPC_PHY_CONFIG 0xEE -#define PMC_IPC_NORTHPEAK_CTRL 0xED -#define PMC_IPC_PM_DEBUG 0xEC -#define PMC_IPC_PMC_TELEMTRY 0xEB -#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA - -/* IPC return code */ -#define IPC_ERR_NONE 0 -#define IPC_ERR_CMD_NOT_SUPPORTED 1 -#define IPC_ERR_CMD_NOT_SERVICED 2 -#define IPC_ERR_UNABLE_TO_SERVICE 3 -#define IPC_ERR_CMD_INVALID 4 -#define IPC_ERR_CMD_FAILED 5 -#define IPC_ERR_EMSECURITY 6 -#define IPC_ERR_UNSIGNEDKERNEL 7 - -/* GCR reg offsets from gcr base*/ -#define PMC_GCR_PMC_CFG_REG 0x08 -#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78 -#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80 - -#if IS_ENABLED(CONFIG_INTEL_PMC_IPC) - -int intel_pmc_ipc_simple_command(int cmd, int sub); -int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr); -int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen); -int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read(u32 offset, u32 *data); -int intel_pmc_gcr_read64(u32 offset, u64 *data); -int intel_pmc_gcr_write(u32 offset, u32 data); -int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); - -#else - -static inline int intel_pmc_ipc_simple_command(int cmd, int sub) -{ - return -EINVAL; -} - -static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr) -{ - return -EINVAL; -} - -static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen) -{ - return -EINVAL; -} - -static inline int intel_pmc_s0ix_counter_read(u64 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_read(u32 offset, u32 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_write(u32 offset, u32 data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) -{ - return -EINVAL; -} - -#endif /*CONFIG_INTEL_PMC_IPC*/ - -#endif diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h index 634f99b1dc22..c796e9bc98b6 100644 --- a/arch/x86/include/asm/intel_pt.h +++ b/arch/x86/include/asm/intel_pt.h @@ -3,7 +3,7 @@ #define _ASM_X86_INTEL_PT_H #define PT_CPUID_LEAVES 2 -#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */ +#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */ enum pt_capabilities { PT_CAP_max_subleaf = 0, @@ -13,6 +13,8 @@ 
enum pt_capabilities { PT_CAP_mtc, PT_CAP_ptwrite, PT_CAP_power_event_trace, + PT_CAP_event_trace, + PT_CAP_tnt_disable, PT_CAP_topa_output, PT_CAP_topa_multiple_entries, PT_CAP_single_range_output, @@ -28,10 +30,12 @@ enum pt_capabilities { void cpu_emergency_stop_pt(void); extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap); extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap); +extern int is_intel_pt_event(struct perf_event *event); #else static inline void cpu_emergency_stop_pt(void) {} static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; } static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; } +static inline int is_intel_pt_event(struct perf_event *event) { return 0; } #endif #endif /* _ASM_X86_INTEL_PT_H */ diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h index ce16da719596..1f9b5d225912 100644 --- a/arch/x86/include/asm/intel_punit_ipc.h +++ b/arch/x86/include/asm/intel_punit_ipc.h @@ -80,17 +80,10 @@ typedef enum { #if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC) -int intel_punit_ipc_simple_command(int cmd, int para1, int para2); int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out); #else -static inline int intel_punit_ipc_simple_command(int cmd, - int para1, int para2) -{ - return -ENODEV; -} - static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out) { diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h deleted file mode 100644 index 4a8c6e817398..000000000000 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_SCU_IPC_H_ -#define _ASM_X86_INTEL_SCU_IPC_H_ - -#include <linux/notifier.h> - -#define IPCMSG_INDIRECT_READ 0x02 -#define IPCMSG_INDIRECT_WRITE 0x05 - -#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ - -#define IPCMSG_WARM_RESET 0xF0 -#define IPCMSG_COLD_RESET 0xF1 -#define IPCMSG_SOFT_RESET 0xF2 -#define IPCMSG_COLD_BOOT 0xF3 - -#define IPCMSG_VRTC 0xFA /* Set vRTC device */ - /* Command id associated with message IPCMSG_VRTC */ - #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ - #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ - -/* Read single register */ -int intel_scu_ipc_ioread8(u16 addr, u8 *data); - -/* Read two sequential registers */ -int intel_scu_ipc_ioread16(u16 addr, u16 *data); - -/* Read four sequential registers */ -int intel_scu_ipc_ioread32(u16 addr, u32 *data); - -/* Read a vector */ -int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); - -/* Write single register */ -int intel_scu_ipc_iowrite8(u16 addr, u8 data); - -/* Write two sequential registers */ -int intel_scu_ipc_iowrite16(u16 addr, u16 data); - -/* Write four sequential registers */ -int intel_scu_ipc_iowrite32(u16 addr, u32 data); - -/* Write a vector */ -int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); - -/* Update single register based on the mask */ -int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); - -/* Issue commands to the SCU with or without data */ -int intel_scu_ipc_simple_command(int cmd, int sub); -int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, - u32 *out, int outlen); -int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen, - u32 *out, int outlen, u32 dptr, u32 sptr); - -/* I2C control api */ -int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); - -/* Update FW version */ -int intel_scu_ipc_fw_update(u8 *buffer, u32 
length); - -extern struct blocking_notifier_head intel_scu_notifier; - -static inline void intel_scu_notifier_add(struct notifier_block *nb) -{ - blocking_notifier_chain_register(&intel_scu_notifier, nb); -} - -static inline void intel_scu_notifier_remove(struct notifier_block *nb) -{ - blocking_notifier_chain_unregister(&intel_scu_notifier, nb); -} - -static inline int intel_scu_notifier_post(unsigned long v, void *p) -{ - return blocking_notifier_call_chain(&intel_scu_notifier, v, p); -} - -#define SCU_AVAILABLE 1 -#define SCU_DOWN 2 - -#endif diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 85029b58d0cd..944637a4e6de 100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -1,17 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Intel SOC Telemetry Driver Header File * Copyright (C) 2015, Intel Corporation. * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #ifndef INTEL_TELEMETRY_H #define INTEL_TELEMETRY_H @@ -19,6 +10,8 @@ #define TELEM_MAX_EVENTS_SRAM 28 #define TELEM_MAX_OS_ALLOCATED_EVENTS 20 +#include <linux/platform_data/x86/intel_scu_ipc.h> + enum telemetry_unit { TELEM_PSS = 0, TELEM_IOSS, @@ -49,13 +42,10 @@ struct telemetry_evtmap { struct telemetry_unit_config { struct telemetry_evtmap *telem_evts; void __iomem *regmap; - u32 ssram_base_addr; u8 ssram_evts_used; u8 curr_period; u8 max_period; u8 min_period; - u32 ssram_size; - }; struct telemetry_plt_config { @@ -63,22 +53,12 @@ struct telemetry_plt_config { struct telemetry_unit_config ioss_config; struct mutex telem_trace_lock; struct mutex telem_lock; + struct intel_pmc_dev *pmc; + struct intel_scu_ipc_dev *scu; bool telem_in_use; }; struct telemetry_core_ops { - int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period, - u8 *ioss_min_period, u8 *ioss_max_period); - - int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig, - struct telemetry_evtconfig *ioss_evtconfig, - int pss_len, int ioss_len); - - int (*update_events)(struct telemetry_evtconfig pss_evtconfig, - struct telemetry_evtconfig ioss_evtconfig); - - int (*set_sampling_period)(u8 pss_period, u8 ioss_period); - int (*get_trace_verbosity)(enum telemetry_unit telem_unit, u32 *verbosity); @@ -92,11 +72,6 @@ struct telemetry_core_ops { int (*read_eventlog)(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len, int log_all_evts); - - int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts, - u32 *pss_evtmap, u32 *ioss_evtmap); - - int (*reset_events)(void); }; int telemetry_set_pltdata(const struct telemetry_core_ops *ops, @@ -104,40 +79,20 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops, int telemetry_clear_pltdata(void); -int telemetry_pltconfig_valid(void); +struct telemetry_plt_config *telemetry_get_pltdata(void); int telemetry_get_evtname(enum telemetry_unit telem_unit, const char **name, int len); -int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig, - struct telemetry_evtconfig ioss_evtconfig); - -int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts, - u32 
*pss_evtmap, u32 *ioss_evtmap); - -int telemetry_reset_events(void); - -int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config, - struct telemetry_evtconfig *ioss_config, - int pss_len, int ioss_len); - int telemetry_read_events(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len); -int telemetry_raw_read_events(enum telemetry_unit telem_unit, - struct telemetry_evtlog *evtlog, int len); - int telemetry_read_eventlog(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len); int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit, struct telemetry_evtlog *evtlog, int len); -int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period, - u8 *ioss_min_period, u8 *ioss_max_period); - -int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period); - int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit, u32 verbosity); diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h index 989cfa86de85..734482afbf81 100644 --- a/arch/x86/include/asm/invpcid.h +++ b/arch/x86/include/asm/invpcid.h @@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, * stale TLB entries and, especially if we're flushing global * mappings, we don't want the compiler to reorder any subsequent * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); + asm volatile("invpcid %[desc], %[type]" + :: [desc] "m" (desc), [type] "r" (type) : "memory"); } #define INVPCID_TYPE_INDIV_ADDR 0 diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 686247db3106..ca309a3227c7 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -35,14 +35,14 @@ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#define ARCH_HAS_IOREMAP_WC -#define ARCH_HAS_IOREMAP_WT - #include <linux/string.h> #include <linux/compiler.h> +#include <linux/cc_platform.h> #include <asm/page.h> #include <asm/early_ioremap.h> #include <asm/pgtable_types.h> +#include <asm/shared/io.h> +#include <asm/special_insns.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -90,8 +90,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define __raw_writew __writew #define __raw_writel __writel -#define mmiowb() barrier() - #ifdef CONFIG_X86_64 build_mmio_read(readq, "q", u64, "=r", :"memory") @@ -154,46 +152,32 @@ static inline void *phys_to_virt(phys_addr_t address) #define phys_to_virt phys_to_virt /* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) - -/* * ISA I/O bus memory addresses are 1:1 with the physical address. * However, we truncate the address to unsigned int to avoid undesirable - * promitions in legacy drivers. + * promotions in legacy drivers. */ static inline unsigned int isa_virt_to_bus(volatile void *address) { return (unsigned int)virt_to_phys(address); } -#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) #define isa_bus_to_virt phys_to_virt /* - * However PCI ones are not necessarily 1:1 and therefore these interfaces - * are forbidden in portable PCI drivers. - * - * Allow them on x86 for legacy drivers, though. 
- */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt - -/* * The default ioremap() behavior is non-cached; if you need something * else, you probably want one of the following. */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -#define ioremap_nocache ioremap_nocache extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot); #define ioremap_prot ioremap_prot extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags); +#define arch_memremap_wb arch_memremap_wb + /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -208,17 +192,12 @@ extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} +void __iomem *ioremap(resource_size_t offset, unsigned long size); #define ioremap ioremap extern void iounmap(volatile void __iomem *addr); #define iounmap iounmap -extern void set_iounmap_nonlazy(void); - #ifdef __KERNEL__ void memcpy_fromio(void *, const volatile void __iomem *, size_t); @@ -229,7 +208,22 @@ void memset_io(volatile void __iomem *, int, size_t); #define memcpy_toio memcpy_toio #define memset_io memset_io -#include <asm-generic/iomap.h> +#ifdef CONFIG_X86_64 +/* + * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for + * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy + * loop. 
+ */ +static inline void __iowrite32_copy(void __iomem *to, const void *from, + size_t count) +{ + asm volatile("rep movsl" + : "=&c"(count), "=&D"(to), "=&S"(from) + : "0"(count), "1"(to), "2"(from) + : "memory"); +} +#define __iowrite32_copy __iowrite32_copy +#endif /* * ISA space is 'always mapped' on a typical x86 system, no need to @@ -264,88 +258,57 @@ static inline void slow_down_io(void) #endif -#ifdef CONFIG_AMD_MEM_ENCRYPT -#include <linux/jump_label.h> - -extern struct static_key_false sev_enable_key; -static inline bool sev_key_active(void) -{ - return static_branch_unlikely(&sev_enable_key); -} - -#else /* !CONFIG_AMD_MEM_ENCRYPT */ - -static inline bool sev_key_active(void) { return false; } - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - -#define BUILDIO(bwl, bw, type) \ -static inline void out##bwl(unsigned type value, int port) \ -{ \ - asm volatile("out" #bwl " %" #bw "0, %w1" \ - : : "a"(value), "Nd"(port)); \ -} \ - \ -static inline unsigned type in##bwl(int port) \ -{ \ - unsigned type value; \ - asm volatile("in" #bwl " %w1, %" #bw "0" \ - : "=a"(value) : "Nd"(port)); \ - return value; \ -} \ - \ -static inline void out##bwl##_p(unsigned type value, int port) \ +#define BUILDIO(bwl, type) \ +static inline void out##bwl##_p(type value, u16 port) \ { \ out##bwl(value, port); \ slow_down_io(); \ } \ \ -static inline unsigned type in##bwl##_p(int port) \ +static inline type in##bwl##_p(u16 port) \ { \ - unsigned type value = in##bwl(port); \ + type value = in##bwl(port); \ slow_down_io(); \ return value; \ } \ \ -static inline void outs##bwl(int port, const void *addr, unsigned long count) \ +static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \ { \ - if (sev_key_active()) { \ - unsigned type *value = (unsigned type *)addr; \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ + type *value = (type *)addr; \ while (count) { \ out##bwl(*value, port); \ value++; \ count--; \ } \ } else { \ - asm volatile("rep; outs" #bwl \ + asm volatile("rep outs" #bwl \ : "+S"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } \ \ -static inline void ins##bwl(int port, void *addr, unsigned long count) \ +static inline void ins##bwl(u16 port, void *addr, unsigned long count) \ { \ - if (sev_key_active()) { \ - unsigned type *value = (unsigned type *)addr; \ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ + type *value = (type *)addr; \ while (count) { \ *value = in##bwl(port); \ value++; \ count--; \ } \ } else { \ - asm volatile("rep; ins" #bwl \ + asm volatile("rep ins" #bwl \ : "+D"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } -BUILDIO(b, b, char) -BUILDIO(w, w, short) -BUILDIO(l, , int) +BUILDIO(b, u8) +BUILDIO(w, u16) +BUILDIO(l, u32) +#undef BUILDIO -#define inb inb -#define inw inw -#define inl inl #define inb_p inb_p #define inw_p inw_p #define inl_p inl_p @@ -353,9 +316,6 @@ BUILDIO(l, , int) #define insw insw #define insl insl -#define outb outb -#define outw outw -#define outl outl #define outb_p outb_p #define outw_p outw_p #define outl_p outl_p @@ -399,6 +359,7 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif +#ifdef CONFIG_AMD_MEM_ENCRYPT extern bool arch_memremap_can_ram_remap(resource_size_t offset, unsigned long size, unsigned long flags); @@ -406,5 +367,37 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); +#else +static 
inline bool phys_mem_access_encrypted(unsigned long phys_addr,
+					      unsigned long size)
+{
+	return true;
+}
+#endif
+
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512-bit quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time.  Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
+				    size_t count)
+{
+	const u8 *from = src;
+	const u8 *end = from + count * 64;
+
+	while (from < end) {
+		movdir64b_io(dst, from);
+		from += 64;
+	}
+}
 
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index fd20a2334885..0d806513c4b3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -13,15 +13,6 @@
  * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
  */
 
-/* I/O Unit Redirection Table */
-#define IO_APIC_REDIR_VECTOR_MASK	0x000FF
-#define IO_APIC_REDIR_DEST_LOGICAL	0x00800
-#define IO_APIC_REDIR_DEST_PHYSICAL	0x00000
-#define IO_APIC_REDIR_SEND_PENDING	(1 << 12)
-#define IO_APIC_REDIR_REMOTE_IRR	(1 << 14)
-#define IO_APIC_REDIR_LEVEL_TRIGGER	(1 << 15)
-#define IO_APIC_REDIR_MASKED		(1 << 16)
-
 /*
  * The structure of the IO-APIC:
  */
@@ -65,53 +56,40 @@ union IO_APIC_reg_03 {
 };
 
 struct IO_APIC_route_entry {
-	__u32	vector		:  8,
-		delivery_mode	:  3,	/* 000: FIXED
-					 * 001: lowest prio
-					 * 111: ExtINT
-					 */
-		dest_mode	:  1,	/* 0: physical, 1: logical */
-		delivery_status	:  1,
-		polarity	:  1,
-		irr		:  1,
-		trigger		:  1,	/* 0: edge, 1: level */
-		mask		:  1,	/* 0: enabled, 1: disabled */
-		__reserved_2	: 15;
-
-	__u32	__reserved_3	: 24,
-		dest		:  8;
-} __attribute__ ((packed));
-
-struct IR_IO_APIC_route_entry {
-	__u64	vector		: 8,
-		zero		: 3,
-		index2		: 1,
-		delivery_status	: 1,
-		polarity	: 1,
-		irr		: 1,
-		trigger		: 1,
-		mask		: 1,
-		reserved	: 31,
-		format		: 1,
-		index		: 15;
+	union {
+		struct {
+			u64	vector			:  8,
+				delivery_mode		:  3,
+				dest_mode_logical	:  1,
+				delivery_status		:  1,
+				active_low		:  1,
+				irr			:  1,
+				is_level		:  1,
+				masked			:  1,
+				reserved_0		: 15,
+				reserved_1		: 17,
+				virt_destid_8_14	:  7,
+				destid_0_7		:  8;
+		};
+		struct {
+			u64	ir_shared_0		:  8,
+				ir_zero			:  3,
+				ir_index_15		:  1,
+				ir_shared_1		:  5,
+				ir_reserved_0		: 31,
+				ir_format		:  1,
+				ir_index_0_14		: 15;
+		};
+		struct {
+			u64	w1			: 32,
+				w2			: 32;
+		};
+	};
 } __attribute__ ((packed));
 
 struct irq_alloc_info;
 struct ioapic_domain_cfg;
 
-#define	IOAPIC_AUTO			-1
-#define	IOAPIC_EDGE			0
-#define	IOAPIC_LEVEL			1
-
-#define	IOAPIC_MASKED			1
-#define	IOAPIC_UNMASKED			0
-
-#define	IOAPIC_POL_HIGH			0
-#define	IOAPIC_POL_LOW			1
-
-#define	IOAPIC_DEST_MODE_PHYSICAL	0
-#define	IOAPIC_DEST_MODE_LOGICAL	1
-
 #define	IOAPIC_MAP_ALLOC		0x1
 #define	IOAPIC_MAP_CHECK		0x2
@@ -131,8 +109,8 @@ extern int mp_irq_entries;
 /* MP IRQ source entries */
 extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
-/* 1 if "noapic" boot option passed */
-extern int skip_ioapic_setup;
+/* True if "noapic" boot option passed */
+extern bool ioapic_is_disabled;
 
 /* 1 if "noapic" boot option passed */
 extern int noioapicquirk;
@@ -151,7 +129,7 @@ extern unsigned long io_apic_irqs;
  * assignment of PCI IRQ's.
*/ #define io_apic_assign_pci_irqs \ - (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + (mp_irq_entries && !ioapic_is_disabled && io_apic_irqs) struct irq_cfg; extern void ioapic_insert_resources(void); @@ -162,7 +140,6 @@ extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); -extern void setup_ioapic_ids_from_mpc_nocheck(void); extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); @@ -201,6 +178,7 @@ extern void print_IO_APICs(void); #define IO_APIC_IRQ(x) 0 #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop +#define nr_ioapics (0) static inline void ioapic_insert_resources(void) { } static inline int arch_early_ioapic_init(void) { return 0; } static inline void print_IO_APICs(void) {} diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..7f080f5c7def --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include <linux/refcount.h> +#include <asm/processor.h> + +struct io_bitmap { + u64 sequence; + refcount_t refcnt; + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +struct task_struct; + +#ifdef CONFIG_X86_IOPL_IOPERM +void io_bitmap_share(struct task_struct *tsk); +void io_bitmap_exit(struct task_struct *tsk); + +static inline void native_tss_invalidate_io_bitmap(void) +{ + /* + * Invalidate the I/O bitmap by moving io_bitmap_base outside the + * TSS limit so any subsequent I/O access from user space will + * trigger a #GP. + * + * This is correct even when VMEXIT rewrites the TSS limit + * to 0x67 as the only requirement is that the base points + * outside the limit. + */ + this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base, + IO_BITMAP_OFFSET_INVALID); +} + +void native_tss_update_io_bitmap(void); + +#ifdef CONFIG_PARAVIRT_XXL +#include <asm/paravirt.h> +#else +#define tss_update_io_bitmap native_tss_update_io_bitmap +#define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap +#endif + +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(struct task_struct *tsk) { } +static inline void tss_update_io_bitmap(void) { } +#endif + +#endif diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 363e33eb6ec1..e2de092fc38c 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -1,41 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_IOMAP_H #define _ASM_X86_IOMAP_H /* * Copyright © 2008 Ingo Molnar - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
*/ #include <linux/fs.h> #include <linux/mm.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/cacheflush.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> -void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); +void __iomem *__iomap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void -iounmap_atomic(void __iomem *kvaddr); +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); -int -iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); - -void -iomap_free(resource_size_t base, unsigned long size); +void iomap_free(resource_size_t base, unsigned long size); #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index baedab8ac538..3be2451e7bc8 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,11 +2,39 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H +#include <linux/acpi.h> + +#include <asm/e820/api.h> + extern int force_iommu, no_iommu; extern int iommu_detected; -extern int iommu_pass_through; +extern int iommu_merge; +extern int panic_on_overflow; +extern bool amd_iommu_snp_en; + +#ifdef CONFIG_SWIOTLB +extern bool x86_swiotlb_enable; +#else +#define x86_swiotlb_enable false +#endif /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +static inline int __init +arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) +{ + u64 start = rmrr->base_address; + u64 end = rmrr->end_address + 1; + int entry_type; + + entry_type = e820__get_entry_type(start, end); + if (entry_type == E820_TYPE_RESERVED || entry_type == E820_TYPE_NVS) + return 0; + + pr_err(FW_BUG "No firmware reserved region can cover this RMRR [%#018Lx-%#018Lx], contact BIOS vendor for fixes\n", + start, end - 1); + return -EINVAL; +} + #endif /* _ASM_X86_IOMMU_H */ diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h deleted file mode 100644 index 1fb3fd1a83c2..000000000000 --- a/arch/x86/include/asm/iommu_table.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_IOMMU_TABLE_H -#define _ASM_X86_IOMMU_TABLE_H - -#include <asm/swiotlb.h> - -/* - * History lesson: - * The execution chain of IOMMUs in 2.6.36 looks as so: - * - * [xen-swiotlb] - * | - * +----[swiotlb *]--+ - * / | \ - * / | \ - * [GART] [Calgary] [Intel VT-d] - * / - * / - * [AMD-Vi] - * - * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip - * over the rest of IOMMUs and unconditionally initialize the SWIOTLB. - * Also it would surreptitiously initialize set the swiotlb=1 if there were - * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb - * flag would be turned off by all IOMMUs except the Calgary one. - * - * The IOMMU_INIT* macros allow a similar tree (or more complex if desired) - * to be built by defining who we depend on. - * - * And all that needs to be done is to use one of the macros in the IOMMU - * and the pci-dma.c will take care of the rest. - */ - -struct iommu_table_entry { - initcall_t detect; - initcall_t depend; - void (*early_init)(void); /* No memory allocate available. */ - void (*late_init)(void); /* Yes, can allocate memory. */ -#define IOMMU_FINISH_IF_DETECTED (1<<0) -#define IOMMU_DETECTED (1<<1) - int flags; -}; -/* - * Macro fills out an entry in the .iommu_table that is equivalent - * to the fields that 'struct iommu_table_entry' has. 
The entries - * that are put in the .iommu_table section are not put in any order, - * hence during boot-time we will have to re-sort them based on their - * dependency. */ - - -#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry \ - __iommu_entry_##_detect __used \ - __attribute__ ((unused, __section__(".iommu_table"), \ - aligned((sizeof(void *))))) \ - = {_detect, _depend, _early_init, _late_init, \ - _finish ? IOMMU_FINISH_IF_DETECTED : 0} -/* - * The simplest IOMMU definition. Provide the detection routine - * and it will be run after the SWIOTLB and the other IOMMUs - * that utilize this macro. If the IOMMU is detected (i.e., the - * detect routine returns a positive value), the other IOMMUs - * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer - * to stop detecting the other IOMMUs after yours has been detected. - */ -#define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) - -#define IOMMU_INIT_POST_FINISH(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) - -/* - * A more sophisticated version of IOMMU_INIT. This variant requires: - * a). A detection routine function. - * b). The name of the detection routine we depend on to get called - * before us. - * c). The init routine, which gets called from pci_iommu_alloc if the - * detection routine returns a positive value; at that point no - * memory allocator is available yet. - * d). Similar to the 'init', except that this gets called from pci_iommu_init - * where we do have a memory allocator. - * - * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant - * in that the former will continue detecting other IOMMUs in the call - * list after the detection routine returns a positive number, while the - * latter will stop the execution chain upon first successful detection. - * Both variants will still call the 'init' and 'late_init' functions if - * they are set. - */ -#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ - __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) - -#define IOMMU_INIT(_detect, _depend, _init, _late_init) \ - __IOMMU_INIT(_detect, _depend, _init, _late_init, 0) - -void sort_iommu_table(struct iommu_table_entry *start, - struct iommu_table_entry *finish); - -void check_iommu_entries(struct iommu_table_entry *start, - struct iommu_table_entry *finish); - -#endif /* _ASM_X86_IOMMU_TABLE_H */ diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 5270ff39b9af..8ace6559d399 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -39,6 +39,7 @@ #define BT_MBI_UNIT_PMC 0x04 #define BT_MBI_UNIT_GFX 0x06 #define BT_MBI_UNIT_SMI 0x0C +#define BT_MBI_UNIT_CCK 0x14 #define BT_MBI_UNIT_USB 0x43 #define BT_MBI_UNIT_SATA 0xA3 #define BT_MBI_UNIT_PCIE 0xA6 @@ -110,7 +111,7 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); * This function will block all kernel access to the PMIC I2C bus, so that the * P-Unit can safely access the PMIC over the shared I2C bus. * - * Note on these systems the i2c-bus driver will request a sempahore from the + * Note on these systems the i2c-bus driver will request a semaphore from the * P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing * it, but this does not appear to be sufficient; we still need to avoid making * certain P-Unit requests during the access window to avoid problems.
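(To make the access-window dance just described concrete: a PMIC-sharing driver can subscribe to P-Unit bus ownership changes through the notifier hook declared in the next hunk. A minimal sketch of mine, not from this patch; the MBI_PMIC_BUS_ACCESS_BEGIN/END notifier actions are assumed from elsewhere in this header:)

	#include <linux/notifier.h>
	#include <asm/iosf_mbi.h>

	static int pmic_bus_notify(struct notifier_block *nb,
				   unsigned long action, void *data)
	{
		switch (action) {
		case MBI_PMIC_BUS_ACCESS_BEGIN:
			/* The P-Unit is about to use the PMIC I2C bus: back off. */
			break;
		case MBI_PMIC_BUS_ACCESS_END:
			/* Window closed: our own PMIC transfers are safe again. */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block pmic_bus_nb = {
		.notifier_call = pmic_bus_notify,
	};

	/* Registered once, e.g. from the driver's probe() path:
	 *	iosf_mbi_register_pmic_bus_access_notifier(&pmic_bus_nb);
	 */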
@@ -167,13 +168,6 @@ void iosf_mbi_unblock_punit_i2c_access(void); int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); /** - * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier - * - * @nb: notifier_block to unregister - */ -int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); - -/** * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus * notifier, unlocked * diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h deleted file mode 100644 index a4fe16e42b7b..000000000000 --- a/arch/x86/include/asm/ipi.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef _ASM_X86_IPI_H -#define _ASM_X86_IPI_H - -#ifdef CONFIG_X86_LOCAL_APIC - -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC InterProcessor Interrupt code. - * - * Moved to include file by James Cleverdon from - * arch/x86-64/kernel/smp.c - * - * Copyrights from kernel/smp.c: - * - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> - * (c) 2002,2003 Andi Kleen, SuSE Labs. - * Subject to the GNU Public License, v.2 - */ - -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/smp.h> - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector, - unsigned int dest) -{ - unsigned int icr = shortcut | dest; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __xapic_wait_icr_idle(void) -{ - while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). 
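(A worked illustration of __prepare_ICR() from the header being deleted here, mine rather than the file's: the ICR command word is simply shortcut | destination | delivery mode | vector, and NMI_VECTOR swaps the delivery mode for APIC_DM_NMI:)

	/* Fixed delivery of vector 0x30 via the all-but-self shorthand: */
	unsigned int icr = __prepare_ICR(APIC_DEST_ALLBUT, 0x30, 0);
	/* == APIC_DEST_ALLBUT | APIC_DM_FIXED | 0x30 */

	/* An NMI IPI carries no vector number, only the delivery mode: */
	unsigned int nmi = __prepare_ICR(0, NMI_VECTOR, 0);
	/* == APIC_DM_NMI */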
- */ -void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); - -extern void default_send_IPI_single(int cpu, int vector); -extern void default_send_IPI_single_phys(int cpu, int vector); -extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, - int vector); - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -extern int no_broadcast; - -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); -} - -#ifdef CONFIG_X86_32 -extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_mask_logical(const struct cpumask *mask, - int vector); -extern void default_send_IPI_allbutself(int vector); -extern void default_send_IPI_all(int vector); -extern void default_send_IPI_self(int vector); -#endif - -#endif - -#endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index fbb16e6b6c18..194dfff84cb1 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -11,46 +11,39 @@ #include <asm/apicdef.h> #include <asm/irq_vectors.h> +/* + * The irq entry code is in the noinstr section and the start/end of + * __irqentry_text is emitted via labels. Make the build fail if + * something moves a C function into the __irq_entry section. + */ +#define __irq_entry __invalid_section + static inline int irq_canonicalize(int irq) { return ((irq == 2) ? 
9 : irq); } -#ifdef CONFIG_X86_32 -extern void irq_ctx_init(int cpu); -#else -# define irq_ctx_init(cpu) do { } while (0) -#endif - -#define __ARCH_HAS_DO_SOFTIRQ +extern int irq_init_percpu_irqstack(unsigned int cpu); struct irq_desc; extern void fixup_irqs(void); -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); -extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); #endif extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); - -extern __visible unsigned int do_IRQ(struct pt_regs *regs); +extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern void init_ISA_irqs(void); -extern void __init init_IRQ(void); - #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, - bool exclude_self); + int exclude_cpu); -extern __visible void smp_x86_platform_ipi(struct pt_regs *regs); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h deleted file mode 100644 index 8f3bee821e6c..000000000000 --- a/arch/x86/include/asm/irq_regs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge <jeremy@goop.org> - */ -#ifndef _ASM_X86_IRQ_REGS_H -#define _ASM_X86_IRQ_REGS_H - -#include <asm/percpu.h> - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return this_cpu_read(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - this_cpu_write(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 67ed72f31cc2..5a0d42464d44 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,20 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * This header file contains the interface of the interrupt remapping code to * the x86 interrupt management code. 
*/ @@ -38,7 +26,22 @@ enum { IRQ_REMAP_X2APIC_MODE, }; -struct vcpu_data { +/* + * This is mainly used to communicate information back-and-forth + * between SVM and IOMMU for setting up and tearing down posted + * interrupt + */ +struct amd_iommu_pi_data { + u64 vapic_addr; /* Physical address of the vCPU's vAPIC. */ + u32 ga_tag; + u32 vector; /* Guest vector of the interrupt */ + int cpu; + bool ga_log_intr; + bool is_guest_mode; + void *ir_data; +}; + +struct intel_iommu_pi_data { u64 pi_desc_addr; /* Physical address of PI Descriptor */ u32 vector; /* Guest vector of the interrupt */ }; @@ -56,21 +59,19 @@ extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); extern void panic_if_irq_remap(const char *msg); -extern struct irq_domain * -irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info); -extern struct irq_domain * -irq_remapping_get_irq_domain(struct irq_alloc_info *info); - -/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ -extern struct irq_domain * -arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); - /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) { return x86_vector_domain; } +extern bool enable_posted_msi; + +static inline bool posted_msi_supported(void) +{ + return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP); +} + #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } @@ -85,17 +86,5 @@ static inline void panic_if_irq_remap(const char *msg) { } -static inline struct irq_domain * -irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info) -{ - return NULL; -} - -static inline struct irq_domain * -irq_remapping_get_irq_domain(struct irq_alloc_info *info) -{ - return NULL; -} - #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h new file mode 100644 index 000000000000..8325b79f2ac6 --- /dev/null +++ b/arch/x86/include/asm/irq_stack.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_STACK_H +#define _ASM_X86_IRQ_STACK_H + +#include <linux/ptrace.h> +#include <linux/objtool.h> + +#include <asm/processor.h> + +#ifdef CONFIG_X86_64 + +/* + * Macro to inline switching to an interrupt stack and invoking function + * calls from there. The following rules apply: + * + * - Ordering: + * + * 1. Write the stack pointer into the top most place of the irq + * stack. This ensures that the various unwinders can link back to the + * original stack. + * + * 2. Switch the stack pointer to the top of the irq stack. + * + * 3. Invoke whatever needs to be done (@asm_call argument) + * + * 4. Pop the original stack pointer from the top of the irq stack + * which brings it back to the original stack where it left off. + * + * - Function invocation: + * + * To allow flexible usage of the macro, the actual function code including + * the store of the arguments in the call ABI registers is handed in via + * the @asm_call argument. + * + * - Local variables: + * + * @tos: + * The @tos variable holds a pointer to the top of the irq stack and + * _must_ be allocated in a non-callee saved register as this is a + * restriction coming from objtool. 
+ * + * Note, that (tos) is both in input and output constraints to ensure + * that the compiler does not assume that R11 is left untouched in + * case this macro is used in some place where the per cpu interrupt + * stack pointer is used again afterwards + * + * - Function arguments: + * The function argument(s), if any, have to be defined in register + * variables at the place where this is invoked. Storing the + * argument(s) in the proper register(s) is part of the @asm_call + * + * - Constraints: + * + * The constraints have to be done very carefully because the compiler + * does not know about the assembly call. + * + * output: + * As documented already above the @tos variable is required to be in + * the output constraints to make the compiler aware that R11 cannot be + * reused after the asm() statement. + * + * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is + * required as well as this prevents certain creative GCC variants from + * misplacing the ASM code. + * + * input: + * - func: + * Immediate, which tells the compiler that the function is referenced. + * + * - tos: + * Register. The actual register is defined by the variable declaration. + * + * - function arguments: + * The constraints are handed in via the 'argconstr' argument list. They + * describe the register arguments which are used in @asm_call. + * + * clobbers: + * Function calls can clobber anything except the callee-saved + * registers. Tell the compiler. + */ +#define call_on_stack(stack, func, asm_call, argconstr...) \ +{ \ + register void *tos asm("r11"); \ + \ + tos = ((void *)(stack)); \ + \ + asm_inline volatile( \ + "movq %%rsp, (%[tos]) \n" \ + "movq %[tos], %%rsp \n" \ + \ + asm_call \ + \ + "popq %%rsp \n" \ + \ + : "+r" (tos), ASM_CALL_CONSTRAINT \ + : [__func] "i" (func), [tos] "r" (tos) argconstr \ + : "cc", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", \ + "memory" \ + ); \ +} + +#define ASM_CALL_ARG0 \ + "1: call %c[__func] \n" \ + ANNOTATE_REACHABLE(1b) " \n" + +#define ASM_CALL_ARG1 \ + "movq %[arg1], %%rdi \n" \ + ASM_CALL_ARG0 + +#define ASM_CALL_ARG2 \ + "movq %[arg2], %%rsi \n" \ + ASM_CALL_ARG1 + +#define ASM_CALL_ARG3 \ + "movq %[arg3], %%rdx \n" \ + ASM_CALL_ARG2 + +#define call_on_irqstack(func, asm_call, argconstr...) \ + call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ + func, asm_call, argconstr) + +/* Macros to assert type correctness for run_*_on_irqstack macros */ +#define assert_function_type(func, proto) \ + static_assert(__builtin_types_compatible_p(typeof(&func), proto)) + +#define assert_arg_type(arg, proto) \ + static_assert(__builtin_types_compatible_p(typeof(arg), proto)) + +/* + * Macro to invoke system vector and device interrupt C handlers. + */ +#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...) \ +{ \ + /* \ + * User mode entry and interrupt on the irq stack do not \ + * switch stacks. If from user mode the task stack is empty. \ + */ \ + if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + irq_enter_rcu(); \ + func(c_args); \ + irq_exit_rcu(); \ + } else { \ + /* \ + * Mark the irq stack inuse _before_ and unmark _after_ \ + * switching stacks. Interrupts are disabled in both \ + * places. Invoke the stack switch macro with the call \ + * sequence which matches the above direct invocation. 
\ + */ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(func, asm_call, constr); \ + __this_cpu_write(hardirq_stack_inuse, false); \ + } \ +} + +/* + * Function call sequence for __call_on_irqstack() for system vectors. + * + * Note that irq_enter_rcu() and irq_exit_rcu() do not use the input + * mechanism because these functions are global and cannot be optimized out + * when compiling a particular source file which uses one of these macros. + * + * The argument (regs) does not need to be pushed or stashed in a callee + * saved register to be safe vs. the irq_enter_rcu() call because the + * clobbers already prevent the compiler from storing it in a callee + * clobbered register. As the compiler has to preserve @regs for the final + * call to idtentry_exit() anyway, it's likely that it does not cause extra + * effort for this asm magic. + */ +#define ASM_CALL_SYSVEC \ + "call irq_enter_rcu \n" \ + ASM_CALL_ARG1 \ + "call irq_exit_rcu \n" + +#define SYSVEC_CONSTRAINTS , [arg1] "r" (regs) + +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *)); \ + assert_arg_type(regs, struct pt_regs *); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_SYSVEC, \ + SYSVEC_CONSTRAINTS, regs); \ +} + +/* + * As in ASM_CALL_SYSVEC above the clobbers force the compiler to store + * @regs and @vector in callee saved registers. + */ +#define ASM_CALL_IRQ \ + "call irq_enter_rcu \n" \ + ASM_CALL_ARG2 \ + "call irq_exit_rcu \n" + +#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector) + +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + assert_function_type(func, void (*)(struct pt_regs *, u32)); \ + assert_arg_type(regs, struct pt_regs *); \ + assert_arg_type(vector, u32); \ + \ + call_on_irqstack_cond(func, regs, ASM_CALL_IRQ, \ + IRQ_CONSTRAINTS, regs, vector); \ +} + +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK +/* + * Macro to invoke __do_softirq on the irq stack. This is only called from + * task context when bottom halves are about to be reenabled and soft + * interrupts are pending to be processed. The interrupt stack cannot be in + * use here. + */ +#define do_softirq_own_stack() \ +{ \ + __this_cpu_write(hardirq_stack_inuse, true); \ + call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ + __this_cpu_write(hardirq_stack_inuse, false); \ +} + +#endif + +#else /* CONFIG_X86_64 */ +/* System vector handlers always run on the stack they interrupted. */ +#define run_sysvec_on_irqstack_cond(func, regs) \ +{ \ + irq_enter_rcu(); \ + func(regs); \ + irq_exit_rcu(); \ +} + +/* Switches to the irq stack within func() */ +#define run_irq_on_irqstack_cond(func, regs, vector) \ +{ \ + irq_enter_rcu(); \ + func(regs, vector); \ + irq_exit_rcu(); \ +} + +#endif /* !CONFIG_X86_64 */ + +#endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 548d90bbf919..47051871b436 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -18,16 +18,16 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts - * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts + * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts + * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. 
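(An aside on the arithmetic this layout implies; the NR_* helper macros appear at the end of this hunk. A throwaway sanity check of mine, using the values this patch settles on for CONFIG_X86_LOCAL_APIC builds:)

	#include <stdio.h>

	#define FIRST_EXTERNAL_VECTOR		0x20	/* 32 */
	#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb	/* 235 */
	#define FIRST_SYSTEM_VECTOR	POSTED_MSI_NOTIFICATION_VECTOR
	#define NR_VECTORS			256

	int main(void)
	{
		/* 235 - 32 = 203 vectors assignable to device interrupts */
		printf("NR_EXTERNAL_VECTORS = %d\n",
		       FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR);
		/* 256 - 235 = 21 vectors reserved as system vectors */
		printf("NR_SYSTEM_VECTORS   = %d\n",
		       NR_VECTORS - FIRST_SYSTEM_VECTOR);
		return 0;
	}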
* * This file enumerates the exact layout of them: */ +/* This is used as an interrupt vector when programming the APIC. */ #define NMI_VECTOR 0x02 -#define MCE_VECTOR 0x12 /* * IDT vectors usable for external interrupt sources start at 0x20. @@ -35,13 +35,6 @@ */ #define FIRST_EXTERNAL_VECTOR 0x20 -/* - * Reserve the lowest usable vector (and hence lowest priority) 0x20 for - * triggering cleanup after irq migration. 0x21-0x2f will still be used - * for device interrupts. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - #define IA32_SYSCALL_VECTOR 0x80 /* @@ -84,18 +77,16 @@ */ #define IRQ_WORK_VECTOR 0xf6 -#define UV_BAU_MESSAGE 0xf5 +/* 0xf5 - unused, was UV_BAU_MESSAGE */ #define DEFERRED_ERROR_VECTOR 0xf4 /* Vector on which hypervisor callbacks will be delivered */ #define HYPERVISOR_CALLBACK_VECTOR 0xf3 /* Vector for KVM to deliver posted interrupt IPI */ -#ifdef CONFIG_HAVE_KVM #define POSTED_INTR_VECTOR 0xf2 #define POSTED_INTR_WAKEUP_VECTOR 0xf1 #define POSTED_INTR_NESTED_VECTOR 0xf0 -#endif #define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef @@ -106,14 +97,23 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb + #define NR_VECTORS 256 #ifdef CONFIG_X86_LOCAL_APIC -#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif +#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) +#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR) + /* * Size the maximum number of interrupts. * diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 80b35e3adf03..6b4d36c95165 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -9,8 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return boot_cpu_has(X86_FEATURE_APIC); } -extern void arch_irq_work_raise(void); -extern __visible void smp_irq_work_interrupt(struct pt_regs *regs); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index c066ffae222b..30c325c235c0 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -7,11 +7,12 @@ #ifdef CONFIG_X86_LOCAL_APIC enum { - /* Allocate contiguous CPU vectors */ - X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, - X86_IRQ_ALLOC_LEGACY = 0x2, + X86_IRQ_ALLOC_LEGACY = 0x1, }; +extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec); +extern int x86_fwspec_is_hpet(struct irq_fwspec *fwspec); + extern struct irq_domain *x86_vector_domain; extern void init_irq_alloc_info(struct irq_alloc_info *info, @@ -51,9 +52,13 @@ extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); #endif /* CONFIG_X86_IO_APIC */ #ifdef CONFIG_PCI_MSI -extern void arch_init_msi_domain(struct irq_domain *domain); +void x86_create_pci_msi_domain(void); +struct irq_domain *native_create_pci_msi_domain(void); +extern struct irq_domain *x86_pci_msi_default_domain; #else -static inline void arch_init_msi_domain(struct irq_domain *domain) { } +static inline void x86_create_pci_msi_domain(void) { } +#define native_create_pci_msi_domain NULL +#define x86_pci_msi_default_domain NULL #endif #endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ 
-4,10 +4,9 @@ #include <asm/processor-flags.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -/* Provide __cpuidle; we can't safely include <linux/cpu.h> */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#include <asm/nospec-branch.h> /* * Interrupt control: @@ -15,7 +14,7 @@ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); -extern inline unsigned long native_save_fl(void) +extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; @@ -33,68 +32,57 @@ extern inline unsigned long native_save_fl(void) return flags; } -extern inline void native_restore_fl(unsigned long flags); -extern inline void native_restore_fl(unsigned long flags) -{ - asm volatile("push %0 ; popf" - : /* no output */ - :"g" (flags) - :"memory", "cc"); -} - -static inline void native_irq_disable(void) +static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } -static inline void native_irq_enable(void) +static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } -static inline __cpuidle void native_safe_halt(void) +static __always_inline void native_safe_halt(void) { + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } -static inline __cpuidle void native_halt(void) +static __always_inline void native_halt(void) { + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } -#endif - -#ifdef CONFIG_PARAVIRT_XXL -#include <asm/paravirt.h> -#else -#ifndef __ASSEMBLY__ -#include <linux/types.h> - -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline int native_irqs_disabled_flags(unsigned long flags) { - return native_save_fl(); + return !(flags & X86_EFLAGS_IF); } -static inline notrace void arch_local_irq_restore(unsigned long flags) +static __always_inline unsigned long native_local_irq_save(void) { - native_restore_fl(flags); -} + unsigned long flags = native_save_fl(); -static inline notrace void arch_local_irq_disable(void) -{ native_irq_disable(); + + return flags; } -static inline notrace void arch_local_irq_enable(void) +static __always_inline void native_local_irq_restore(unsigned long flags) { - native_irq_enable(); + if (!native_irqs_disabled_flags(flags)) + native_irq_enable(); } +#endif + +#ifndef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ /* * Used in the idle loop; sti takes one instruction cycle * to complete: */ -static inline __cpuidle void arch_safe_halt(void) +static __always_inline void arch_safe_halt(void) { native_safe_halt(); } @@ -103,15 +91,38 @@ static inline __cpuidle void arch_safe_halt(void) * Used when interrupts are already enabled or to * shutdown the processor: */ -static inline __cpuidle void halt(void) +static __always_inline void halt(void) { native_halt(); } +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifdef CONFIG_PARAVIRT_XXL +#include <asm/paravirt.h> +#else +#ifndef __ASSEMBLER__ +#include <linux/types.h> + +static __always_inline unsigned long arch_local_save_flags(void) +{ + return native_save_fl(); +} + +static __always_inline void arch_local_irq_disable(void) +{ + native_irq_disable(); +} + +static __always_inline void arch_local_irq_enable(void) +{ + native_irq_enable(); +} /* * For spinlocks, etc: */ -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); @@ -119,87 +130,34 @@ 
static inline notrace unsigned long arch_local_irq_save(void) } #else -#define ENABLE_INTERRUPTS(x) sti -#define DISABLE_INTERRUPTS(x) cli - #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax +#define SAVE_FLAGS pushfq; popq %rax #endif -#define SWAPGS swapgs -/* - * Currently paravirt can't handle swapgs nicely when we - * don't have a stack we can rely on (such as a user space - * stack). So we either find a way around these or just fault - * and emulate if a guest tries to call swapgs directly. - * - * Either way, this is a good way to document that we don't - * have a reliable stack. x86_64 only. - */ -#define SWAPGS_UNSAFE_STACK swapgs - -#define INTERRUPT_RETURN jmp native_iret -#define USERGS_SYSRET64 \ - swapgs; \ - sysretq; -#define USERGS_SYSRET32 \ - swapgs; \ - sysretl - -#else -#define INTERRUPT_RETURN iret #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_PARAVIRT_XXL */ -#ifndef __ASSEMBLY__ -static inline int arch_irqs_disabled_flags(unsigned long flags) +#ifndef __ASSEMBLER__ +static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } -static inline int arch_irqs_disabled(void) +static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); return arch_irqs_disabled_flags(flags); } -#endif /* !__ASSEMBLY__ */ -#ifdef __ASSEMBLY__ -#ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; -#else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_X86_64 -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -# define LOCKDEP_SYS_EXIT_IRQ \ - TRACE_IRQS_ON; \ - sti; \ - call lockdep_sys_exit_thunk; \ - cli; \ - TRACE_IRQS_OFF; -# else -# define LOCKDEP_SYS_EXIT \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call lockdep_sys_exit; \ - popl %edx; \ - popl %ecx; \ - popl %eax; -# define LOCKDEP_SYS_EXIT_IRQ -# endif -#else -# define LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ -#endif -#endif /* __ASSEMBLY__ */ +static __always_inline void arch_local_irq_restore(unsigned long flags) +{ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_enable(); +} +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h index c9803f1a2033..7ede2731dc92 100644 --- a/arch/x86/include/asm/ist.h +++ b/arch/x86/include/asm/ist.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Include file for the interface to IST BIOS * Copyright 2002 Andy Grover <andrew.grover@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
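(Stepping back to irqflags.h just above: the arch_local_irq_save()/arch_local_irq_restore() pair rebuilt there is what the generic local_irq_save()/local_irq_restore() helpers bottom out in. A minimal sketch of the canonical usage, mine for illustration:)

	unsigned long flags;

	local_irq_save(flags);		/* save RFLAGS.IF, then cli */
	/* ... short per-CPU critical section, no interrupts here ... */
	local_irq_restore(flags);	/* sti only if IF was set on entry */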
*/ #ifndef _ASM_X86_IST_H #define _ASM_X86_IST_H diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 65191ce8e1cf..05b16299588d 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -2,31 +2,39 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#define JUMP_LABEL_NOP_SIZE 5 - -#ifdef CONFIG_X86_64 -# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC -#else -# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC -#endif +#define HAVE_JUMP_LABEL_BATCH #include <asm/asm.h> #include <asm/nops.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stringify.h> #include <linux/types.h> -static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ANNOTATE_DATA_SPECIAL "\n" \ + ".long 1b - . \n\t" \ + ".long " label " - . \n\t" \ + _ASM_PTR " " key " - . \n\t" \ + ".popsection \n\t" + +/* This macro is also expanded on the Rust side. */ +#ifdef CONFIG_HAVE_JUMP_LABEL_HACK +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + "1: jmp " label " # objtool NOPs this \n\t" \ + JUMP_TABLE_ENTRY(key " + 2", label) +#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */ +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + "1: .byte " __stringify(BYTES_NOP5) "\n\t" \ + JUMP_TABLE_ENTRY(key, label) +#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */ + +static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" - ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + asm goto(ARCH_STATIC_BRANCH_ASM("%c0 + %c1", "%l[l_yes]") : : "i" (key), "i" (branch) : : l_yes); return false; @@ -34,16 +42,11 @@ l_yes: return true; } -static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + asm goto("1:" + "jmp %l[l_yes]\n\t" + JUMP_TABLE_ENTRY("%c0 + %c1", "%l[l_yes]") : : "i" (key), "i" (branch) : : l_yes); return false; @@ -51,42 +54,8 @@ l_yes: return true; } -#else /* __ASSEMBLY__ */ - -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte STATIC_KEY_INIT_NOP - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key - . - .popsection -.endm - -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte STATIC_KEY_INIT_NOP - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key + 1 - . 
- .popsection -.endm +extern int arch_jump_entry_size(struct jump_entry *entry); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 13e70da38bed..d7e33c7f096b 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -23,14 +23,17 @@ (1ULL << (__VIRTUAL_MASK_SHIFT - \ KASAN_SHADOW_SCALE_SHIFT))) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_KASAN void __init kasan_early_init(void); void __init kasan_init(void); +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); #else static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } +static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, + int nid) { } #endif #endif diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index db7ba2feb947..0648190467ba 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); #else static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} #endif /* CONFIG_RANDOMIZE_MEMORY */ #endif diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 75f1e35e7c15..d1514e70477b 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,10 +33,12 @@ enum show_regs_mode { }; extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); -extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); -extern void show_iret_regs(struct pt_regs *regs); +extern void __show_regs(struct pt_regs *regs, enum show_regs_mode, + const char *log_lvl); +extern void show_iret_regs(struct pt_regs *regs, const char *log_lvl); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 003f2daa3b0f..5cfb27f26583 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -9,23 +9,30 @@ # define PA_SWAP_PAGE 3 # define PAGES_NR 4 #else -# define PA_CONTROL_PAGE 0 -# define VA_CONTROL_PAGE 1 -# define PA_TABLE_PAGE 2 -# define PA_SWAP_PAGE 3 -# define PAGES_NR 4 +/* Size of each exception handler referenced by the IDT */ +# define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */ +#endif + +#ifdef CONFIG_X86_64 + +#include <linux/bits.h> + +#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0) +#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1) + #endif +# define KEXEC_CONTROL_PAGE_SIZE 4096 # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/string.h> #include <linux/kernel.h> +#include <asm/asm.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/bootparam.h> struct kimage; @@ -44,7 +51,6 @@ struct kimage; /* Maximum address we can use for the control code buffer */ # define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE -# define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_386 @@ -59,32 +65,17 @@ struct kimage; /* Maximum address we can use for the control pages */ # define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1) -/* 
Allocate one page for the pdp and the second for the code */ -# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) - /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_X86_64 -#endif -/* Memory to backup during crash kdump */ -#define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ - -/* - * CPU does not save ss and sp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static inline void crash_fixup_ss_esp(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ -#ifdef CONFIG_X86_32 - newregs->sp = (unsigned long)&(oldregs->sp); - asm volatile("xorl %%eax, %%eax\n\t" - "movw %%ss, %%ax\n\t" - :"=a"(newregs->ss)); +extern unsigned long kexec_va_control_page; +extern unsigned long kexec_pa_table_page; +extern unsigned long kexec_pa_swap_page; +extern gate_desc kexec_debug_idt[]; +extern unsigned char kexec_debug_exc_vectors[]; +extern uint16_t kexec_debug_8250_port; +extern unsigned long kexec_debug_8250_mmio32; #endif -} /* * This function is responsible for capturing register states if coming @@ -96,63 +87,52 @@ static inline void crash_setup_regs(struct pt_regs *newregs, { if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); - crash_fixup_ss_esp(newregs, oldregs); } else { + asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx)); + asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx)); + asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx)); + asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si)); + asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di)); + asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp)); + asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax)); + asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp)); +#ifdef CONFIG_X86_64 + asm volatile("mov %%r8,%0" : "=m"(newregs->r8)); + asm volatile("mov %%r9,%0" : "=m"(newregs->r9)); + asm volatile("mov %%r10,%0" : "=m"(newregs->r10)); + asm volatile("mov %%r11,%0" : "=m"(newregs->r11)); + asm volatile("mov %%r12,%0" : "=m"(newregs->r12)); + asm volatile("mov %%r13,%0" : "=m"(newregs->r13)); + asm volatile("mov %%r14,%0" : "=m"(newregs->r14)); + asm volatile("mov %%r15,%0" : "=m"(newregs->r15)); +#endif + asm volatile("mov %%ss,%k0" : "=a"(newregs->ss)); + asm volatile("mov %%cs,%k0" : "=a"(newregs->cs)); #ifdef CONFIG_X86_32 - asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); - asm volatile("movl %%ecx,%0" : "=m"(newregs->cx)); - asm volatile("movl %%edx,%0" : "=m"(newregs->dx)); - asm volatile("movl %%esi,%0" : "=m"(newregs->si)); - asm volatile("movl %%edi,%0" : "=m"(newregs->di)); - asm volatile("movl %%ebp,%0" : "=m"(newregs->bp)); - asm volatile("movl %%eax,%0" : "=m"(newregs->ax)); - asm volatile("movl %%esp,%0" : "=m"(newregs->sp)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds)); - asm volatile("movl %%es, %%eax;" :"=a"(newregs->es)); - asm volatile("pushfl; popl %0" :"=m"(newregs->flags)); -#else - asm volatile("movq %%rbx,%0" : "=m"(newregs->bx)); - asm volatile("movq %%rcx,%0" : "=m"(newregs->cx)); - asm volatile("movq %%rdx,%0" : "=m"(newregs->dx)); - asm volatile("movq %%rsi,%0" : "=m"(newregs->si)); - asm volatile("movq %%rdi,%0" : "=m"(newregs->di)); - asm volatile("movq %%rbp,%0" : "=m"(newregs->bp)); - asm volatile("movq %%rax,%0" : "=m"(newregs->ax)); - asm volatile("movq %%rsp,%0" : "=m"(newregs->sp)); - asm volatile("movq %%r8,%0" : 
"=m"(newregs->r8)); - asm volatile("movq %%r9,%0" : "=m"(newregs->r9)); - asm volatile("movq %%r10,%0" : "=m"(newregs->r10)); - asm volatile("movq %%r11,%0" : "=m"(newregs->r11)); - asm volatile("movq %%r12,%0" : "=m"(newregs->r12)); - asm volatile("movq %%r13,%0" : "=m"(newregs->r13)); - asm volatile("movq %%r14,%0" : "=m"(newregs->r14)); - asm volatile("movq %%r15,%0" : "=m"(newregs->r15)); - asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss)); - asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs)); - asm volatile("pushfq; popq %0" :"=m"(newregs->flags)); + asm volatile("mov %%ds,%k0" : "=a"(newregs->ds)); + asm volatile("mov %%es,%k0" : "=a"(newregs->es)); #endif + asm volatile("pushf\n\t" + "pop %0" : "=m"(newregs->flags)); newregs->ip = _THIS_IP_; } } #ifdef CONFIG_X86_32 -asmlinkage unsigned long -relocate_kernel(unsigned long indirection_page, - unsigned long control_page, - unsigned long start_address, - unsigned int has_pae, - unsigned int preserve_context); +typedef asmlinkage unsigned long +relocate_kernel_fn(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); #else -unsigned long -relocate_kernel(unsigned long indirection_page, - unsigned long page_list, - unsigned long start_address, - unsigned int preserve_context, - unsigned int sme_active); +typedef unsigned long +relocate_kernel_fn(unsigned long indirection_page, + unsigned long pa_control_page, + unsigned long start_address, + unsigned int flags); #endif - +extern relocate_kernel_fn relocate_kernel; #define ARCH_HAS_KIMAGE_ARCH #ifdef CONFIG_X86_32 @@ -167,21 +147,23 @@ struct kimage_arch { }; #else struct kimage_arch { + /* + * This is a kimage control page, as it must not overlap with either + * source or destination address ranges. + */ + pgd_t *pgd; + /* + * The virtual mapping of the control code page itself is used only + * during the transition, while the current kernel's pages are all + * in place. Thus the intermediate page table pages used to map it + * are not control pages, but instead just normal pages obtained + * with get_zeroed_page(). And have to be tracked (below) so that + * they can be freed. 
+ */ p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; - /* Details of backup region */ - unsigned long backup_src_start; - unsigned long backup_src_sz; - - /* Physical address of backup segment */ - unsigned long backup_load_addr; - - /* Core ELF header buffer */ - void *elf_headers; - unsigned long elf_headers_sz; - unsigned long elf_load_addr; }; #endif /* CONFIG_X86_32 */ @@ -218,12 +200,38 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +void arch_kexec_protect_crashkres(void); +#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres + +void arch_kexec_unprotect_crashkres(void); +#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres + +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup +#endif #endif -typedef void crash_vmclear_fn(void); -extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h new file mode 100644 index 000000000000..ff5c7134a37a --- /dev/null +++ b/arch/x86/include/asm/kfence.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KFENCE support. + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef _ASM_X86_KFENCE_H +#define _ASM_X86_KFENCE_H + +#ifndef MODULE + +#include <linux/bug.h> +#include <linux/kfence.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/set_memory.h> +#include <asm/tlbflush.h> + +/* Force 4K pages for __kfence_pool. */ +static inline bool arch_kfence_init_pool(void) +{ + unsigned long addr; + + for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); + addr += PAGE_SIZE) { + unsigned int level; + + if (!lookup_address(addr, &level)) + return false; + + if (level != PG_LEVEL_4K) + set_memory_4k(addr, 1); + } + + return true; +} + +/* Protect the given page and flush TLB. */ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + unsigned int level; + pte_t *pte = lookup_address(addr, &level); + + if (WARN_ON(!pte || level != PG_LEVEL_4K)) + return false; + + /* + * We need to avoid IPIs, as we may get KFENCE allocations or faults + * with interrupts disabled. Therefore, the below is best-effort, and + * does not flush TLBs on all CPUs. We can tolerate some inaccuracy; + * lazy fault handling takes care of faults after the page is PRESENT. 
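(What this protection toggle buys in practice: KFENCE surrounds each sampled allocation with guard pages that kfence_protect_page() keeps non-present, so an out-of-bounds access faults at the guilty instruction. An illustrative kernel-side sketch of mine, not from this patch; it only trips when the allocation happens to be served from __kfence_pool with the object placed against its guard page:)

	#include <linux/slab.h>

	static void kfence_oob_demo(void)
	{
		char *buf = kmalloc(32, GFP_KERNEL);

		if (!buf)
			return;
		buf[32] = 0;	/* one past the end: if this object sits
				 * against a guard page, the write hits a
				 * non-present PTE and KFENCE reports it at
				 * this very instruction */
		kfree(buf);
	}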
+ */ + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + else + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. + */ + preempt_disable(); + flush_tlb_one_kernel(addr); + preempt_enable(); + return true; +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KFENCE_H */ diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h deleted file mode 100644 index 04ab8266e347..000000000000 --- a/arch/x86/include/asm/kmap_types.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_KMAP_TYPES_H -#define _ASM_X86_KMAP_TYPES_H - -#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) -#define __WITH_KM_FENCE -#endif - -#include <asm-generic/kmap_types.h> - -#undef __WITH_KM_FENCE - -#endif /* _ASM_X86_KMAP_TYPES_H */ diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h new file mode 100644 index 000000000000..d91b37f5b4bb --- /dev/null +++ b/arch/x86/include/asm/kmsan.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * x86 KMSAN support. + * + * Copyright (C) 2022, Google LLC + * Author: Alexander Potapenko <glider@google.com> + */ + +#ifndef _ASM_X86_KMSAN_H +#define _ASM_X86_KMSAN_H + +#ifndef MODULE + +#include <asm/cpu_entry_area.h> +#include <asm/processor.h> +#include <linux/mmzone.h> + +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow); +DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin); + +/* + * Functions below are declared in the header to make sure they are inlined. + * They all are called from kmsan_get_metadata() for every memory access in + * the kernel, so speed is important here. + */ + +/* + * Compute metadata addresses for the CPU entry area on x86. + */ +static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) +{ + unsigned long addr64 = (unsigned long)addr; + char *metadata_array; + unsigned long off; + int cpu; + + if ((addr64 < CPU_ENTRY_AREA_BASE) || + (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE))) + return NULL; + cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE; + off = addr64 - (unsigned long)get_cpu_entry_area(cpu); + if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE)) + return NULL; + metadata_array = is_origin ? cpu_entry_area_origin : + cpu_entry_area_shadow; + return &per_cpu(metadata_array[off], cpu); +} + +/* + * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version. + */ +static inline bool kmsan_phys_addr_valid(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) + return !(addr >> boot_cpu_data.x86_phys_bits); + else + return true; +} + +/* + * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version. + */ +static inline bool kmsan_virt_addr_valid(void *addr) +{ + unsigned long x = (unsigned long)addr; + unsigned long y = x - __START_KERNEL_map; + bool ret; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + if (y >= KERNEL_IMAGE_SIZE) + return false; + } else { + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + if ((x > y) || !kmsan_phys_addr_valid(x)) + return false; + } + + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. 
Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; +} + +#endif /* !MODULE */ + +#endif /* _ASM_X86_KMSAN_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index c8cec1b39b88..5939694dfb28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -1,22 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_KPROBES_H #define _ASM_X86_KPROBES_H /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2002, 2004 * * See arch/x86/kernel/kprobes.c for x86 kprobes history. @@ -24,12 +11,11 @@ #include <asm-generic/kprobes.h> -#define BREAKPOINT_INSTRUCTION 0xcc - #ifdef CONFIG_KPROBES #include <linux/types.h> #include <linux/ptrace.h> #include <linux/percpu.h> +#include <asm/text-patching.h> #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -38,10 +24,7 @@ struct pt_regs; struct kprobe; typedef u8 kprobe_opcode_t; -#define RELATIVEJUMP_OPCODE 0xe9 -#define RELATIVEJUMP_SIZE 5 -#define RELATIVECALL_OPCODE 0xe8 -#define RELATIVE_ADDR_SIZE 4 + #define MAX_STACK_SIZE 64 #define CUR_STACK_SIZE(ADDR) \ (current_top_of_stack() - (unsigned long)(ADDR)) @@ -53,40 +36,55 @@ typedef u8 kprobe_opcode_t; /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_clac[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; -#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)optprobe_template_end - \ (unsigned long)optprobe_template_entry) + \ - MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); -asmlinkage void kretprobe_trampoline(void); - -extern void arch_kprobe_override_function(struct pt_regs *regs); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ kprobe_opcode_t *insn; /* - * boostable = false: This instruction type is not boostable. - * boostable = true: This instruction has been boosted: we have + * boostable = 0: This instruction type is not boostable. 
+ * boostable = 1: This instruction has been boosted: we have * added a relative jump after the instruction copy in insn, * so no single-step and fixup are needed (unless there's * a post_handler). */ - bool boostable; - bool if_modifier; + unsigned boostable:1; + unsigned char size; /* The size of insn */ + union { + unsigned char opcode; + struct { + unsigned char type; + } jcc; + struct { + unsigned char type; + unsigned char asize; + } loop; + struct { + unsigned char reg; + } indirect; + }; + s32 rel32; /* relative offset must be s32, s16, or s8 */ + void (*emulate_op)(struct kprobe *p, struct pt_regs *regs); + /* Number of bytes of text poked */ + int tp_len; }; struct arch_optimized_insn { /* copy of the original instructions */ - kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; + kprobe_opcode_t copied_insn[DISP32_SIZE]; /* detour code buffer */ kprobe_opcode_t *insn; /* the size of instructions copied to detour code buffer */ @@ -115,10 +113,11 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); -extern int kprobe_debug_handler(struct pt_regs *regs); + +#else + +static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; } #endif /* CONFIG_KPROBES */ #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h new file mode 100644 index 000000000000..de709fb5bd76 --- /dev/null +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL) +BUILD_BUG_ON(1) +#endif + +/* + * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate + * both DECLARE/DEFINE_STATIC_CALL() invocations and + * "static_call_update()" calls. + * + * KVM_X86_OP_OPTIONAL() can be used for those functions that can have + * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise + * to make a definition optional, but in this case the default will + * be __static_call_return0. 
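(The include-site contract, to make the X-macro pattern concrete: every consumer defines the three macros before including this header, and the header #undefs them again at the bottom. A sketch of what a declaration-side consumer looks like; treat the exact expansion as my assumption, not a quote from KVM:)

	/* Declare one static_call() key per vendor op. */
	#define KVM_X86_OP(func) \
		DECLARE_STATIC_CALL(kvm_x86_##func, \
				    *(((struct kvm_x86_ops *)0)->func));
	#define KVM_X86_OP_OPTIONAL	KVM_X86_OP
	#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
	#include <asm/kvm-x86-ops.h>

The (((struct kvm_x86_ops *)0)->func) expression is never dereferenced; it exists only to hand the member's function type to DECLARE_STATIC_CALL().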
+ */ +KVM_X86_OP(check_processor_compatibility) +KVM_X86_OP(enable_virtualization_cpu) +KVM_X86_OP(disable_virtualization_cpu) +KVM_X86_OP(hardware_unsetup) +KVM_X86_OP(has_emulated_msr) +KVM_X86_OP(vcpu_after_set_cpuid) +KVM_X86_OP(vm_init) +KVM_X86_OP_OPTIONAL(vm_destroy) +KVM_X86_OP_OPTIONAL(vm_pre_destroy) +KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate) +KVM_X86_OP(vcpu_create) +KVM_X86_OP(vcpu_free) +KVM_X86_OP(vcpu_reset) +KVM_X86_OP(prepare_switch_to_guest) +KVM_X86_OP(vcpu_load) +KVM_X86_OP(vcpu_put) +KVM_X86_OP(update_exception_bitmap) +KVM_X86_OP(get_msr) +KVM_X86_OP(set_msr) +KVM_X86_OP(get_segment_base) +KVM_X86_OP(get_segment) +KVM_X86_OP(get_cpl) +KVM_X86_OP(get_cpl_no_cache) +KVM_X86_OP(set_segment) +KVM_X86_OP(get_cs_db_l_bits) +KVM_X86_OP(is_valid_cr0) +KVM_X86_OP(set_cr0) +KVM_X86_OP_OPTIONAL(post_set_cr3) +KVM_X86_OP(is_valid_cr4) +KVM_X86_OP(set_cr4) +KVM_X86_OP(set_efer) +KVM_X86_OP(get_idt) +KVM_X86_OP(set_idt) +KVM_X86_OP(get_gdt) +KVM_X86_OP(set_gdt) +KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr7) +KVM_X86_OP(cache_reg) +KVM_X86_OP(get_rflags) +KVM_X86_OP(set_rflags) +KVM_X86_OP(get_if_flag) +KVM_X86_OP(flush_tlb_all) +KVM_X86_OP(flush_tlb_current) +#if IS_ENABLED(CONFIG_HYPERV) +KVM_X86_OP_OPTIONAL(flush_remote_tlbs) +KVM_X86_OP_OPTIONAL(flush_remote_tlbs_range) +#endif +KVM_X86_OP(flush_tlb_gva) +KVM_X86_OP(flush_tlb_guest) +KVM_X86_OP(vcpu_pre_run) +KVM_X86_OP(vcpu_run) +KVM_X86_OP(handle_exit) +KVM_X86_OP(skip_emulated_instruction) +KVM_X86_OP_OPTIONAL(update_emulated_instruction) +KVM_X86_OP(set_interrupt_shadow) +KVM_X86_OP(get_interrupt_shadow) +KVM_X86_OP(patch_hypercall) +KVM_X86_OP(inject_irq) +KVM_X86_OP(inject_nmi) +KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending) +KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending) +KVM_X86_OP(inject_exception) +KVM_X86_OP(cancel_injection) +KVM_X86_OP(interrupt_allowed) +KVM_X86_OP(nmi_allowed) +KVM_X86_OP(get_nmi_mask) +KVM_X86_OP(set_nmi_mask) +KVM_X86_OP(enable_nmi_window) +KVM_X86_OP(enable_irq_window) +KVM_X86_OP_OPTIONAL(update_cr8_intercept) +KVM_X86_OP(refresh_apicv_exec_ctrl) +KVM_X86_OP_OPTIONAL(hwapic_isr_update) +KVM_X86_OP_OPTIONAL(load_eoi_exitmap) +KVM_X86_OP_OPTIONAL(set_virtual_apic_mode) +KVM_X86_OP_OPTIONAL(set_apic_access_page_addr) +KVM_X86_OP(deliver_interrupt) +KVM_X86_OP_OPTIONAL(sync_pir_to_irr) +KVM_X86_OP_OPTIONAL_RET0(set_tss_addr) +KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr) +KVM_X86_OP_OPTIONAL_RET0(get_mt_mask) +KVM_X86_OP(load_mmu_pgd) +KVM_X86_OP_OPTIONAL(link_external_spt) +KVM_X86_OP_OPTIONAL(set_external_spte) +KVM_X86_OP_OPTIONAL(free_external_spt) +KVM_X86_OP_OPTIONAL(remove_external_spte) +KVM_X86_OP(has_wbinvd_exit) +KVM_X86_OP(get_l2_tsc_offset) +KVM_X86_OP(get_l2_tsc_multiplier) +KVM_X86_OP(write_tsc_offset) +KVM_X86_OP(write_tsc_multiplier) +KVM_X86_OP(get_exit_info) +KVM_X86_OP(get_entry_info) +KVM_X86_OP(check_intercept) +KVM_X86_OP(handle_exit_irqoff) +KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging) +KVM_X86_OP_OPTIONAL(vcpu_blocking) +KVM_X86_OP_OPTIONAL(vcpu_unblocking) +KVM_X86_OP_OPTIONAL(pi_update_irte) +KVM_X86_OP_OPTIONAL(pi_start_bypass) +KVM_X86_OP_OPTIONAL(apicv_pre_state_restore) +KVM_X86_OP_OPTIONAL(apicv_post_state_restore) +KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) +KVM_X86_OP_OPTIONAL(protected_apic_has_interrupt) +KVM_X86_OP_OPTIONAL(set_hv_timer) +KVM_X86_OP_OPTIONAL(cancel_hv_timer) +KVM_X86_OP(setup_mce) +#ifdef CONFIG_KVM_SMM +KVM_X86_OP(smi_allowed) +KVM_X86_OP(enter_smm) +KVM_X86_OP(leave_smm) +KVM_X86_OP(enable_smi_window) +#endif 
+KVM_X86_OP_OPTIONAL(dev_get_attr) +KVM_X86_OP_OPTIONAL(mem_enc_ioctl) +KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl) +KVM_X86_OP_OPTIONAL(vcpu_mem_enc_unlocked_ioctl) +KVM_X86_OP_OPTIONAL(mem_enc_register_region) +KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) +KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) +KVM_X86_OP_OPTIONAL(vm_move_enc_context_from) +KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) +KVM_X86_OP(get_feature_msr) +KVM_X86_OP(check_emulate_instruction) +KVM_X86_OP(apic_init_signal_blocked) +KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) +KVM_X86_OP_OPTIONAL(migrate_timers) +KVM_X86_OP(recalc_intercepts) +KVM_X86_OP(complete_emulated_msr) +KVM_X86_OP(vcpu_deliver_sipi_vector) +KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); +KVM_X86_OP_OPTIONAL(get_untagged_addr) +KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) +KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) +KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) +KVM_X86_OP_OPTIONAL(gmem_invalidate) + +#undef KVM_X86_OP +#undef KVM_X86_OP_OPTIONAL +#undef KVM_X86_OP_OPTIONAL_RET0 diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h new file mode 100644 index 000000000000..9159bf1a4730 --- /dev/null +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL) +BUILD_BUG_ON(1) +#endif + +/* + * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate + * both DECLARE/DEFINE_STATIC_CALL() invocations and + * "static_call_update()" calls. + * + * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have + * a NULL definition. + */ +KVM_X86_PMU_OP(rdpmc_ecx_to_pmc) +KVM_X86_PMU_OP(msr_idx_to_pmc) +KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early) +KVM_X86_PMU_OP(is_valid_msr) +KVM_X86_PMU_OP(get_msr) +KVM_X86_PMU_OP(set_msr) +KVM_X86_PMU_OP(refresh) +KVM_X86_PMU_OP(init) +KVM_X86_PMU_OP_OPTIONAL(reset) +KVM_X86_PMU_OP_OPTIONAL(deliver_pmi) +KVM_X86_PMU_OP_OPTIONAL(cleanup) + +#undef KVM_X86_PMU_OP +#undef KVM_X86_PMU_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h deleted file mode 100644 index 93c4bf598fb0..000000000000 --- a/arch/x86/include/asm/kvm_emulate.h +++ /dev/null @@ -1,457 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/****************************************************************************** - * x86_emulate.h - * - * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. - * - * Copyright (c) 2005 Keir Fraser - * - * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 - */ - -#ifndef _ASM_X86_KVM_X86_EMULATE_H -#define _ASM_X86_KVM_X86_EMULATE_H - -#include <asm/desc_defs.h> - -struct x86_emulate_ctxt; -enum x86_intercept; -enum x86_intercept_stage; - -struct x86_exception { - u8 vector; - bool error_code_valid; - u16 error_code; - bool nested_page_fault; - u64 address; /* cr2 or nested page fault gpa */ - u8 async_page_fault; -}; - -/* - * This struct is used to carry enough information from the instruction - * decoder to main KVM so that a decision can be made whether the - * instruction needs to be intercepted or not. - */ -struct x86_instruction_info { - u8 intercept; /* which intercept */ - u8 rep_prefix; /* rep prefix? 
*/ - u8 modrm_mod; /* mod part of modrm */ - u8 modrm_reg; /* index of register used */ - u8 modrm_rm; /* rm part of modrm */ - u64 src_val; /* value of source operand */ - u64 dst_val; /* value of destination operand */ - u8 src_bytes; /* size of source operand */ - u8 dst_bytes; /* size of destination operand */ - u8 ad_bytes; /* size of src/dst address */ - u64 next_rip; /* rip following the instruction */ -}; - -/* - * x86_emulate_ops: - * - * These operations represent the instruction emulator's interface to memory. - * There are two categories of operation: those that act on ordinary memory - * regions (*_std), and those that act on memory regions known to require - * special treatment or emulation (*_emulated). - * - * The emulator assumes that an instruction accesses only one 'emulated memory' - * location, that this location is the given linear faulting address (cr2), and - * that this is one of the instruction's data operands. Instruction fetches and - * stack operations are assumed never to access emulated memory. The emulator - * automatically deduces which operand of a string-move operation is accessing - * emulated memory, and assumes that the other operand accesses normal memory. - * - * NOTES: - * 1. The emulator isn't very smart about emulated vs. standard memory. - * 'Emulated memory' access addresses should be checked for sanity. - * 'Normal memory' accesses may fault, and the caller must arrange to - * detect and handle reentrancy into the emulator via recursive faults. - * Accesses may be unaligned and may cross page boundaries. - * 2. If the access fails (cannot emulate, or a standard access faults) then - * it is up to the memop to propagate the fault to the guest VM via - * some out-of-band mechanism, unknown to the emulator. The memop signals - * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will - * then immediately bail. - * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only - * cmpxchg8b_emulated need support 8-byte accesses. - * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. - */ -/* Access completed successfully: continue emulation as normal. */ -#define X86EMUL_CONTINUE 0 -/* Access is unhandleable: bail from emulation and return error to caller. */ -#define X86EMUL_UNHANDLEABLE 1 -/* Terminate emulation but return success to the caller. */ -#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ -#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ -#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ -#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ -#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ - -struct x86_emulate_ops { - /* - * read_gpr: read a general purpose register (rax - r15) - * - * @reg: gpr number. - */ - ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); - /* - * write_gpr: write a general purpose register (rax - r15) - * - * @reg: gpr number. - * @val: value to write. - */ - void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); - /* - * read_std: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - * @system:[IN ] Whether the access is forced to be at CPL0. 
- */ - int (*read_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, - unsigned int bytes, - struct x86_exception *fault, bool system); - - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Physical address from which to read. - * @val: [OUT] Value read from memory. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, - void *val, unsigned int bytes); - - /* - * write_std: Write bytes of standard (non-emulated/special) memory. - * Used for descriptor writing. - * @addr: [IN ] Linear address to which to write. - * @val: [OUT] Value write to memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to write to memory. - * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*write_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault, bool system); - /* - * fetch: Read bytes of standard (non-emulated/special) memory. - * Used for instruction fetch. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*fetch)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * read_emulated: Read bytes from emulated/special memory area. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * write_emulated: Write bytes to emulated/special memory area. - * @addr: [IN ] Linear address to which to write. - * @val: [IN ] Value to write to memory (low-order bytes used as - * required). - * @bytes: [IN ] Number of bytes to write to memory. - */ - int (*write_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, const void *val, - unsigned int bytes, - struct x86_exception *fault); - - /* - * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an - * emulated/special memory area. - * @addr: [IN ] Linear address to access. - * @old: [IN ] Value expected to be current at @addr. - * @new: [IN ] Value to write to @addr. - * @bytes: [IN ] Number of bytes to access using CMPXCHG. 
- */ - int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, - const void *old, - const void *new, - unsigned int bytes, - struct x86_exception *fault); - void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); - - int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, void *val, - unsigned int count); - - int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, const void *val, - unsigned int count); - - bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, - struct desc_struct *desc, u32 *base3, int seg); - void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, - struct desc_struct *desc, u32 base3, int seg); - unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, - int seg); - void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); - int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); - int (*cpl)(struct x86_emulate_ctxt *ctxt); - int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); - int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); - int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); - int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); - int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); - void (*halt)(struct x86_emulate_ctxt *ctxt); - void (*wbinvd)(struct x86_emulate_ctxt *ctxt); - int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - int (*intercept)(struct x86_emulate_ctxt *ctxt, - struct x86_instruction_info *info, - enum x86_intercept_stage stage); - - bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, - u32 *ecx, u32 *edx, bool check_limit); - void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); - - unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); - void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase); - -}; - -typedef u32 __attribute__((vector_size(16))) sse128_t; - -/* Type, address-of, and value of an instruction's operand. */ -struct operand { - enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; - unsigned int bytes; - unsigned int count; - union { - unsigned long orig_val; - u64 orig_val64; - }; - union { - unsigned long *reg; - struct segmented_address { - ulong ea; - unsigned seg; - } mem; - unsigned xmm; - unsigned mm; - } addr; - union { - unsigned long val; - u64 val64; - char valptr[sizeof(sse128_t)]; - sse128_t vec_val; - u64 mm_val; - void *data; - }; -}; - -struct fetch_cache { - u8 data[15]; - u8 *ptr; - u8 *end; -}; - -struct read_cache { - u8 data[1024]; - unsigned long pos; - unsigned long end; -}; - -/* Execution mode, passed to the emulator. */ -enum x86emul_mode { - X86EMUL_MODE_REAL, /* Real mode. */ - X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ - X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ - X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ - X86EMUL_MODE_PROT64, /* 64-bit (long) mode. 
*/ -}; - -/* These match some of the HF_* flags defined in kvm_host.h */ -#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ -#define X86EMUL_SMM_MASK (1 << 6) -#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) - -struct x86_emulate_ctxt { - const struct x86_emulate_ops *ops; - - /* Register state before/after emulation. */ - unsigned long eflags; - unsigned long eip; /* eip before instruction emulation */ - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - enum x86emul_mode mode; - - /* interruptibility state, as a result of execution of STI or MOV SS */ - int interruptibility; - - bool perm_ok; /* do not check permissions if true */ - bool ud; /* inject an #UD if host doesn't support insn */ - bool tf; /* TF value before instruction (after for syscall/sysret) */ - - bool have_exception; - struct x86_exception exception; - - /* - * decode cache - */ - - /* current opcode length in bytes */ - u8 opcode_len; - u8 b; - u8 intercept; - u8 op_bytes; - u8 ad_bytes; - struct operand src; - struct operand src2; - struct operand dst; - int (*execute)(struct x86_emulate_ctxt *ctxt); - int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ - bool rip_relative; - u8 rex_prefix; - u8 lock_prefix; - u8 rep_prefix; - /* bitmaps of registers in _regs[] that can be read */ - u32 regs_valid; - /* bitmaps of registers in _regs[] that have been written */ - u32 regs_dirty; - /* modrm */ - u8 modrm; - u8 modrm_mod; - u8 modrm_reg; - u8 modrm_rm; - u8 modrm_seg; - u8 seg_override; - u64 d; - unsigned long _eip; - struct operand memop; - /* Fields above regs are cleared together. */ - unsigned long _regs[NR_VCPU_REGS]; - struct operand *memopp; - struct fetch_cache fetch; - struct read_cache io_read; - struct read_cache mem_read; -}; - -/* Repeat String Operation Prefix */ -#define REPE_PREFIX 0xf3 -#define REPNE_PREFIX 0xf2 - -/* CPUID vendors */ -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 - -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 - -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ebx 0x6f677948 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ecx 0x656e6975 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_edx 0x6e65476e - -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e -#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 - -enum x86_intercept_stage { - X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ - X86_ICPT_PRE_EXCEPT, - X86_ICPT_POST_EXCEPT, - X86_ICPT_POST_MEMACCESS, -}; - -enum x86_intercept { - x86_intercept_none, - x86_intercept_cr_read, - x86_intercept_cr_write, - x86_intercept_clts, - x86_intercept_lmsw, - x86_intercept_smsw, - x86_intercept_dr_read, - x86_intercept_dr_write, - x86_intercept_lidt, - x86_intercept_sidt, - x86_intercept_lgdt, - x86_intercept_sgdt, - x86_intercept_lldt, - x86_intercept_sldt, - x86_intercept_ltr, - x86_intercept_str, - x86_intercept_rdtsc, - x86_intercept_rdpmc, - x86_intercept_pushf, - x86_intercept_popf, - x86_intercept_cpuid, - x86_intercept_rsm, - x86_intercept_iret, - x86_intercept_intn, - x86_intercept_invd, - x86_intercept_pause, - x86_intercept_hlt, - 
x86_intercept_invlpg, - x86_intercept_invlpga, - x86_intercept_vmrun, - x86_intercept_vmload, - x86_intercept_vmsave, - x86_intercept_vmmcall, - x86_intercept_stgi, - x86_intercept_clgi, - x86_intercept_skinit, - x86_intercept_rdtscp, - x86_intercept_icebp, - x86_intercept_wbinvd, - x86_intercept_monitor, - x86_intercept_mwait, - x86_intercept_rdmsr, - x86_intercept_wrmsr, - x86_intercept_in, - x86_intercept_ins, - x86_intercept_out, - x86_intercept_outs, - - nr_x86_intercepts -}; - -/* Host execution mode. */ -#if defined(CONFIG_X86_32) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 -#elif defined(CONFIG_X86_64) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -#endif - -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); -bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); -#define EMULATION_FAILED -1 -#define EMULATION_OK 0 -#define EMULATION_RESTART 1 -#define EMULATION_INTERCEPTED 2 -void init_decode_cache(struct x86_emulate_ctxt *ctxt); -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); -int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, int idt_index, int reason, - bool has_error_code, u32 error_code); -int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); -void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); -void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); -bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); - -#endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4660ce90de7f..5a3bfa293e8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Kernel-based Virtual Machine driver for Linux * * This header defines architecture specific interfaces, x86 version - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * */ #ifndef _ASM_X86_KVM_HOST_H @@ -18,6 +15,7 @@ #include <linux/cpumask.h> #include <linux/irq_work.h> #include <linux/irq.h> +#include <linux/workqueue.h> #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -26,43 +24,81 @@ #include <linux/pvclock_gtod.h> #include <linux/clocksource.h> #include <linux/irqbypass.h> -#include <linux/hyperv.h> +#include <linux/kfifo.h> +#include <linux/sched/vhost_task.h> +#include <linux/call_once.h> +#include <linux/atomic.h> #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> +#include <asm/msr.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> -#include <asm/hyperv-tlfs.h> +#include <asm/kvm_vcpu_regs.h> +#include <asm/reboot.h> +#include <hyperv/hvhdk.h> + +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + +/* + * CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if + * KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS). + */ +#ifdef CONFIG_KVM_MAX_NR_VCPUS +#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS +#else +#define KVM_MAX_VCPUS 1024 +#endif + +/* + * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs + * might be larger than the actual number of VCPUs because the + * APIC ID encodes CPU topology information. + * + * In the worst case, we'll need less than one extra bit for the + * Core ID, and less than one extra bit for the Package (Die) ID, + * so ratio of 4 should be enough. 
+ */ +#define KVM_VCPU_ID_RATIO 4 +#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) -#define KVM_MAX_VCPUS 288 -#define KVM_SOFT_MAX_VCPUS 240 -#define KVM_MAX_VCPU_ID 1023 -#define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 -#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) +#define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + +#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ + KVM_BUS_LOCK_DETECTION_EXIT) + +#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ + KVM_X86_NOTIFY_VMEXIT_USER) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) -#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5) +#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) #define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) #define KVM_REQ_NMI KVM_ARCH_REQ(9) #define KVM_REQ_PMU KVM_ARCH_REQ(10) #define KVM_REQ_PMI KVM_ARCH_REQ(11) +#ifdef CONFIG_KVM_SMM #define KVM_REQ_SMI KVM_ARCH_REQ(12) +#endif #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) #define KVM_REQ_MCLOCK_INPROGRESS \ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) @@ -77,7 +113,22 @@ #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) -#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_APICV_UPDATE \ + KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define KVM_REQ_TLB_FLUSH_GUEST \ + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_APF_READY KVM_ARCH_REQ(28) +#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29) +#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ + KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ + KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \ + KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -90,7 +141,8 @@ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ - | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP)) + | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \ + | X86_CR4_LAM_SUP | X86_CR4_CET)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -99,71 +151,56 @@ #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) -#define UNMAPPED_GVA (~(gpa_t)0) - /* KVM Hugepage definitions for x86 */ -enum { - PT_PAGE_TABLE_LEVEL = 1, - PT_DIRECTORY_LEVEL = 2, - PT_PDPE_LEVEL = 3, - /* set max level to the biggest one */ - PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, -}; -#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ - PT_PAGE_TABLE_LEVEL 
+ 1) +#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G +#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - -#define KVM_PERMILLE_MMU_PAGES 20 -#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 -#define KVM_MAX_CPUID_ENTRIES 80 -#define KVM_NR_FIXED_MTRR_REGION 88 +#define KVM_MAX_CPUID_ENTRIES 256 #define KVM_NR_VAR_MTRR 8 #define ASYNC_PF_PER_VCPU 64 enum kvm_reg { - VCPU_REGS_RAX = 0, - VCPU_REGS_RCX = 1, - VCPU_REGS_RDX = 2, - VCPU_REGS_RBX = 3, - VCPU_REGS_RSP = 4, - VCPU_REGS_RBP = 5, - VCPU_REGS_RSI = 6, - VCPU_REGS_RDI = 7, + VCPU_REGS_RAX = __VCPU_REGS_RAX, + VCPU_REGS_RCX = __VCPU_REGS_RCX, + VCPU_REGS_RDX = __VCPU_REGS_RDX, + VCPU_REGS_RBX = __VCPU_REGS_RBX, + VCPU_REGS_RSP = __VCPU_REGS_RSP, + VCPU_REGS_RBP = __VCPU_REGS_RBP, + VCPU_REGS_RSI = __VCPU_REGS_RSI, + VCPU_REGS_RDI = __VCPU_REGS_RDI, #ifdef CONFIG_X86_64 - VCPU_REGS_R8 = 8, - VCPU_REGS_R9 = 9, - VCPU_REGS_R10 = 10, - VCPU_REGS_R11 = 11, - VCPU_REGS_R12 = 12, - VCPU_REGS_R13 = 13, - VCPU_REGS_R14 = 14, - VCPU_REGS_R15 = 15, + VCPU_REGS_R8 = __VCPU_REGS_R8, + VCPU_REGS_R9 = __VCPU_REGS_R9, + VCPU_REGS_R10 = __VCPU_REGS_R10, + VCPU_REGS_R11 = __VCPU_REGS_R11, + VCPU_REGS_R12 = __VCPU_REGS_R12, + VCPU_REGS_R13 = __VCPU_REGS_R13, + VCPU_REGS_R14 = __VCPU_REGS_R14, + VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, - NR_VCPU_REGS -}; + NR_VCPU_REGS, -enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR0, VCPU_EXREG_CR3, + VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, + VCPU_EXREG_EXIT_INFO_1, + VCPU_EXREG_EXIT_INFO_2, }; enum { @@ -177,54 +214,79 @@ enum { VCPU_SREG_LDTR, }; -#include <asm/kvm_emulate.h> +enum exit_fastpath_completion { + EXIT_FASTPATH_NONE, + EXIT_FASTPATH_REENTER_GUEST, + EXIT_FASTPATH_EXIT_HANDLED, + EXIT_FASTPATH_EXIT_USERSPACE, +}; +typedef enum exit_fastpath_completion fastpath_t; -#define KVM_NR_MEM_OBJS 40 +struct x86_emulate_ctxt; +struct x86_exception; +union kvm_smram; +enum x86_intercept; +enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) -#define DR6_FIXED_1 0xfffe0ff0 -#define DR6_INIT 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +/* + * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. + * We can regard all the bits in DR6_FIXED_1 as active_low bits; + * they will never be 0 for now, but when they are defined + * in the future it will require no code change. + * + * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. 
+ */
+#define DR6_ACTIVE_LOW	0xffff0ff0
+#define DR6_VOLATILE	0x0001e80f
+#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)

 #define DR7_BP_EN_MASK	0x000000ff
 #define DR7_GE		(1 << 9)
 #define DR7_GD		(1 << 13)
-#define DR7_FIXED_1	0x00000400
 #define DR7_VOLATILE	0xffff2bff

-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-
-#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
-#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
-#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
-#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
-#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
-
-#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
-				 PFERR_WRITE_MASK | \
-				 PFERR_PRESENT_MASK)
+#define KVM_GUESTDBG_VALID_MASK \
+	(KVM_GUESTDBG_ENABLE | \
+	KVM_GUESTDBG_SINGLESTEP | \
+	KVM_GUESTDBG_USE_HW_BP | \
+	KVM_GUESTDBG_USE_SW_BP | \
+	KVM_GUESTDBG_INJECT_BP | \
+	KVM_GUESTDBG_INJECT_DB | \
+	KVM_GUESTDBG_BLOCKIRQ)
+
+#define PFERR_PRESENT_MASK	BIT(0)
+#define PFERR_WRITE_MASK	BIT(1)
+#define PFERR_USER_MASK		BIT(2)
+#define PFERR_RSVD_MASK		BIT(3)
+#define PFERR_FETCH_MASK	BIT(4)
+#define PFERR_PK_MASK		BIT(5)
+#define PFERR_SS_MASK		BIT(6)
+#define PFERR_SGX_MASK		BIT(15)
+#define PFERR_GUEST_RMP_MASK	BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK	BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK	BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK	BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK	BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK	BIT_ULL(36)

 /*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that trigger implicit access.
  */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
+#define PFERR_IMPLICIT_ACCESS	BIT_ULL(48)
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag used to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS	BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)

 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC	0
@@ -236,41 +298,66 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING	1

+struct kvm_kernel_irqfd;
 struct kvm_kernel_irq_routing_entry;

 /*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
-	int nobjs;
-	void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context.  This is a subset of the overall kvm_cpu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
+ *
+ * Upper-level shadow pages having gptes are tracked for write-protection via
+ * gfn_write_track.
As above, gfn_write_track is a 16 bit counter, so KVM must + * not create more than 2^16-1 upper-level shadow pages at a single gfn, + * otherwise gfn_write_track will overflow and explosions will ensue. * - * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access, - * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. + * A unique shadow page (SP) for a gfn is created if and only if an existing SP + * cannot be reused. The ability to reuse a SP is tracked by its role, which + * incorporates various mode bits and properties of the SP. Roughly speaking, + * the number of unique SPs that can theoretically be created is 2^n, where n + * is the number of bits that are used to compute the role. + * + * But, even though there are 20 bits in the mask below, not all combinations + * of modes and flags are possible: + * + * - invalid shadow pages are not accounted, mirror pages are not shadowed, + * so the bits are effectively 18. + * + * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); + * execonly and ad_disabled are only used for nested EPT which has + * has_4_byte_gpte=0. Therefore, 2 bits are always unused. + * + * - the 4 bits of level are effectively limited to the values 2/3/4/5, + * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE + * paging has exactly one upper level, making level completely redundant + * when has_4_byte_gpte=1. + * + * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if + * cr0_wp=0, therefore these three bits only give rise to 5 possibilities. + * + * Therefore, the maximum number of possible upper-level shadow pages for a + * single gfn is a bit less than 2^13. */ union kvm_mmu_page_role { u32 word; struct { unsigned level:4; - unsigned cr4_pae:1; + unsigned has_4_byte_gpte:1; unsigned quadrant:2; unsigned direct:1; unsigned access:3; unsigned invalid:1; - unsigned nxe:1; + unsigned efer_nx:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; unsigned ad_disabled:1; unsigned guest_mode:1; - unsigned :6; + unsigned passthrough:1; + unsigned is_mirror:1; + unsigned :4; /* * This is left at the top of the word so that @@ -282,27 +369,40 @@ union kvm_mmu_page_role { }; }; -union kvm_mmu_extended_role { /* - * This structure complements kvm_mmu_page_role caching everything needed for - * MMU configuration. If nothing in both these structures changed, MMU - * re-configuration can be skipped. @valid bit is set on first usage so we don't - * treat all-zero structure as valid data. + * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties + * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, + * including on nested transitions, if nothing in the full role changes then + * MMU re-configuration can be skipped. @valid bit is set on first usage so we + * don't treat all-zero structure as valid data. + * + * The properties that are tracked in the extended role but not the page role + * are for things that either (a) do not affect the validity of the shadow page + * or (b) are indirectly reflected in the shadow page's role. For example, + * CR4.PKE only affects permission checks for software walks of the guest page + * tables (because KVM doesn't support Protection Keys with shadow paging), and + * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. + * + * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. 
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and + * SMAP, but the MMU's permission checks for software walks need to be SMEP and + * SMAP aware regardless of CR0.WP. */ +union kvm_mmu_extended_role { u32 word; struct { unsigned int valid:1; unsigned int execonly:1; - unsigned int cr0_pg:1; unsigned int cr4_pse:1; unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; unsigned int cr4_la57:1; + unsigned int efer_lma:1; }; }; -union kvm_mmu_role { +union kvm_cpu_role { u64 as_u64; struct { union kvm_mmu_page_role base; @@ -311,43 +411,7 @@ union kvm_mmu_role { }; struct kvm_rmap_head { - unsigned long val; -}; - -struct kvm_mmu_page { - struct list_head link; - struct hlist_node hash_link; - bool unsync; - - /* - * The following two entries are used to key the shadow page in the - * hash table. - */ - union kvm_mmu_page_role role; - gfn_t gfn; - - u64 *spt; - /* hold the gfn of each spte inside spt */ - gfn_t *gfns; - int root_count; /* Currently serving as active root */ - unsigned int unsync_children; - struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ - - /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ - unsigned long mmu_valid_gen; - - DECLARE_BITMAP(unsync_child_bitmap, 512); - -#ifdef CONFIG_X86_32 - /* - * Used out of the mmu-lock to avoid reading spte values while an - * update is in progress; see the comments in __get_spte_lockless(). - */ - int clear_spte_count; -#endif - - /* Number of writes since the last time traversal visited this page. */ - atomic_t write_flooding_count; + atomic_long_t val; }; struct kvm_pio_request { @@ -365,51 +429,44 @@ struct rsvd_bits_validate { }; struct kvm_mmu_root_info { - gpa_t cr3; + gpa_t pgd; hpa_t hpa; }; #define KVM_MMU_ROOT_INFO_INVALID \ - ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) #define KVM_MMU_NUM_PREV_ROOTS 3 +#define KVM_MMU_ROOT_CURRENT BIT(0) +#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) +#define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1) + +#define KVM_HAVE_MMU_RWLOCK + +struct kvm_mmu_page; +struct kvm_page_fault; + /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the * current mmu mode. 
*/ struct kvm_mmu { - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); - unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, - bool prefault); + int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gpa_t gva_or_gpa, u64 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception); - int (*sync_page)(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp); - void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); - void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte); - hpa_t root_hpa; - union kvm_mmu_role mmu_role; - u8 root_level; - u8 shadow_root_level; - u8 ept_ad; - bool direct_map; - struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; - - /* - * Bitmap; bit set = permission fault - * Byte index: page fault error code [4:1] - * Bit index: pte permissions in ACC_* format - */ - u8 permissions[16]; + int (*sync_spte)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, int i); + struct kvm_mmu_root_info root; + hpa_t mirror_root_hpa; + union kvm_cpu_role cpu_role; + union kvm_mmu_page_role root_role; /* * The pkru_mask indicates if protection key checks are needed. It @@ -419,8 +476,18 @@ struct kvm_mmu { */ u32 pkru_mask; + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; + + /* + * Bitmap; bit set = permission fault + * Byte index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u8 permissions[16]; + u64 *pae_root; - u64 *lm_root; + u64 *pml4_root; + u64 *pml5_root; /* * check zero bits on shadow page table entries, these @@ -431,19 +498,9 @@ struct kvm_mmu { struct rsvd_bits_validate guest_rsvd_check; - /* Can have large pages at levels 2..last_nonleaf_level-1. */ - u8 last_nonleaf_level; - - bool nx; - u64 pdptrs[4]; /* pae */ }; -struct kvm_tlb_range { - u64 start_gfn; - u64 pages; -}; - enum pmc_type { KVM_PMC_GP = 0, KVM_PMC_FIXED, @@ -452,50 +509,128 @@ enum pmc_type { struct kvm_pmc { enum pmc_type type; u8 idx; + bool is_paused; + bool intr; + /* + * Base value of the PMC counter, relative to the *consumed* count in + * the associated perf_event. This value includes counter updates from + * the perf_event and emulated_count since the last time the counter + * was reprogrammed, but it is *not* the current value as seen by the + * guest or userspace. + * + * The count is relative to the associated perf_event so that KVM + * doesn't need to reprogram the perf_event every time the guest writes + * to the counter. + */ u64 counter; + /* + * PMC events triggered by KVM emulation that haven't been fully + * processed, i.e. haven't undergone overflow detection. + */ + u64 emulated_counter; u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; + /* + * only for creating or reusing perf_event, + * eventsel value for general purpose counters, + * ctrl value for fixed counters. + */ + u64 current_config; }; +/* More counters may conflict with other existing Architectural MSRs */ +#define KVM_MAX(a, b) ((a) >= (b) ? 
(a) : (b))
+#define KVM_MAX_NR_INTEL_GP_COUNTERS	8
+#define KVM_MAX_NR_AMD_GP_COUNTERS	6
+#define KVM_MAX_NR_GP_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
+				KVM_MAX_NR_AMD_GP_COUNTERS)
+
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS	3
+#define KVM_MAX_NR_AMD_FIXED_COUNTERS	0
+#define KVM_MAX_NR_FIXED_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
+					KVM_MAX_NR_AMD_FIXED_COUNTERS)
+
 struct kvm_pmu {
+	u8 version;
 	unsigned nr_arch_gp_counters;
 	unsigned nr_arch_fixed_counters;
 	unsigned available_event_types;
 	u64 fixed_ctr_ctrl;
+	u64 fixed_ctr_ctrl_rsvd;
 	u64 global_ctrl;
 	u64 global_status;
-	u64 global_ovf_ctrl;
 	u64 counter_bitmask[2];
-	u64 global_ctrl_mask;
+	u64 global_ctrl_rsvd;
+	u64 global_status_rsvd;
 	u64 reserved_bits;
-	u8 version;
-	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
-	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
-	struct irq_work irq_work;
-	u64 reprogram_pmi;
+	u64 raw_event_mask;
+	struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
+	struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
+
+	/*
+	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
+	 * set in a single access, e.g. to reprogram all counters when the PMU
+	 * filter changes.
+	 */
+	union {
+		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+		atomic64_t __reprogram_pmi;
+	};
+	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+	DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+	DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
+	u64 ds_area;
+	u64 pebs_enable;
+	u64 pebs_enable_rsvd;
+	u64 pebs_data_cfg;
+	u64 pebs_data_cfg_rsvd;
+
+	/*
+	 * If a guest counter is cross-mapped to a host counter with a
+	 * different index, its PEBS capability will be temporarily disabled.
+	 *
+	 * The user should make sure that this mask is updated
+	 * after disabling interrupts and before perf_guest_get_msrs();
+	 */
+	u64 host_cross_mapped_mask;
+
+	/*
+	 * The gate to release perf_events not marked in
+	 * pmc_in_use only once in a vcpu time slice.
+	 */
+	bool need_cleanup;
+
+	/*
+	 * The total number of programmed perf_events; it helps to avoid a
+	 * redundant check before cleanup if the guest doesn't use the vPMU
+	 * at all.
+	 */
+	u8 event_count;
 };

 struct kvm_pmu_ops;

 enum {
-	KVM_DEBUGREG_BP_ENABLED = 1,
-	KVM_DEBUGREG_WONT_EXIT = 2,
-	KVM_DEBUGREG_RELOAD = 4,
-};
-
-struct kvm_mtrr_range {
-	u64 base;
-	u64 mask;
-	struct list_head node;
+	KVM_DEBUGREG_BP_ENABLED = BIT(0),
+	KVM_DEBUGREG_WONT_EXIT = BIT(1),
+	/*
+	 * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
+	 * hardware on exit from or entry to the guest. KVM needn't switch them.
+	 * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
+	 * exit; host values need to be restored.
+	 */
+	KVM_DEBUGREG_AUTO_SWITCH = BIT(2),
 };

 struct kvm_mtrr {
-	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
-	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+	u64 var[KVM_NR_VAR_MTRR * 2];
+	u64 fixed_64k;
+	u64 fixed_16k[2];
+	u64 fixed_4k[8];
 	u64 deftype;
-
-	struct list_head head;
 };

 /* Hyper-V SynIC timer */
@@ -523,8 +658,32 @@ struct kvm_vcpu_hv_synic {
 	bool dont_zero_synic_pages;
 };

+/* The maximum number of entries on the TLB flush fifo. */
+#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
+/*
+ * Note: the following 'magic' entry is made up by KVM to avoid putting
+ * anything besides GVA on the TLB flush fifo. It is theoretically possible
+ * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
+ * which will look identical.
KVM's action to 'flush everything' instead of + * flushing these particular addresses is, however, fully legitimate as + * flushing more than requested is always OK. + */ +#define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) + +enum hv_tlb_flush_fifos { + HV_L1_TLB_FLUSH_FIFO, + HV_L2_TLB_FLUSH_FIFO, + HV_NR_TLB_FLUSH_FIFOS, +}; + +struct kvm_vcpu_hv_tlb_flush_fifo { + spinlock_t write_lock; + DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { + struct kvm_vcpu *vcpu; u32 vp_index; u64 hv_vapic; s64 runtime_offset; @@ -532,7 +691,94 @@ struct kvm_vcpu_hv { struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); - cpumask_t tlb_flush; + bool enforce_cpuid; + struct { + u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ + u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ + u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ + u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ + u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ + u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ + u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ + u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ + } cpuid_cache; + + struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; + + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ + u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); + + struct hv_vp_assist_page vp_assist_page; + + struct { + u64 pa_page_gpa; + u64 vm_id; + u32 vp_id; + } nested; +}; + +struct kvm_hypervisor_cpuid { + u32 base; + u32 limit; +}; + +#ifdef CONFIG_KVM_XEN +/* Xen HVM per vcpu emulation context */ +struct kvm_vcpu_xen { + u64 hypercall_rip; + u32 current_runstate; + u8 upcall_vector; + struct gfn_to_pfn_cache vcpu_info_cache; + struct gfn_to_pfn_cache vcpu_time_info_cache; + struct gfn_to_pfn_cache runstate_cache; + struct gfn_to_pfn_cache runstate2_cache; + u64 last_steal; + u64 runstate_entry_time; + u64 runstate_times[4]; + unsigned long evtchn_pending_sel; + u32 vcpu_id; /* The Xen / ACPI vCPU ID */ + u32 timer_virq; + u64 timer_expires; /* In guest epoch */ + atomic_t timer_pending; + struct hrtimer timer; + int poll_evtchn; + struct timer_list poll_timer; + struct kvm_hypervisor_cpuid cpuid; +}; +#endif + +struct kvm_queued_exception { + bool pending; + bool injected; + bool has_error_code; + u8 vector; + u32 error_code; + unsigned long payload; + bool has_payload; +}; + +/* + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. 
+ */ +enum kvm_only_cpuid_leafs { + CPUID_12_EAX = NCAPINTS, + CPUID_7_1_EDX, + CPUID_8000_0007_EDX, + CPUID_8000_0022_EAX, + CPUID_7_2_EDX, + CPUID_24_0_EBX, + CPUID_8000_0021_ECX, + CPUID_7_1_ECX, + NR_KVM_CPU_CAPS, + + NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; struct kvm_vcpu_arch { @@ -550,13 +796,15 @@ struct kvm_vcpu_arch { unsigned long cr3; unsigned long cr4; unsigned long cr4_guest_owned_bits; + unsigned long cr4_guest_rsvd_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ - bool apicv_active; bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; @@ -565,9 +813,12 @@ struct kvm_vcpu_arch { u64 ia32_misc_enable_msr; u64 smbase; u64 smi_count; + bool at_instruction_boundary; bool tpr_access_reporting; - u64 ia32_xss; + bool xfd_no_write_intercept; u64 microcode_version; + u64 arch_capabilities; + u64 perf_capabilities; /* * Paging state of the vcpu @@ -588,7 +839,7 @@ struct kvm_vcpu_arch { * Paging state of an L2 guest (used for nested npt) * * This context will save all necessary information to walk page tables - * of the an L2 guest. This context is only initialized for page table + * of an L2 guest. This context is only initialized for page table * walking and not for faulting since we never handle l2 page faults on * the host. */ @@ -601,41 +852,46 @@ struct kvm_vcpu_arch { struct kvm_mmu *walk_mmu; struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; - struct kvm_mmu_memory_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_shadow_page_cache; + struct kvm_mmu_memory_cache mmu_shadowed_info_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * This cache is to allocate external page table. E.g. private EPT used + * by the TDX module. + */ + struct kvm_mmu_memory_cache mmu_external_spt_cache; /* * QEMU userspace and the guest each have their own FPU state. - * In vcpu_run, we switch between the user, maintained in the - * task_struct struct, and guest FPU contexts. While running a VCPU, - * the VCPU thread will have the guest FPU context. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The - * "guest_fpu" state here contains the guest FPU context, with the + * "guest_fpstate" state here contains the guest FPU context, with the * host PRKU bits. */ - struct fpu *guest_fpu; + struct fpu_guest guest_fpu; u64 xcr0; u64 guest_supported_xcr0; - u32 guest_xstate_size; + u64 ia32_xss; + u64 guest_supported_xss; struct kvm_pio_request pio; void *pio_data; + void *sev_pio_data; + unsigned sev_pio_count; u8 event_exit_inst_len; - struct kvm_queued_exception { - bool pending; - bool injected; - bool has_error_code; - u8 nr; - u32 error_code; - unsigned long payload; - bool has_payload; - u8 nested_apf; - } exception; + bool exception_from_userspace; + + /* Exceptions to be injected to the guest. */ + struct kvm_queued_exception exception; + /* Exception VM-Exits to be synthesized to L1. 
*/ + struct kvm_queued_exception exception_vmexit; struct kvm_queued_interrupt { bool injected; @@ -646,33 +902,55 @@ struct kvm_vcpu_arch { int halt_request; /* real mode on Intel only */ int cpuid_nent; - struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + struct kvm_cpuid_entry2 *cpuid_entries; + bool cpuid_dynamic_bits_dirty; + bool is_amd_compatible; + /* + * cpu_caps holds the effective guest capabilities, i.e. the features + * the vCPU is allowed to use. Typically, but not always, features can + * be used by the guest if and only if both KVM and userspace want to + * expose the feature to the guest. + * + * A common exception is for virtualization holes, i.e. when KVM can't + * prevent the guest from using a feature, in which case the vCPU "has" + * the feature regardless of what KVM or userspace desires. + * + * Note, features that don't require KVM involvement in any way are + * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the + * guest CPUID provided by userspace. + */ + u32 cpu_caps[NR_KVM_CPU_CAPS]; + + u64 reserved_gpa_bits; int maxphyaddr; /* emulate context */ - struct x86_emulate_ctxt emulate_ctxt; + struct x86_emulate_ctxt *emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); + unsigned long cui_linear_rip; + int cui_rdmsr_imm_reg; gpa_t time; - struct pvclock_vcpu_time_info hv_clock; + s8 pvclock_tsc_shift; + u32 pvclock_tsc_mul; unsigned int hw_tsc_khz; - struct gfn_to_hva_cache pv_time; - bool pv_time_enabled; + struct gfn_to_pfn_cache pv_time; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_hva_cache cache; } st; - u64 tsc_offset; + u64 l1_tsc_offset; + u64 tsc_offset; /* current tsc offset */ u64 last_guest_tsc; u64 last_host_tsc; u64 tsc_offset_adjustment; @@ -685,12 +963,16 @@ struct kvm_vcpu_arch { u32 virtual_tsc_mult; u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; - u64 tsc_scaling_ratio; + u64 msr_ia32_power_ctl; + u64 l1_tsc_scaling_ratio; + u64 tsc_scaling_ratio; /* current scaling ratio */ atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ - unsigned nmi_pending; /* NMI queued after currently running handler */ + /* Number of NMIs pending injection, not including hardware vNMIs. 
*/ + unsigned int nmi_pending; bool nmi_injected; /* Trying to inject an NMI this entry */ bool smi_pending; /* SMI queued after currently running handler */ + u8 handling_intr_from_guest; struct kvm_mtrr mtrr_state; u64 pat; @@ -709,10 +991,11 @@ struct kvm_vcpu_arch { u64 mcg_ctl; u64 mcg_ext_ctl; u64 *mce_banks; + u64 *mci_ctl2_banks; /* Cache MMIO info */ u64 mmio_gva; - unsigned access; + unsigned mmio_access; gfn_t mmio_gfn; u64 mmio_gen; @@ -721,8 +1004,13 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; - struct kvm_vcpu_hv hyperv; - +#ifdef CONFIG_KVM_HYPERV + bool hyperv_enabled; + struct kvm_vcpu_hv *hyperv; +#endif +#ifdef CONFIG_KVM_XEN + struct kvm_vcpu_xen xen; +#endif cpumask_var_t wbinvd_dirty_mask; unsigned long last_retry_eip; @@ -730,14 +1018,16 @@ struct kvm_vcpu_arch { struct { bool halted; - gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + gfn_t gfns[ASYNC_PF_PER_VCPU]; struct gfn_to_hva_cache data; - u64 msr_val; + u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ + u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ + u16 vec; u32 id; - bool send_user_only; - u32 host_apf_reason; - unsigned long nested_apf_token; + u32 host_apf_flags; + bool send_always; bool delivery_as_pf_vmexit; + bool pageready_pending; } apf; /* OSVW MSRs (AMD only) */ @@ -751,15 +1041,7 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; } pv_eoi; - /* - * Indicate whether the access faults on its page table in guest - * which is set when fix page fault and used to detect unhandeable - * instruction. - */ - bool write_fault_to_shadow_pgtable; - - /* set at EPT violation at this point */ - unsigned long exit_qualification; + u64 msr_kvm_poll_control; /* pv related host specific info */ struct { @@ -768,16 +1050,45 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - - /* GPA available */ - bool gpa_available; - gpa_t gpa_val; + int highest_stale_pending_ioapic_eoi; /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; - /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ - bool l1tf_flush_l1d; + /* Host CPU on which VM-entry was most recently attempted */ + int last_vmentry_cpu; + + /* AMD MSRC001_0015 Hardware Configuration */ + u64 msr_hwcr; + + /* pv related cpuid info */ + struct { + /* + * value of the eax register in the KVM_CPUID_FEATURES CPUID + * leaf. + */ + u32 features; + + /* + * indicates whether pv emulation should be disabled if features + * are not present in the guest's cpuid + */ + bool enforce; + } pv_cpuid; + + /* Protected Guests */ + bool guest_state_protected; + bool guest_tsc_protected; + + /* + * Set when PDPTS were loaded directly by the userspace without + * reading the guest memory + */ + bool pdptrs_from_userspace; + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; +#endif }; struct kvm_lpage_info { @@ -787,23 +1098,34 @@ struct kvm_lpage_info { struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; - unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; + unsigned short *gfn_write_track; }; /* - * We use as the mode the number of bits allocated in the LDR for the - * logical processor ID. It happens that these are all powers of two. - * This makes it is very easy to detect cases where the APICs are - * configured for multiple modes; in that case, we cannot use the map and - * hence cannot use kvm_irq_delivery_to_apic_fast either. 
+ * Track the mode of the optimized logical map, as the rules for decoding the
+ * destination vary per mode.  Enabling the optimized logical map requires all
+ * software-enabled local APICs to be in the same mode, each addressable APIC
+ * to be mapped to only one MDA, and each MDA to map to at most one APIC.
  */
-#define KVM_APIC_MODE_XAPIC_CLUSTER	4
-#define KVM_APIC_MODE_XAPIC_FLAT	8
-#define KVM_APIC_MODE_X2APIC		16
+enum kvm_apic_logical_mode {
+	/* All local APICs are software disabled. */
+	KVM_APIC_MODE_SW_DISABLED,
+	/* All software enabled local APICs in xAPIC cluster addressing mode. */
+	KVM_APIC_MODE_XAPIC_CLUSTER,
+	/* All software enabled local APICs in xAPIC flat addressing mode. */
+	KVM_APIC_MODE_XAPIC_FLAT,
+	/* All software enabled local APICs in x2APIC mode. */
+	KVM_APIC_MODE_X2APIC,
+	/*
+	 * Optimized map disabled, e.g. not all local APICs in the same logical
+	 * mode, same logical ID assigned to multiple APICs, etc.
+	 */
+	KVM_APIC_MODE_MAP_DISABLED,
+};

 struct kvm_apic_map {
 	struct rcu_head rcu;
-	u8 mode;
+	enum kvm_apic_logical_mode logical_mode;
 	u32 max_apic_id;
 	union {
 		struct kvm_lapic *xapic_flat_map[8];
@@ -812,28 +1134,91 @@ struct kvm_apic_map {
 	struct kvm_lapic *phys_map[];
 };

+/* Hyper-V synthetic debugger (SynDbg) */
+struct kvm_hv_syndbg {
+	struct {
+		u64 control;
+		u64 status;
+		u64 send_page;
+		u64 recv_page;
+		u64 pending_page;
+	} control;
+	u64 options;
+};
+
+/* Current state of Hyper-V TSC page clocksource */
+enum hv_tsc_page_status {
+	/* TSC page was not set up or disabled */
+	HV_TSC_PAGE_UNSET = 0,
+	/* TSC page MSR was written by the guest, update pending */
+	HV_TSC_PAGE_GUEST_CHANGED,
+	/* TSC page update was triggered from the host side */
+	HV_TSC_PAGE_HOST_CHANGED,
+	/* TSC page was properly set up and is currently active */
+	HV_TSC_PAGE_SET,
+	/* TSC page was set up with an inaccessible GPA */
+	HV_TSC_PAGE_BROKEN,
+};
+
+#ifdef CONFIG_KVM_HYPERV
 /* Hyper-V emulation context */
 struct kvm_hv {
 	struct mutex hv_lock;
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
 	u64 hv_tsc_page;
+	enum hv_tsc_page_status hv_tsc_page_status;

 	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
 	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
 	u64 hv_crash_ctl;

-	HV_REFERENCE_TSC_PAGE tsc_ref;
+	struct ms_hyperv_tsc_page tsc_ref;

 	struct idr conn_to_evt;

 	u64 hv_reenlightenment_control;
 	u64 hv_tsc_emulation_control;
 	u64 hv_tsc_emulation_status;
+	u64 hv_invtsc_control;

 	/* How many vCPUs have VP index != vCPU index */
 	atomic_t num_mismatched_vp_indexes;
+
+	/*
+	 * How many SynICs use 'AutoEOI' feature
+	 * (protected by arch.apicv_update_lock)
+	 */
+	unsigned int synic_auto_eoi_used;
+
+	struct kvm_hv_syndbg hv_syndbg;
+
+	bool xsaves_xsavec_checked;
 };
+#endif
+
+struct msr_bitmap_range {
+	u32 flags;
+	u32 nmsrs;
+	u32 base;
+	unsigned long *bitmap;
+};
+
+#ifdef CONFIG_KVM_XEN
+/* Xen emulation context */
+struct kvm_xen {
+	struct mutex xen_lock;
+	u32 xen_version;
+	bool long_mode;
+	bool runstate_update_flag;
+	u8 upcall_vector;
+	struct gfn_to_pfn_cache shinfo_cache;
+	struct idr evtchn_ports;
+	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+
+	struct kvm_xen_hvm_config hvm_config;
+};
+#endif

 enum kvm_irqchip_mode {
	KVM_IRQCHIP_NONE,
@@ -841,72 +1226,241 @@
 	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
 };

-struct kvm_arch {
-	unsigned int n_used_mmu_pages;
-	unsigned int n_requested_mmu_pages;
-	unsigned int n_max_mmu_pages;
-	unsigned int indirect_shadow_pages;
-	unsigned long mmu_valid_gen;
-	struct
hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; +struct kvm_x86_msr_filter { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; +}; + +struct kvm_x86_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 nr_includes; + __u32 nr_excludes; + __u64 *includes; + __u64 *excludes; + __u64 events[]; +}; + +enum kvm_apicv_inhibit { + + /********************************************************************/ + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ + /********************************************************************/ + + /* + * APIC acceleration is disabled by a module parameter + * and/or not supported in hardware. + */ + APICV_INHIBIT_REASON_DISABLED, + + /* + * APIC acceleration is inhibited because AutoEOI feature is + * being used by a HyperV guest. + */ + APICV_INHIBIT_REASON_HYPERV, + + /* + * APIC acceleration is inhibited because the userspace didn't yet + * enable the kernel/split irqchip. + */ + APICV_INHIBIT_REASON_ABSENT, + + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ + * (out of band, debug measure of blocking all interrupts on this vCPU) + * was enabled, to avoid AVIC/APICv bypassing it. + */ + APICV_INHIBIT_REASON_BLOCKIRQ, + + /* + * APICv is disabled because not all vCPUs have a 1:1 mapping between + * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack. + */ + APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED, + /* - * Hash table of struct kvm_mmu_page. + * For simplicity, the APIC acceleration is inhibited the + * first time either the APIC ID or the APIC base is changed by the guest + * from their reset values. */ + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, + + /******************************************************/ + /* INHIBITs that are relevant only to the AMD's AVIC. */ + /******************************************************/ + + /* + * AVIC is inhibited on a vCPU because it runs a nested guest. + * + * This is needed because unlike APICv, the peers of this vCPU + * cannot use the doorbell mechanism to signal interrupts via AVIC when + * a vCPU runs nested. + */ + APICV_INHIBIT_REASON_NESTED, + + /* + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, + * which cannot be injected when the AVIC is enabled, thus AVIC + * is inhibited while KVM waits for IRQ window. + */ + APICV_INHIBIT_REASON_IRQWIN, + + /* + * PIT (i8254) 're-inject' mode, relies on EOI intercept, + * which AVIC doesn't support for edge triggered interrupts. + */ + APICV_INHIBIT_REASON_PIT_REINJ, + + /* + * AVIC is disabled because SEV doesn't support it. + */ + APICV_INHIBIT_REASON_SEV, + + /* + * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1 + * mapping between logical ID and vCPU. + */ + APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, + + /* + * AVIC is disabled because the vCPU's APIC ID is beyond the max + * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable. 
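+ *
+ * Illustratively (a sketch with a hypothetical helper, not the exact
+ * vendor-module code), this inhibit would be tripped along the lines of:
+ *
+ *	if (kvm_x2apic_id(apic) > avic_max_physical_id())
+ *		kvm_set_apicv_inhibit(kvm,
+ *				      APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
+ *
+ * where avic_max_physical_id() stands in for however the AVIC/x2AVIC
+ * addressing limit is derived.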
+ */ + APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG, + + NR_APICV_INHIBIT_REASONS, +}; + +#define __APICV_INHIBIT_REASON(reason) \ + { BIT(APICV_INHIBIT_REASON_##reason), #reason } + +#define APICV_INHIBIT_REASONS \ + __APICV_INHIBIT_REASON(DISABLED), \ + __APICV_INHIBIT_REASON(HYPERV), \ + __APICV_INHIBIT_REASON(ABSENT), \ + __APICV_INHIBIT_REASON(BLOCKIRQ), \ + __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \ + __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \ + __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \ + __APICV_INHIBIT_REASON(NESTED), \ + __APICV_INHIBIT_REASON(IRQWIN), \ + __APICV_INHIBIT_REASON(PIT_REINJ), \ + __APICV_INHIBIT_REASON(SEV), \ + __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \ + __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG) + +struct kvm_possible_nx_huge_pages { + /* + * A list of kvm_mmu_page structs that, if zapped, could possibly be + * replaced by an NX huge page. A shadow page is on this list if its + * existence disallows an NX huge page (nx_huge_page_disallowed is set) + * and there are no other conditions that prevent a huge page, e.g. + * the backing host page is huge, dirty logging is not enabled for its + * memslot, etc... Note, zapping shadow pages on this list doesn't + * guarantee an NX huge page will be created in its stead, e.g. if the + * guest attempts to execute from the region then KVM obviously can't + * create an NX huge page (without hanging the guest). + */ + struct list_head pages; + u64 nr_pages; +}; + +enum kvm_mmu_type { + KVM_SHADOW_MMU, +#ifdef CONFIG_X86_64 + KVM_TDP_MMU, +#endif + KVM_NR_MMU_TYPES, +}; + +struct kvm_arch { + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; + unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; + bool has_protected_eoi; + bool pre_fault_allowed; + struct hlist_head *mmu_page_hash; struct list_head active_mmu_pages; - struct list_head zapped_obsolete_pages; - struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES]; +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING struct kvm_page_track_notifier_head track_notifier_head; +#endif + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. 
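+ *
+ * A minimal sketch of the intended locking pattern (illustrative, not
+ * the exact call site):
+ *
+ *	spin_lock(&kvm->arch.mmu_unsync_pages_lock);
+ *	sp->unsync = true;
+ *	spin_unlock(&kvm->arch.mmu_unsync_pages_lock);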
+ */ + spinlock_t mmu_unsync_pages_lock; + + u64 shadow_mmio_value; - struct list_head assigned_dev_head; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; -#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE - atomic_t assigned_device_count; + unsigned long nr_possible_bypass_irqs; + +#ifdef CONFIG_KVM_IOAPIC struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; +#endif atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; - struct kvm_apic_map *apic_map; + struct kvm_apic_map __rcu *apic_map; + atomic_t apic_map_dirty; - bool apic_access_page_done; + bool apic_access_memslot_enabled; + bool apic_access_memslot_inhibited; + + /* Protects apicv_inhibit_reasons */ + struct rw_semaphore apicv_update_lock; + unsigned long apicv_inhibit_reasons; gpa_t wall_clock; - bool mwait_in_guest; - bool hlt_in_guest; - bool pause_in_guest; + u64 disabled_exits; - unsigned long irq_sources_bitmap; s64 kvmclock_offset; + + /* + * This also protects nr_vcpus_matched_tsc which is read from a + * preemption-disabled region, so it must be a raw spinlock. + */ raw_spinlock_t tsc_write_lock; u64 last_tsc_nsec; u64 last_tsc_write; u32 last_tsc_khz; + u64 last_tsc_offset; u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + u32 default_tsc_khz; + bool user_set_tsc; + u64 apic_bus_cycle_ns; + + seqcount_raw_spinlock_t pvclock_sc; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; - struct delayed_work kvmclock_update_work; - struct delayed_work kvmclock_sync_work; - - struct kvm_xen_hvm_config xen_hvm_config; - - /* reads protected by irq_srcu, writes by irq_lock */ - struct hlist_head mask_notifier_list; +#ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; +#endif - #ifdef CONFIG_KVM_MMU_AUDIT - int audit_point; - #endif +#ifdef CONFIG_KVM_XEN + struct kvm_xen xen; +#endif bool backwards_tsc_observed; bool boot_vcpu_runs_old_kvmclock; @@ -922,26 +1476,175 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + bool has_mapped_host_mmio; bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + + bool triple_fault_event; + + bool bus_lock_detection_enabled; + bool enable_pmu; + + u32 notify_window; + u32 notify_vmexit_flags; + /* + * If exit_on_emulation_error is set, and the in-kernel instruction + * emulator fails to emulate an instruction, allow userspace + * the opportunity to look at it. + */ + bool exit_on_emulation_error; + + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ + u32 user_space_msr_mask; + struct kvm_x86_msr_filter __rcu *msr_filter; + + u32 hypercall_exit_enabled; + + /* Guest can access the SGX PROVISIONKEY. */ + bool sgx_provisioning_allowed; + + struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; + struct vhost_task *nx_huge_page_recovery_thread; + u64 nx_huge_page_last; + struct once nx_once; + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_KVM_PROVE_MMU + /* + * The number of TDP MMU pages across all roots. Used only to sanity + * check that KVM isn't leaking TDP MMU pages. + */ + atomic64_t tdp_mmu_pages; +#endif + + /* + * List of struct kvm_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set. 
+ * + * For reads, this list is protected by: + * RCU alone or + * the MMU lock in read mode + RCU or + * the MMU lock in write mode + * + * For writes, this list is protected by tdp_mmu_pages_lock; see + * below for the details. + * + * Roots will remain in the list until their tdp_mmu_root_count + * drops to zero, at which point the thread that decremented the + * count to zero should remove the root from the list and clean + * it up, freeing the root after an RCU grace period. + */ + struct list_head tdp_mmu_roots; + + /* + * Protects accesses to the following fields when the MMU lock + * is held in read mode: + * - tdp_mmu_roots (above) + * - the link field of kvm_mmu_page structs used by the TDP MMU + * - possible_nx_huge_pages[KVM_TDP_MMU]; + * - the possible_nx_huge_page_link field of kvm_mmu_page structs used + * by the TDP MMU + * Because the lock is only taken within the MMU lock, strictly + * speaking it is redundant to acquire this lock when the thread + * holds the MMU lock in write mode. However, it often simplifies + * the code to do so. + */ + spinlock_t tdp_mmu_pages_lock; +#endif /* CONFIG_X86_64 */ + + /* + * If set, at least one shadow root has been allocated. This flag + * is used as one input when determining whether certain memslot + * related allocations are necessary. + */ + bool shadow_root_allocated; + +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING + /* + * If set, the VM has (or had) an external write tracking user, and + * thus all write tracking metadata has been allocated, even if KVM + * itself isn't using write tracking. + */ + bool external_write_tracking_enabled; +#endif + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; + spinlock_t hv_root_tdp_lock; + struct hv_partition_assist_pg *hv_pa_pg; +#endif + /* + * VM-scope maximum vCPU ID. Used to determine the size of structures + * that increase along with the maximum vCPU ID, in which case, using + * the global KVM_MAX_VCPU_IDS may lead to significant memory waste. + */ + u32 max_vcpu_ids; + + bool disable_nx_huge_pages; + + /* + * Memory caches used to allocate shadow pages when performing eager + * page splitting. No need for a shadowed_info_cache since eager page + * splitting only allocates direct shadow pages. + * + * Protected by kvm->slots_lock. + */ + struct kvm_mmu_memory_cache split_shadow_page_cache; + struct kvm_mmu_memory_cache split_page_header_cache; + + /* + * Memory cache used to allocate pte_list_desc structs while splitting + * huge pages. In the worst case, to split one huge page, 512 + * pte_list_desc structs are needed to add each lower level leaf sptep + * to the rmap plus 1 to extend the parent_ptes rmap of the lower level + * page table. + * + * Protected by kvm->slots_lock. + */ +#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) + struct kvm_mmu_memory_cache split_desc_cache; + + gfn_t gfn_direct_bits; + + /* + * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero + * value indicates CPU dirty logging is unsupported or disabled in the + * current VM. 
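+ * With VMX's PML, for example, the log is a single 4-KByte page of
+ * 8-byte GPA entries, i.e. cpu_dirty_log_size is 512 when PML is in
+ * use.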
+ */ + int cpu_dirty_log_size; }; struct kvm_vm_stat { - ulong mmu_shadow_zapped; - ulong mmu_pte_write; - ulong mmu_pte_updated; - ulong mmu_pde_zapped; - ulong mmu_flooded; - ulong mmu_recycled; - ulong mmu_cache_miss; - ulong mmu_unsync; - ulong remote_tlb_flush; - ulong lpages; - ulong max_mmu_page_hash_collisions; + struct kvm_vm_stat_generic generic; + u64 mmu_shadow_zapped; + u64 mmu_pte_write; + u64 mmu_pde_zapped; + u64 mmu_flooded; + u64 mmu_recycled; + u64 mmu_cache_miss; + u64 mmu_unsync; + union { + struct { + atomic64_t pages_4k; + atomic64_t pages_2m; + atomic64_t pages_1g; + }; + atomic64_t pages[KVM_NR_PAGE_SIZES]; + }; + u64 nx_lpage_splits; + u64 max_mmu_page_hash_collisions; + u64 max_mmu_rmap_size; }; struct kvm_vcpu_stat { + struct kvm_vcpu_stat_generic generic; + u64 pf_taken; u64 pf_fixed; + u64 pf_emulate; + u64 pf_spurious; + u64 pf_fast; + u64 pf_mmio_spte_created; u64 pf_guest; u64 tlb_flush; u64 invlpg; @@ -954,10 +1657,6 @@ struct kvm_vcpu_stat { u64 nmi_window_exits; u64 l1d_flush; u64 halt_exits; - u64 halt_successful_poll; - u64 halt_attempted_poll; - u64 halt_poll_invalid; - u64 halt_wakeup; u64 request_irq_exits; u64 irq_exits; u64 host_state_reload; @@ -968,6 +1667,13 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; + u64 nested_run; + u64 directed_yield_attempted; + u64 directed_yield_successful; + u64 preemption_reported; + u64 preemption_other; + u64 guest_mode; + u64 notify_window_exits; }; struct x86_instruction_info; @@ -989,65 +1695,86 @@ struct kvm_lapic_irq { bool msi_redir_hint; }; +static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) +{ + return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; +} + +enum kvm_x86_run_flags { + KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), + KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), +}; + struct kvm_x86_ops { - int (*cpu_has_kvm_support)(void); /* __init */ - int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void); - void (*hardware_disable)(void); - void (*check_processor_compatibility)(void *rtn); - int (*hardware_setup)(void); /* __init */ - void (*hardware_unsetup)(void); /* __exit */ - bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(int index); - void (*cpuid_update)(struct kvm_vcpu *vcpu); - - struct kvm *(*vm_alloc)(void); - void (*vm_free)(struct kvm *); + const char *name; + + int (*check_processor_compatibility)(void); + + int (*enable_virtualization_cpu)(void); + void (*disable_virtualization_cpu)(void); + cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; + + void (*hardware_unsetup)(void); + bool (*has_emulated_msr)(struct kvm *kvm, u32 index); + void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); + + unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); + void (*vm_pre_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + int (*vcpu_precreate)(struct kvm *kvm); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); - void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); + void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*update_bp_intercept)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. 
that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); void (*get_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int (*get_cpl)(struct kvm_vcpu *vcpu); + int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu); void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); - void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr3)(struct kvm_vcpu *vcpu); - void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); + bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); - int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); - void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); + int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - u64 (*get_dr6)(struct kvm_vcpu *vcpu); - void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - - void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); - int (*tlb_remote_flush)(struct kvm *kvm); - int (*tlb_remote_flush_with_range)(struct kvm *kvm, - struct kvm_tlb_range *range); + bool (*get_if_flag)(struct kvm_vcpu *vcpu); + + void (*flush_tlb_all)(struct kvm_vcpu *vcpu); + void (*flush_tlb_current)(struct kvm_vcpu *vcpu); +#if IS_ENABLED(CONFIG_HYPERV) + int (*flush_remote_tlbs)(struct kvm *kvm); + int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn, + gfn_t nr_pages); +#endif /* * Flush any TLB entries associated with the given GVA. @@ -1055,145 +1782,194 @@ struct kvm_x86_ops { * Can potentially get non-canonical addresses through INVLPGs, which * the implementation may choose to ignore if appropriate. */ - void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); + void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr); - void (*run)(struct kvm_vcpu *vcpu); - int (*handle_exit)(struct kvm_vcpu *vcpu); - void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + /* + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), + * does not need to flush GPA->HPA mappings. 
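+ *
+ * A sketch of the expected caller-side usage (assuming a pending
+ * KVM_REQ_TLB_FLUSH_GUEST request; not the exact KVM code):
+ *
+ *	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+ *		kvm_x86_call(flush_tlb_guest)(vcpu);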
+ */ + void (*flush_tlb_guest)(struct kvm_vcpu *vcpu); + + int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); + enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu, + u64 run_flags); + int (*handle_exit)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath); + int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); - void (*set_irq)(struct kvm_vcpu *vcpu); - void (*set_nmi)(struct kvm_vcpu *vcpu); - void (*queue_exception)(struct kvm_vcpu *vcpu); + void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); + void (*inject_nmi)(struct kvm_vcpu *vcpu); + void (*inject_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); - int (*interrupt_allowed)(struct kvm_vcpu *vcpu); - int (*nmi_allowed)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); + /* Whether or not a virtual NMI is pending in hardware. */ + bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu); + /* + * Attempt to pend a virtual NMI in hardware. Returns %true on success + * to allow using static_call_ret0 as the fallback. + */ + bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); + + const bool x2apic_icr_is_split; + const unsigned long required_apicv_inhibits; + bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); - void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); - bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); - void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); + void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); - int (*get_tdp_level)(struct kvm_vcpu *vcpu); - u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); - int (*get_lpage_level)(void); - bool (*rdtscp_supported)(void); - bool (*invpcid_supported)(void); + u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + + void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, + int root_level); + + /* Update external mapping with page table link. */ + int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *external_spt); + /* Update the external page table from spte getting set. 
*/ + int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + u64 mirror_spte); - void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + /* Update external page tables for page table about to be freed. */ + int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *external_spt); - void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + /* Update external page table from spte getting removed, and flush TLB. */ + void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + u64 mirror_spte); bool (*has_wbinvd_exit)(void); - u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); - /* Returns actual tsc_offset set in active VMCS */ - u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); + u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); + void (*write_tsc_offset)(struct kvm_vcpu *vcpu); + void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + /* + * Retrieve somewhat arbitrary exit/entry information. Intended to + * be used only from within tracepoints or error paths. + */ + void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, + u32 *intr_info, u32 *error_code); + + void (*get_entry_info)(struct kvm_vcpu *vcpu, + u32 *intr_info, u32 *error_code); int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, - enum x86_intercept_stage stage); - void (*handle_external_intr)(struct kvm_vcpu *vcpu); - bool (*mpx_supported)(void); - bool (*xsaves_supported)(void); - bool (*umip_emulated)(void); - bool (*pt_supported)(void); - - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); - void (*request_immediate_exit)(struct kvm_vcpu *vcpu); - - void (*sched_in)(struct kvm_vcpu *kvm, int cpu); + enum x86_intercept_stage stage, + struct x86_exception *exception); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - /* - * Arch-specific dirty logging hooks. These hooks are only supposed to - * be valid if the specific arch has hardware-accelerated dirty logging - * mechanism. Currently only for PML on VMX. - * - * - slot_enable_log_dirty: - * called when enabling log dirty mode for the slot. - * - slot_disable_log_dirty: - * called when disabling log dirty mode for the slot. - * also called when slot is created with log dirty disabled. - * - flush_log_dirty: - * called before reporting dirty_bitmap to userspace. - * - enable_log_dirty_pt_masked: - * called when reenabling log dirty for the GFNs in the mask after - * corresponding bits are cleared in slot->dirty_bitmap. - */ - void (*slot_enable_log_dirty)(struct kvm *kvm, - struct kvm_memory_slot *slot); - void (*slot_disable_log_dirty)(struct kvm *kvm, - struct kvm_memory_slot *slot); - void (*flush_log_dirty)(struct kvm *kvm); - void (*enable_log_dirty_pt_masked)(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); - - /* pmu operations of sub-arch */ - const struct kvm_pmu_ops *pmu_ops; + void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); - /* - * Architecture specific hooks for vCPU blocking due to - * HLT instruction. - * Returns for .pre_block(): - * - 0 means continue to block the vCPU. - * - 1 means we cannot block the vCPU since some event - * happens during this period, such as, 'ON' bit in - * posted-interrupts descriptor is set. 
- */ - int (*pre_block)(struct kvm_vcpu *vcpu); - void (*post_block)(struct kvm_vcpu *vcpu); + const struct kvm_x86_nested_ops *nested_ops; void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); - int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); + int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_vcpu *vcpu, u32 vector); + void (*pi_start_bypass)(struct kvm *kvm); + void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); + bool (*protected_apic_has_interrupt)(struct kvm_vcpu *vcpu); - int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc); + int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired); void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); - int (*get_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - unsigned user_data_size); - int (*set_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - struct kvm_nested_state *kvm_state); - void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM_SMM + int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); + int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); + void (*enable_smi_window)(struct kvm_vcpu *vcpu); +#endif + + int (*dev_get_attr)(u32 group, u64 attr, u64 *val); + int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); + int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); + int (*vcpu_mem_enc_unlocked_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); + int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); + void (*guest_memory_reclaimed)(struct kvm *kvm); + + int (*get_feature_msr)(u32 msr, u64 *data); + + int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); + + bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); + + void (*migrate_timers)(struct kvm_vcpu *vcpu); + void (*recalc_intercepts)(struct kvm_vcpu *vcpu); + int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); + + void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); + + /* + * Returns vCPU specific APICv inhibit reasons + */ + unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); - int (*smi_allowed)(struct kvm_vcpu *vcpu); - int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); - int (*enable_smi_window)(struct kvm_vcpu *vcpu); + gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); + void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); + int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); + void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); + int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); +}; - int (*mem_enc_op)(struct kvm *kvm, void __user *argp); - 
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); - int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); +struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); + bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, + u32 error_code); + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection); + void (*triple_fault)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); + + int (*enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); + uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); + void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); +}; - int (*get_msr_feature)(struct kvm_msr_entry *entry); +struct kvm_x86_init_ops { + int (*hardware_setup)(void); + unsigned int (*handle_intel_pt_intr)(void); - int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version); - uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); + struct kvm_x86_ops *runtime_ops; + struct kvm_pmu_ops *pmu_ops; }; struct kvm_arch_async_pf { @@ -1201,160 +1977,264 @@ struct kvm_arch_async_pf { gfn_t gfn; unsigned long cr3; bool direct_map; + u64 error_code; }; -extern struct kvm_x86_ops *kvm_x86_ops; -extern struct kmem_cache *x86_fpu_cache; +extern u32 __read_mostly kvm_nr_uret_msrs; +extern bool __read_mostly allow_smaller_maxphyaddr; +extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; +extern bool __read_mostly enable_device_posted_irqs; +extern struct kvm_x86_ops kvm_x86_ops; + +#define kvm_x86_call(func) static_call(kvm_x86_##func) +#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func) + +#define KVM_X86_OP(func) \ + DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); +#define KVM_X86_OP_OPTIONAL KVM_X86_OP +#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP +#include <asm/kvm-x86-ops.h> + +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); +void kvm_x86_vendor_exit(void); #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return kvm_x86_ops->vm_alloc(); + return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT); } -static inline void kvm_arch_free_vm(struct kvm *kvm) -{ - return kvm_x86_ops->vm_free(kvm); -} +#define __KVM_HAVE_ARCH_VM_FREE +void kvm_arch_free_vm(struct kvm *kvm); -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#if IS_ENABLED(CONFIG_HYPERV) +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { - if (kvm_x86_ops->tlb_remote_flush && - !kvm_x86_ops->tlb_remote_flush(kvm)) + if (kvm_x86_ops.flush_remote_tlbs && + !kvm_x86_call(flush_remote_tlbs)(kvm)) return 0; else return -ENOTSUPP; } -int kvm_mmu_module_init(void); -void kvm_mmu_module_exit(void); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, + u64 nr_pages) +{ + if (!kvm_x86_ops.flush_remote_tlbs_range) + return -EOPNOTSUPP; + + return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages); +} +#endif /* CONFIG_HYPERV */ + +enum kvm_intr_type { 
+ /* Values are arbitrary, but must be non-zero. */ + KVM_HANDLING_IRQ = 1, + KVM_HANDLING_NMI, +}; + +/* Enable perf NMI and timer modes to work, and minimise false positives. */ +#define kvm_arch_pmi_in_guest(vcpu) \ + ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \ + (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI))) + +void __init kvm_mmu_x86_module_init(void); +int kvm_mmu_vendor_module_init(void); +void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_init_vm(struct kvm *kvm); +int kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); -void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, - u64 acc_track_mask, u64 me_mask); +void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, + struct kvm_memory_slot *slot); + +void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot, + int start_level); +void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + int target_level); +void kvm_mmu_try_split_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + u64 start, u64 end, + int target_level); +void kvm_mmu_recover_huge_pages(struct kvm *kvm, + const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_slot_set_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot); -void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask); -void kvm_mmu_zap_all(struct kvm *kvm); -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); - -int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); -bool pdptrs_changed(struct kvm_vcpu *vcpu); + const struct kvm_memory_slot *memslot); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); + +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); -struct kvm_irq_mask_notifier { - void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); - int irq; - struct hlist_node link; -}; - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask); - extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); -/* control of guest tsc rate supported? 
*/ -extern bool kvm_has_tsc_control; -/* maximum supported tsc_khz for guests */ -extern u32 kvm_max_guest_tsc_khz; -/* number of bits of the fractional part of the TSC scaling ratio */ -extern u8 kvm_tsc_scaling_ratio_frac_bits; -/* maximum allowed value of TSC scaling ratio */ -extern u64 kvm_max_tsc_scaling_ratio; -/* 1ull << kvm_tsc_scaling_ratio_frac_bits */ -extern u64 kvm_default_tsc_scaling_ratio; - -extern u64 kvm_mce_cap_supported; - -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be reused as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED. + * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops.skip_emulated_instruction() implementations if + * EMULTYPE_COMPLETE_USER_EXIT is not set. + * + * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to + * retry native execution under certain conditions. + * Can only be set in conjunction with EMULTYPE_PF. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace. + * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backdoor emulation handles select instructions + * and reinjects the #GP for all other cases. + * + * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case + * the CR2/GPA value passed on the stack is valid. + * + * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility + * state and inject single-step #DBs after skipping + * an instruction (after completing userspace I/O). + * + * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that + * is attempting to write a gfn that contains one or + * more of the PTEs used to translate the write itself, + * and the owning page table is being shadowed by KVM. + * If emulation of the faulting instruction fails and + * this flag is set, KVM will exit to userspace instead + * of retrying emulation as KVM cannot make forward + * progress. + * + * If emulation fails for a write to guest page tables, + * KVM unprotects (zaps) the shadow page for the target + * gfn and resumes the guest to retry the non-emulatable + * instruction (on hardware). Unprotecting the gfn + * doesn't allow forward progress for a self-changing + * access because doing so also zaps the translation for + * the gfn, i.e. retrying the instruction will hit a + * !PRESENT fault, which results in a new shadow page + * and sends KVM back to square one. 
+ * + * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip + * an instruction if it could generate a given software + * interrupt, which must be encoded via + * EMULTYPE_SET_SOFT_INT_VECTOR(). + */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_ALLOW_RETRY_PF (1 << 3) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) +#define EMULTYPE_VMWARE_GP (1 << 5) +#define EMULTYPE_PF (1 << 6) +#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) +#define EMULTYPE_WRITE_PF_TO_SP (1 << 8) +#define EMULTYPE_SKIP_SOFT_INT (1 << 9) + +#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16) +#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff) + +static inline bool kvm_can_emulate_event_vectoring(int emul_type) +{ + return !(emul_type & EMULTYPE_PF); +} + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, + u64 *data, u8 ndata); +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); + +void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); +void kvm_prepare_unexpected_reason_exit(struct kvm_vcpu *vcpu, u64 exit_reason); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); -int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); -int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); - -struct x86_emulate_ctxt; +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); +int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); +int kvm_emulate_invd(struct kvm_vcpu *vcpu); +int kvm_emulate_mwait(struct kvm_vcpu *vcpu); +int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); +int kvm_emulate_monitor(struct kvm_vcpu *vcpu); int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int kvm_vcpu_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int 
kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); +unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); -void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); -int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); +int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); -bool kvm_rdpmc(struct kvm_vcpu *vcpu); +int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); -void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); -void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, + bool has_error_code, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); -int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gfn_t gfn, void *data, int offset, int len, - u32 access); +void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); @@ -1370,59 +2250,68 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } -#define KVM_MMU_ROOT_CURRENT BIT(0) -#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) -#define KVM_MMU_ROOTS_ALL (~0UL) +void kvm_inject_nmi(struct kvm_vcpu *vcpu); +int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); -int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); -void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); +void kvm_update_dr7(struct kvm_vcpu *vcpu); -void kvm_inject_nmi(struct kvm_vcpu *vcpu); +bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool always_retry); -int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); -int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); -void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); -int kvm_mmu_load(struct kvm_vcpu *vcpu); -void kvm_mmu_unload(struct kvm_vcpu *vcpu); -void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, + gpa_t cr2_or_gpa) +{ + return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false); +} + +void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception); +void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); gpa_t kvm_mmu_gva_to_gpa_read(struct 
kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, - struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); +bool kvm_apicv_activated(struct kvm *kvm); +bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); +void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); +void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); +void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); +static inline void kvm_set_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, true); +} -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, false); +} + +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); +void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + u64 addr, unsigned long roots); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); -void kvm_enable_tdp(void); -void kvm_disable_tdp(void); +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level); -static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception) -{ - return gpa; -} -static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) -{ - struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) +#endif - return (struct kvm_mmu_page *)page_private(page); -} +#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) static inline u16 kvm_read_ldt(void) { @@ -1441,16 +2330,11 @@ static inline unsigned long read_msr(unsigned long msr) { u64 value; - rdmsrl(msr, value); + rdmsrq(msr, value); return value; } #endif -static inline u32 get_rdx_init_val(void) -{ - return 0x600; /* P6 family */ -} - static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); @@ -1470,113 +2354,90 @@ enum { TASK_SWITCH_GATE = 3, }; -#define HF_GIF_MASK (1 << 0) -#define HF_HIF_MASK (1 << 1) -#define HF_VINTR_MASK (1 << 2) -#define HF_NMI_MASK (1 << 3) -#define HF_IRET_MASK (1 << 4) -#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ -#define HF_SMM_MASK (1 << 6) -#define HF_SMM_INSIDE_NMI_MASK (1 << 7) +#define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */ -#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE -#define KVM_ADDRESS_SPACE_NUM 2 +#ifdef CONFIG_KVM_SMM +#define HF_SMM_MASK (1 << 1) +#define HF_SMM_INSIDE_NMI_MASK (1 << 2) -#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) -#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +# define KVM_MAX_NR_ADDRESS_SPACES 2 +/* SMM is currently unsupported for guests with private memory. */ +# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2) +# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +#else +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) +#endif -/* - * Hardware virtualization extension instructions may fault if a - * reboot turns off virtualization while processes are running. - * Trap the fault and ignore the instruction if that happens. - */ -asmlinkage void kvm_spurious_fault(void); - -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ - "666: " insn "\n\t" \ - "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n" \ - "667: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting \n\t" \ - "jne 668b \n\t" \ - __ASM_SIZE(push) " $666b \n\t" \ - "jmp kvm_spurious_fault \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 667b) - -#define __kvm_handle_fault_on_reboot(insn) \ - ____kvm_handle_fault_on_reboot(insn, "") - -#define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); +int kvm_cpu_get_extint(struct kvm_vcpu *v); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -u64 kvm_get_arch_capabilities(void); -void kvm_define_shared_msr(unsigned index, u32 msr); -int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +int kvm_add_user_return_msr(u32 msr); +int kvm_find_user_return_msr(u32 msr); +int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +u64 kvm_get_user_return_msr(unsigned int slot); -u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); +static inline bool kvm_is_supported_user_return_msr(u32 msr) +{ + return kvm_find_user_return_msr(msr) >= 0; +} + +u64 kvm_scale_tsc(u64 tsc, u64 ratio); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); +u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); +u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); -void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, + unsigned long *vcpu_bitmap); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -bool 
kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); -void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); - -int kvm_is_in_guest(void); -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); -int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); +void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); -bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, - struct kvm_vcpu **dest_vcpu); - -void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq); +static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) +{ + /* We can only post Fixed and LowPrio IRQs */ + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); +} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_blocking) - kvm_x86_ops->vcpu_blocking(vcpu); + kvm_x86_call(vcpu_blocking)(vcpu); } static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_unblocking) - kvm_x86_ops->vcpu_unblocking(vcpu); + kvm_x86_call(vcpu_unblocking)(vcpu); } -static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} - static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC @@ -1587,7 +2448,37 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #endif } -#define put_smstate(type, buf, offset, val) \ - *(type *)((buf) + (offset) - 0x7e00) = val +int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); + +#define KVM_CLOCK_VALID_FLAGS \ + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) + +#define KVM_X86_VALID_QUIRKS \ + (KVM_X86_QUIRK_LINT0_REENABLED | \ + KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ + KVM_X86_QUIRK_OUT_7E_INC_RIP | \ + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ + KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ + KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ + KVM_X86_QUIRK_SLOT_ZAP_ALL | \ + KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) + +#define KVM_X86_CONDITIONAL_QUIRKS \ + (KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) + +/* + * KVM previously used a u32 field in kvm_run to indicate the hypercall was + * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the + * remaining 31 lower bits must be 0 to preserve ABI. 
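+ *
+ * An illustrative userspace-side check (a sketch; assumes a
+ * KVM_EXIT_HYPERCALL exit has already been selected):
+ *
+ *	if (run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ)
+ *		abort();	/* unknown flag, ABI violation */
+ *	long_mode = run->hypercall.flags & KVM_EXIT_HYPERCALL_LONG_MODE;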
+ */ +#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) + +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_device_posted_irqs; +} #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 172f9749dbb2..3d040741044b 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -2,11 +2,9 @@ #ifndef _ASM_X86_KVM_PAGE_TRACK_H #define _ASM_X86_KVM_PAGE_TRACK_H -enum kvm_page_track_mode { - KVM_PAGE_TRACK_WRITE, - KVM_PAGE_TRACK_MAX, -}; +#include <linux/kvm_types.h> +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING /* * The notifier represented by @kvm_page_track_notifier_node is linked into * the head which will be notified when guest is triggering the track event. @@ -26,50 +24,39 @@ struct kvm_page_track_notifier_node { * It is called when guest is writing the write-tracked page * and write emulation is finished at that time. * - * @vcpu: the vcpu where the write access happened. * @gpa: the physical address written by guest. * @new: the data was written to the address. * @bytes: the written length. * @node: this node */ - void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes, struct kvm_page_track_notifier_node *node); + void (*track_write)(gpa_t gpa, const u8 *new, int bytes, + struct kvm_page_track_notifier_node *node); + /* - * It is called when memory slot is being moved or removed - * users can drop write-protection for the pages in that memory slot + * Invoked when a memory region is removed from the guest. Or in KVM + * terms, when a memslot is deleted. * - * @kvm: the kvm where memory slot being moved or removed - * @slot: the memory slot being moved or removed - * @node: this node + * @gfn: base gfn of the region being removed + * @nr_pages: number of pages in the to-be-removed region + * @node: this node */ - void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot, - struct kvm_page_track_notifier_node *node); + void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages, + struct kvm_page_track_notifier_node *node); }; -void kvm_page_track_init(struct kvm *kvm); -void kvm_page_track_cleanup(struct kvm *kvm); - -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, - unsigned long npages); +int kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); +void kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n); -void kvm_slot_page_track_add_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); -void kvm_slot_page_track_remove_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); -bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode); +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn); +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn); +#else +/* + * Allow defining a node in a structure even if page tracking is disabled, e.g. + * to play nice with testing headers via direct inclusion from the command line. 
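+ *
+ * i.e. so that an external user along these lines (hypothetical
+ * structure, for illustration only) still compiles with page tracking
+ * disabled:
+ *
+ *	struct my_gpu_mdev {
+ *		struct kvm_page_track_notifier_node track_node;
+ *	};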
+ */ +struct kvm_page_track_notifier_node {}; +#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */ -void -kvm_page_track_register_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n); -void -kvm_page_track_unregister_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n); -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes); -void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot); #endif diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed3cf1c3934..4a47c16e2df8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -4,9 +4,10 @@ #include <asm/processor.h> #include <asm/alternative.h> +#include <linux/interrupt.h> #include <uapi/asm/kvm_para.h> -extern void kvmclock_init(void); +#include <asm/tdx.h> #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); @@ -18,7 +19,7 @@ static inline bool kvm_check_and_clear_guest_paused(void) #endif /* CONFIG_KVM_GUEST */ #define KVM_HYPERCALL \ - ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) + ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL) /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the @@ -33,6 +34,10 @@ static inline bool kvm_check_and_clear_guest_paused(void) static inline long kvm_hypercall0(unsigned int nr) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, 0, 0, 0, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr) @@ -43,6 +48,10 @@ static inline long kvm_hypercall0(unsigned int nr) static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, 0, 0, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1) @@ -54,6 +63,10 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, unsigned long p2) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, 0, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1), "c"(p2) @@ -65,6 +78,10 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, p3, 0); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1), "c"(p2), "d"(p3) @@ -77,6 +94,10 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, unsigned long p4) { long ret; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return tdx_kvm_hypercall(nr, p1, p2, p3, p4); + asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4) @@ -84,15 +105,37 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } +static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + long ret; + + asm volatile("vmmcall" + : "=a"(ret) + : "a"(nr), "b"(p1), "c"(p2), "d"(p3) + : "memory"); + return ret; +} + #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); -void kvm_async_pf_task_wake(u32 token); -u32 kvm_read_and_reset_pf_reason(void); -extern void 
kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code); +void kvm_async_pf_task_wait_schedule(u32 token); +u32 kvm_read_and_reset_apf_flags(void); +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + +DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + if (static_branch_unlikely(&kvm_async_pf_enabled)) + return __kvm_handle_async_pf(regs, token); + else + return false; +} #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); @@ -103,8 +146,7 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_async_pf_task_wait(T, I) do {} while(0) -#define kvm_async_pf_task_wake(T) do {} while(0) +#define kvm_async_pf_task_wait_schedule(T) do {} while(0) static inline bool kvm_para_available(void) { @@ -121,14 +163,14 @@ static inline unsigned int kvm_arch_para_hints(void) return 0; } -static inline u32 kvm_read_and_reset_pf_reason(void) +static inline u32 kvm_read_and_reset_apf_flags(void) { return 0; } -static inline void kvm_disable_steal_time(void) +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { - return; + return false; } #endif diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h new file mode 100644 index 000000000000..d7c704ed1be9 --- /dev/null +++ b/arch/x86/include/asm/kvm_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_TYPES_H +#define _ASM_X86_KVM_TYPES_H + +#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-amd,kvm-intel +#elif IS_MODULE(CONFIG_KVM_AMD) +#define KVM_SUB_MODULES kvm-amd +#elif IS_MODULE(CONFIG_KVM_INTEL) +#define KVM_SUB_MODULES kvm-intel +#else +#undef KVM_SUB_MODULES +/* + * Don't export symbols for KVM without vendor modules, as kvm.ko is built iff + * at least one vendor module is enabled. 
+ */ +#define EXPORT_SYMBOL_FOR_KVM(symbol) +#endif + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_X86_KVM_TYPES_H */ diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h new file mode 100644 index 000000000000..1af2cb59233b --- /dev/null +++ b/arch/x86/include/asm/kvm_vcpu_regs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_VCPU_REGS_H +#define _ASM_X86_KVM_VCPU_REGS_H + +#define __VCPU_REGS_RAX 0 +#define __VCPU_REGS_RCX 1 +#define __VCPU_REGS_RDX 2 +#define __VCPU_REGS_RBX 3 +#define __VCPU_REGS_RSP 4 +#define __VCPU_REGS_RBP 5 +#define __VCPU_REGS_RSI 6 +#define __VCPU_REGS_RDI 7 + +#ifdef CONFIG_X86_64 +#define __VCPU_REGS_R8 8 +#define __VCPU_REGS_R9 9 +#define __VCPU_REGS_R10 10 +#define __VCPU_REGS_R11 11 +#define __VCPU_REGS_R12 12 +#define __VCPU_REGS_R13 13 +#define __VCPU_REGS_R14 14 +#define __VCPU_REGS_R15 15 +#endif + +#endif /* _ASM_X86_KVM_VCPU_REGS_H */ diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index eceea9299097..f163176d6f7f 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -2,6 +2,18 @@ #ifndef _ASM_X86_KVM_CLOCK_H #define _ASM_X86_KVM_CLOCK_H -extern struct clocksource kvm_clock; +#include <linux/percpu.h> + +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} #endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7f..9d38ae744a2e 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -3,26 +3,153 @@ #define _ASM_X86_LINKAGE_H #include <linux/stringify.h> +#include <asm/ibt.h> #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions. 
+ */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ -#ifdef __ASSEMBLY__ +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; +#define __ALIGN_STR __stringify(__ALIGN) -#define GLOBAL(name) \ - .globl name; \ - name: +#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90; +#else +#define FUNCTION_PADDING +#endif -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) -#define __ALIGN .p2align 4, 0x90 -#define __ALIGN_STR __stringify(__ALIGN) +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING +#else +# define __FUNC_ALIGN __ALIGN +#endif + +#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) +#define SYM_F_ALIGN __FUNC_ALIGN + +#ifdef __ASSEMBLER__ + +#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_MITIGATION_RETPOLINE */ +#ifdef CONFIG_MITIGATION_SLS +#define RET ret; int3 +#else +#define RET ret #endif +#endif /* CONFIG_MITIGATION_RETPOLINE */ + +#else /* __ASSEMBLER__ */ + +#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_MITIGATION_RETPOLINE */ +#ifdef CONFIG_MITIGATION_SLS +#define ASM_RET "ret; int3\n\t" +#else +#define ASM_RET "ret\n\t" +#endif +#endif /* CONFIG_MITIGATION_RETPOLINE */ + +#endif /* __ASSEMBLER__ */ + +/* + * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the + * CFI symbol layout changes. + * + * Without CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .skip FUNCTION_PADDING, 0x90 + * .byte 0xb8 + * .long __kcfi_typeid_##name + * name: + * + * With CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .byte 0xb8 + * .long __kcfi_typeid_##name + * .skip FUNCTION_PADDING, 0x90 + * name: + * + * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized. + */ -#endif /* __ASSEMBLY__ */ +#ifdef CONFIG_CALL_PADDING +#define CFI_PRE_PADDING +#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#else +#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#define CFI_POST_PADDING +#endif + +#define __CFI_TYPE(name) \ + SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ + CFI_PRE_PADDING \ + .byte 0xb8 ASM_NL \ + .long __kcfi_typeid_##name ASM_NL \ + CFI_POST_PADDING \ + SYM_FUNC_END(__cfi_##name) + +/* UML needs to be able to override memcpy() and friends for KASAN. 
*/ +#ifdef CONFIG_UML +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS_WEAK +#else +# define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + +/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ + ENDBR + +/* SYM_FUNC_START -- use for global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) + +/* + * Expose 'sym' to the startup code in arch/x86/boot/startup/, by emitting an + * alias prefixed with __pi_ + */ +#ifdef __ASSEMBLER__ +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_L_GLOBAL) +#else +#define SYM_PIC_ALIAS(sym) extern typeof(sym) __PASTE(__pi_, sym) __alias(sym) +#endif #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h deleted file mode 100644 index ed80003ce3e2..000000000000 --- a/arch/x86/include/asm/livepatch.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * livepatch.h - x86-specific Kernel Live Patching Core - * - * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> - * Copyright (C) 2014 SUSE - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef _ASM_X86_LIVEPATCH_H -#define _ASM_X86_LIVEPATCH_H - -#include <asm/setup.h> -#include <linux/ftrace.h> - -static inline int klp_check_compiler_support(void) -{ -#ifndef CC_USING_FENTRY - return 1; -#endif - return 0; -} - -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) -{ - regs->ip = ip; -} - -#endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 349a47acaa4a..59aa966dc212 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -120,34 +120,54 @@ static inline long local_sub_return(long i, local_t *l) #define local_inc_return(l) (local_add_return(1, l)) #define local_dec_return(l) (local_sub_return(1, l)) -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -/* Always has a lock prefix */ -#define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) +static inline long local_cmpxchg(local_t *l, long old, long new) +{ + return cmpxchg_local(&l->a.counter, old, new); +} + +static inline bool local_try_cmpxchg(local_t *l, long *old, long new) +{ + return try_cmpxchg_local(&l->a.counter, + (typeof(l->a.counter) *) old, new); +} + +/* + * Implement local_xchg using CMPXCHG instruction without the LOCK prefix. + * XCHG is expensive due to the implied LOCK prefix. The processor + * cannot prefetch cachelines if XCHG is used. + */ +static __always_inline long +local_xchg(local_t *l, long n) +{ + long c = local_read(l); + + do { } while (!local_try_cmpxchg(l, &c, n)); + + return c; +} /** - * local_add_unless - add unless the number is a given value + * local_add_unless - add unless the number is already a given value * @l: pointer of type local_t * @a: the amount to add to l... * @u: ...unless l is equal to u. * - * Atomically adds @a to @l, so long as it was not @u. - * Returns non-zero if @l was not @u, and zero otherwise. + * Atomically adds @a to @l, if @v was not already @u. + * Returns true if the addition was done. */ -#define local_add_unless(l, a, u) \ -({ \ - long c, old; \ - c = local_read((l)); \ - for (;;) { \ - if (unlikely(c == (u))) \ - break; \ - old = local_cmpxchg((l), c, c + (a)); \ - if (likely(old == c)) \ - break; \ - c = old; \ - } \ - c != (u); \ -}) +static __always_inline bool +local_add_unless(local_t *l, long a, long u) +{ + long c = local_read(l); + + do { + if (unlikely(c == u)) + return false; + } while (!local_try_cmpxchg(l, &c, c + a)); + + return true; +} + #define local_inc_not_zero(l) local_add_unless((l), 1, 0) /* On x86_32, these are no better than the atomic variants. 
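[Editor's note] The rewritten local_xchg() and local_add_unless() in the local.h hunk above share one idiom: read the current value once, then retry a non-LOCK-prefixed compare-and-exchange until it succeeds, with each failed attempt refreshing the cached value. A minimal userspace sketch of that loop shape, using C11 atomics in place of the kernel's local_try_cmpxchg() (the names xchg_via_cmpxchg and add_unless are illustrative, not kernel APIs):

#include <stdatomic.h>
#include <stdbool.h>
#include <assert.h>

/* Analogue of the new local_xchg(): loop on a compare-exchange instead of
 * issuing XCHG, whose implied LOCK prefix the kernel comment above calls
 * out as expensive. On failure, atomic_compare_exchange_weak() writes the
 * current value back into 'c', just as local_try_cmpxchg() updates the
 * value behind its 'old' pointer. */
long xchg_via_cmpxchg(_Atomic long *l, long n)
{
	long c = atomic_load_explicit(l, memory_order_relaxed);

	while (!atomic_compare_exchange_weak(l, &c, n))
		;
	return c;
}

/* Same loop shape as the rewritten local_add_unless(). */
bool add_unless(_Atomic long *l, long a, long u)
{
	long c = atomic_load_explicit(l, memory_order_relaxed);

	do {
		if (c == u)
			return false;
	} while (!atomic_compare_exchange_weak(l, &c, c + a));

	return true;
}

int main(void)
{
	_Atomic long v = 5;

	assert(xchg_via_cmpxchg(&v, 7) == 5);	/* v is now 7 */
	assert(!add_unless(&v, 1, 7));		/* refused: v == u */
	assert(add_unless(&v, 1, 9) && v == 8);
	return 0;
}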
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/x86/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 97198001e567..6115bb3d5795 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,7 +95,7 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(const struct timespec64 *now); +extern int mach_set_cmos_time(const struct timespec64 *now); extern void mach_get_cmos_time(struct timespec64 *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c1a812bd5a27..2d98886de09a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -13,6 +13,7 @@ #define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ #define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ #define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ +#define MCG_SEAM_NR BIT_ULL(12) /* MCG_STATUS_SEAM_NR supported */ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) @@ -25,6 +26,7 @@ #define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ #define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ +#define MCG_STATUS_SEAM_NR BIT_ULL(12) /* Machine check inside SEAM non-root mode */ /* MCG_EXT_CTL register defines */ #define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ @@ -42,12 +44,15 @@ #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) +#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ +#define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ +#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */ /* * McaX field if set indicates a given bank supports MCA extensions: @@ -57,6 +62,8 @@ * - TCC bit is present in MCx_STATUS. 
*/ #define MCI_CONFIG_MCAX 0x1 +#define MCI_CONFIG_FRUTEXT BIT_ULL(9) +#define MCI_CONFIG_PADDRV BIT_ULL(11) #define MCI_IPID_MCATYPE 0xFFFF0000 #define MCI_IPID_HWID 0xFFF @@ -86,6 +93,9 @@ #define MCI_MISC_ADDR_MEM 3 /* memory address */ #define MCI_MISC_ADDR_GENERIC 7 /* generic */ +/* MCi_ADDR register defines */ +#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0) + /* CTL2 register defines */ #define MCI_CTL2_CMCI_EN BIT_ULL(30) #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL @@ -101,7 +111,7 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -#define MCE_LOG_LEN 32 +#define MCE_LOG_MIN_LEN 32U #define MCE_LOG_SIGNATURE "MACHINECHECK" /* AMD Scalable MCA */ @@ -115,6 +125,9 @@ #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a +/* Registers MISC2 to MISC4 are at offsets B to D. */ +#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e +#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) @@ -125,6 +138,40 @@ #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x)) +#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x)) + +#define XEC(x, mask) (((x) >> 16) & mask) + +/* mce.kflags flag bits for logging etc. */ +#define MCE_HANDLED_CEC BIT_ULL(0) +#define MCE_HANDLED_UC BIT_ULL(1) +#define MCE_HANDLED_EXTLOG BIT_ULL(2) +#define MCE_HANDLED_NFIT BIT_ULL(3) +#define MCE_HANDLED_EDAC BIT_ULL(4) +#define MCE_HANDLED_MCELOG BIT_ULL(5) + +/* + * Indicates an MCE which has happened in kernel space but from + * which the kernel can recover simply by executing fixup_exception() + * so that an error is returned to the caller of the function that + * hit the machine check. + */ +#define MCE_IN_KERNEL_RECOV BIT_ULL(6) + +/* + * Indicates an MCE that happened in kernel space while copying data + * from user. In this case fixup_exception() gets the kernel to the + * error exit for the copy function. Machine check handler can then + * treat it like a fault taken in user mode. + */ +#define MCE_IN_KERNEL_COPYIN BIT_ULL(7) + +/* + * Indicates that handler should check and clear Deferred error registers + * rather than common ones. + */ +#define MCE_CHECK_DFR_REGS BIT_ULL(8) /* * This structure contains all data related to the MCE log. Also @@ -134,23 +181,52 @@ */ struct mce_log_buffer { char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ + unsigned len; /* = elements in .mce_entry[] */ unsigned next; unsigned flags; unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; + struct mce entry[]; }; +/* Highest last */ enum mce_notifier_prios { - MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_SRAO = INT_MAX - 1, - MCE_PRIO_EXTLOG = INT_MAX - 2, - MCE_PRIO_NFIT = INT_MAX - 3, - MCE_PRIO_EDAC = INT_MAX - 4, - MCE_PRIO_MCELOG = 1, - MCE_PRIO_LOWEST = 0, + MCE_PRIO_LOWEST, + MCE_PRIO_MCELOG, + MCE_PRIO_EDAC, + MCE_PRIO_NFIT, + MCE_PRIO_EXTLOG, + MCE_PRIO_UC, + MCE_PRIO_EARLY, + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC +}; + +/** + * struct mce_hw_err - Hardware Error Record. 
+ * @m: Machine Check record. + * @vendor: Vendor-specific error information. + * + * Vendor-specific fields should not be added to struct mce. Instead, vendors + * should export their vendor-specific data through their structure in the + * vendor union below. + * + * AMD's vendor data is parsed by error decoding tools for supplemental error + * information. Thus, current offsets of existing fields must be maintained. + * Only add new fields at the end of AMD's vendor structure. + */ +struct mce_hw_err { + struct mce m; + + union vendor_info { + struct { + u64 synd1; /* MCA_SYND1 MSR */ + u64 synd2; /* MCA_SYND2 MSR */ + } amd; + } vendor; }; +#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m) + struct notifier_block; extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -160,38 +236,39 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb); extern int mce_p5_enabled; +#ifdef CONFIG_ARCH_HAS_COPY_MC +extern void enable_copy_mc_fragile(void); +unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); +#else +static inline void enable_copy_mc_fragile(void) +{ +} +#endif + +struct cper_ia_proc_ctx; + #ifdef CONFIG_X86_MCE int mcheck_init(void); +void mca_bsp_init(struct cpuinfo_x86 *c); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); -void mcheck_vendor_init_severity(void); +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } +static inline void mca_bsp_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} -static inline void mcheck_vendor_init_severity(void) {} -#endif - -#ifdef CONFIG_X86_ANCIENT_MCE -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); -static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void enable_p5_mce(void) {} +static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) { return -EINVAL; } #endif -void mce_setup(struct mce *m); -void mce_log(struct mce *m); +void mce_prep_record(struct mce_hw_err *err); +void mce_log(struct mce_hw_err *err); DECLARE_PER_CPU(struct device *, mce_device); -/* - * Maximum banks number. - * This is the limit of the current register layout on - * Intel CPUs. - */ -#define MAX_NR_BANKS 32 +/* Maximum number of MCA banks per CPU. 
*/ +#define MAX_NR_BANKS 64 #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); @@ -209,20 +286,10 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -#ifdef CONFIG_X86_MCE_AMD -void mce_amd_feature_init(struct cpuinfo_x86 *c); -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -#else -static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } -static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; -#endif - -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - -int mce_available(struct cpuinfo_x86 *c); +bool mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); -int mce_usable_address(struct mce *m); +bool mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); @@ -233,24 +300,26 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); enum mcp_flags { MCP_TIMESTAMP = BIT(0), /* log time stamp */ MCP_UC = BIT(1), /* log uncorrected errors */ - MCP_DONTLOG = BIT(2), /* only clear, don't log */ + MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */ }; -bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); -int mce_notify_irq(void); +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); DECLARE_PER_CPU(struct mce, injectm); /* Disable CMCI/polling for MCA bank claimed by firmware */ extern void mce_disable_bank(int bank); +#ifdef CONFIG_X86_MCE_THRESHOLD +void mce_save_apei_thr_limit(u32 thr_limit); +#else +static inline void mce_save_apei_thr_limit(u32 thr_limit) { } +#endif /* CONFIG_X86_MCE_THRESHOLD */ + /* * Exception handler */ - -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); -void do_machine_check(struct pt_regs *, long); +void do_machine_check(struct pt_regs *pt_regs); /* * Threshold handler @@ -261,28 +330,6 @@ extern void (*mce_threshold_vector)(void); extern void (*deferred_error_int_vector)(void); /* - * Thermal handler - */ - -void intel_init_thermal(struct cpuinfo_x86 *c); - -/* Interrupt Handler for core thermal thresholds */ -extern int (*platform_thermal_notify)(__u64 msr_val); - -/* Interrupt Handler for package thermal thresholds */ -extern int (*platform_thermal_package_notify)(__u64 msr_val); - -/* Callback support of rate control, return true, if - * callback has rate control */ -extern bool (*platform_thermal_package_rate_control)(void); - -#ifdef CONFIG_X86_THERMAL_VECTOR -extern void mcheck_intel_therm_init(void); -#else -static inline void mcheck_intel_therm_init(void) { } -#endif - -/* * Used by APEI to report memory error via /dev/mcelog */ @@ -299,6 +346,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ @@ -307,43 +355,44 @@ enum smca_bank_types { SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ + SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. 
*/ SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, + SMCA_MA_LLC, /* Memory Attached Last Level Cache */ SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ + SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ + SMCA_SMU_V2, + SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_MPDMA, /* MPDMA Unit */ + SMCA_NBIO, /* Northbridge IO Unit */ + SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_NBIF, /* NBIF Unit */ + SMCA_SHUB, /* System HUB Unit */ + SMCA_SATA, /* SATA Unit */ + SMCA_USB, /* USB Unit */ + SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */ + SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */ + SMCA_GMI_PCS, /* GMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ + SMCA_GMI_PHY, /* GMI PHY Unit */ N_SMCA_BANK_TYPES }; -#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) - -struct smca_hwid { - unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ - u32 hwid_mcatype; /* (hwid,mcatype) tuple */ - u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ - u8 count; /* Number of instances. */ -}; - -struct smca_bank { - struct smca_hwid *hwid; - u32 id; /* Value of MCA_IPID[InstanceId]. */ - u8 sysfs_id; /* Value used for sysfs name. */ -}; - -extern struct smca_bank smca_banks[MAX_NR_BANKS]; - -extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); -extern int mce_threshold_create_device(unsigned int cpu); -extern int mce_threshold_remove_device(unsigned int cpu); - +void mce_amd_feature_init(struct cpuinfo_x86 *c); +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); #else - -static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; -static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; -static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; - +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif +unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h deleted file mode 100644 index eb59804b6201..000000000000 --- a/arch/x86/include/asm/mcsafe_test.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MCSAFE_TEST_H_ -#define _MCSAFE_TEST_H_ - -#ifndef __ASSEMBLY__ -#ifdef CONFIG_MCSAFE_TEST -extern unsigned long mcsafe_test_src; -extern unsigned long mcsafe_test_dst; - -static inline void mcsafe_inject_src(void *addr) -{ - if (addr) - mcsafe_test_src = (unsigned long) addr; - else - mcsafe_test_src = ~0UL; -} - -static inline void mcsafe_inject_dst(void *addr) -{ - if (addr) - mcsafe_test_dst = (unsigned long) addr; - else - mcsafe_test_dst = ~0UL; -} -#else /* CONFIG_MCSAFE_TEST */ -static inline void mcsafe_inject_src(void *addr) -{ -} - -static inline void mcsafe_inject_dst(void *addr) -{ -} -#endif /* CONFIG_MCSAFE_TEST */ - -#else /* __ASSEMBLY__ */ -#include <asm/export.h> - -#ifdef CONFIG_MCSAFE_TEST -.macro MCSAFE_TEST_CTL - .pushsection .data - .align 8 - .globl mcsafe_test_src - mcsafe_test_src: - .quad 0 - EXPORT_SYMBOL_GPL(mcsafe_test_src) - .globl mcsafe_test_dst - mcsafe_test_dst: - .quad 0 - EXPORT_SYMBOL_GPL(mcsafe_test_dst) - .popsection -.endm - -.macro MCSAFE_TEST_SRC reg count 
target - leaq \count(\reg), %r9 - cmp mcsafe_test_src, %r9 - ja \target -.endm - -.macro MCSAFE_TEST_DST reg count target - leaq \count(\reg), %r9 - cmp mcsafe_test_dst, %r9 - ja \target -.endm -#else -.macro MCSAFE_TEST_CTL -.endm - -.macro MCSAFE_TEST_SRC reg count target -.endm - -.macro MCSAFE_TEST_DST reg count target -.endm -#endif /* CONFIG_MCSAFE_TEST */ -#endif /* __ASSEMBLY__ */ -#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 616f8e637bc3..ea6494628cb0 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -1,28 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * AMD Memory Encryption Support * * Copyright (C) 2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __X86_MEM_ENCRYPT_H__ #define __X86_MEM_ENCRYPT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/init.h> +#include <linux/cc_platform.h> + +#include <asm/asm.h> +struct boot_params; -#include <asm/bootparam.h> +#ifdef CONFIG_X86_MEM_ENCRYPT +void __init mem_encrypt_init(void); +void __init mem_encrypt_setup_arch(void); +#else +static inline void mem_encrypt_init(void) { } +static inline void __init mem_encrypt_setup_arch(void) { } +#endif #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; -extern bool sev_enabled; +extern u64 sev_status; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, @@ -40,24 +47,29 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); -void __init sme_encrypt_kernel(struct boot_params *bp); -void __init sme_enable(struct boot_params *bp); +void sme_encrypt_kernel(struct boot_params *bp); +void sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, + unsigned long size, bool enc); -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void); void __init mem_encrypt_free_decrypted_mem(void); -bool sme_active(void); -bool sev_active(void); +void __init sev_es_init_vc_handling(void); -#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) +static inline u64 sme_get_me_mask(void) +{ + return sme_me_mask; +} + +#define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL +#define sev_status 0ULL static inline void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) { } @@ -69,21 +81,28 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } -static inline void __init sme_enable(struct boot_params *bp) { } +static inline void sme_encrypt_kernel(struct boot_params *bp) { } +static inline void sme_enable(struct boot_params *bp) { } -static inline bool sme_active(void) { return false; } -static inline bool sev_active(void) { return false; } +static inline void sev_es_init_vc_handling(void) { } static inline int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } static 
inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +static inline void __init +early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {} + +static inline void mem_encrypt_free_decrypted_mem(void) { } + +static inline u64 sme_get_me_mask(void) { return 0; } #define __bss_decrypted #endif /* CONFIG_AMD_MEM_ENCRYPT */ +void add_encrypt_protection_map(void); + /* * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when * writing to or comparing values from the cr3 register. Having the @@ -95,6 +114,6 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..113b2fa51849 --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include <linux/types.h> +#include <asm/pgtable_types.h> + +extern bool pat_enabled(void); +extern void pat_bp_init(void); +extern void pat_cpu_init(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +extern void memtype_free_io(resource_size_t start, resource_size_t end); + +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +bool x86_has_pat_wp(void); +enum page_cache_mode pgprot2cachemode(pgprot_t pgprot); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2b7cc5397f80..8b41f26f003b 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -2,18 +2,7 @@ #ifndef _ASM_X86_MICROCODE_H #define _ASM_X86_MICROCODE_H -#include <asm/cpu.h> -#include <linux/earlycpio.h> -#include <linux/initrd.h> - -struct ucode_patch { - struct list_head plist; - void *data; /* Intel uses only this one */ - u32 patch_id; - u16 equiv_cpu; -}; - -extern struct list_head microcode_cache; +#include <asm/msr.h> struct cpu_signature { unsigned int sig; @@ -21,125 +10,84 @@ struct cpu_signature { unsigned int rev; }; -struct device; - -enum ucode_state { - UCODE_OK = 0, - UCODE_NEW, - UCODE_UPDATED, - UCODE_NFOUND, - UCODE_ERROR, +struct ucode_cpu_info { + struct cpu_signature cpu_sig; + void *mc; }; -struct microcode_ops { - enum ucode_state (*request_microcode_user) (int cpu, - const void __user *buf, size_t size); - - enum ucode_state (*request_microcode_fw) (int cpu, struct device *, - bool refresh_fw); - - void (*microcode_fini_cpu) (int cpu); +#ifdef CONFIG_MICROCODE +void load_ucode_bsp(void); +void load_ucode_ap(void); +void microcode_bsp_resume(void); +bool __init microcode_loader_disabled(void); +#else +static inline void load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline void microcode_bsp_resume(void) { } +static inline bool __init microcode_loader_disabled(void) { return false; } +#endif - /* - * The generic 'microcode_core' part guarantees that - * the callbacks below run on a target cpu when they - * are being 
called. - * See also the "Synchronization" section in microcode_core.c. - */ - enum ucode_state (*apply_microcode) (int cpu); - int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); +extern unsigned long initrd_start_early; + +#ifdef CONFIG_CPU_SUP_INTEL +/* Intel specific microcode defines. Public for IFS */ +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int metasize; + unsigned int min_req_ver; + unsigned int reserved; }; -struct ucode_cpu_info { - struct cpu_signature cpu_sig; - int valid; - void *mc; +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[]; }; -extern struct ucode_cpu_info ucode_cpu_info[]; -struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); -#ifdef CONFIG_MICROCODE_INTEL -extern struct microcode_ops * __init init_intel_microcode(void); -#else -static inline struct microcode_ops * __init init_intel_microcode(void) -{ - return NULL; -} -#endif /* CONFIG_MICROCODE_INTEL */ +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define MC_HEADER_TYPE_MICROCODE 1 +#define MC_HEADER_TYPE_IFS 2 -#ifdef CONFIG_MICROCODE_AMD -extern struct microcode_ops * __init init_amd_microcode(void); -extern void __exit exit_amd_microcode(void); -#else -static inline struct microcode_ops * __init init_amd_microcode(void) +static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr) { - return NULL; + return hdr->datasize ? : DEFAULT_UCODE_DATASIZE; } -static inline void __exit exit_amd_microcode(void) {} -#endif -#define MAX_UCODE_COUNT 128 - -#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) -#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') -#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') -#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') -#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') -#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') -#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') - -#define CPUID_IS(a, b, c, ebx, ecx, edx) \ - (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) - -/* - * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_cpuid_vendor() gets vendor id for BSP. - * - * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_cpuid_vendor() to get vendor id for AP. - * - * x86_cpuid_vendor() gets vendor information directly from CPUID. 
- */ -static inline int x86_cpuid_vendor(void) +static inline u32 intel_get_microcode_revision(void) { - u32 eax = 0x00000000; - u32 ebx, ecx = 0, edx; + u32 rev, dummy; - native_cpuid(&eax, &ebx, &ecx, &edx); + native_wrmsrq(MSR_IA32_UCODE_REV, 0); - if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) - return X86_VENDOR_INTEL; + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); - if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) - return X86_VENDOR_AMD; + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); - return X86_VENDOR_UNKNOWN; + return rev; } +#endif /* !CONFIG_CPU_SUP_INTEL */ -static inline unsigned int x86_cpuid_family(void) -{ - u32 eax = 0x00000001; - u32 ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); +bool microcode_nmi_handler(void); +void microcode_offline_nmi_handler(void); - return x86_family(eax); +#ifdef CONFIG_MICROCODE_LATE_LOADING +DECLARE_STATIC_KEY_FALSE(microcode_nmi_handler_enable); +static __always_inline bool microcode_nmi_handler_enabled(void) +{ + return static_branch_unlikely(&microcode_nmi_handler_enable); } - -#ifdef CONFIG_MICROCODE -int __init microcode_init(void); -extern void __init load_ucode_bsp(void); -extern void load_ucode_ap(void); -void reload_early_microcode(void); -extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); -extern bool initrd_gone; #else -static inline int __init microcode_init(void) { return 0; }; -static inline void __init load_ucode_bsp(void) { } -static inline void load_ucode_ap(void) { } -static inline void reload_early_microcode(void) { } -static inline bool -get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } +static __always_inline bool microcode_nmi_handler_enabled(void) { return false; } #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h deleted file mode 100644 index 209492849566..000000000000 --- a/arch/x86/include/asm/microcode_amd.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_AMD_H -#define _ASM_X86_MICROCODE_AMD_H - -#include <asm/microcode.h> - -#define UCODE_MAGIC 0x00414d44 -#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -#define UCODE_UCODE_TYPE 0x00000001 - -#define SECTION_HDR_SIZE 8 -#define CONTAINER_HDR_SZ 12 - -struct equiv_cpu_entry { - u32 installed_cpu; - u32 fixed_errata_mask; - u32 fixed_errata_compare; - u16 equiv_cpu; - u16 res; -} __attribute__((packed)); - -struct microcode_header_amd { - u32 data_code; - u32 patch_id; - u16 mc_patch_data_id; - u8 mc_patch_data_len; - u8 init_flag; - u32 mc_patch_data_checksum; - u32 nb_dev_id; - u32 sb_dev_id; - u16 processor_rev_id; - u8 nb_rev_id; - u8 sb_rev_id; - u8 bios_api_rev; - u8 reserved1[3]; - u32 match_reg[8]; -} __attribute__((packed)); - -struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[0]; -}; - -#define PATCH_MAX_SIZE PAGE_SIZE - -#ifdef CONFIG_MICROCODE_AMD -extern void __init load_ucode_amd_bsp(unsigned int family); -extern void load_ucode_amd_ap(unsigned int family); -extern int __init save_microcode_in_initrd_amd(unsigned int family); -void reload_ucode_amd(void); -#else -static inline void __init load_ucode_amd_bsp(unsigned int family) {} -static inline void load_ucode_amd_ap(unsigned int family) {} -static inline int __init -save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -void reload_ucode_amd(void) {}
-#endif -#endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h deleted file mode 100644 index d85a07d7154f..000000000000 --- a/arch/x86/include/asm/microcode_intel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_INTEL_H -#define _ASM_X86_MICROCODE_INTEL_H - -#include <asm/microcode.h> - -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[0]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -static inline u32 intel_get_microcode_revision(void) -{ - u32 rev, dummy; - - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); - - return rev; -} - -#ifdef CONFIG_MICROCODE_INTEL -extern void __init load_ucode_intel_bsp(void); -extern void load_ucode_intel_ap(void); -extern void show_ucode_info_early(void); -extern int __init save_microcode_in_initrd_intel(void); -void reload_ucode_intel(void); -#else -static inline __init void load_ucode_intel_bsp(void) {} -static inline void load_ucode_intel_ap(void) {} -static inline void show_ucode_info_early(void) {} -static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } -static inline void reload_ucode_intel(void) {} -#endif - -#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h new file mode 100644 index 000000000000..12b820259b9f --- /dev/null +++ b/arch/x86/include/asm/mman.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#define arch_calc_vm_prot_bits(prot, key) ( \ + ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ + ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ + ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ + ((key) & 0x8 ? 
VM_PKEY_BIT3 : 0)) +#endif + +#include <uapi/asm/mman.h> + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5ff3e8af2c20..0fe9c569d171 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -6,6 +6,18 @@ #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/atomic.h> +#include <linux/bits.h> + +/* Uprobes on this MM assume 32-bit code */ +#define MM_CONTEXT_UPROBE_IA32 0 +/* vsyscall page is accessible on this MM */ +#define MM_CONTEXT_HAS_VSYSCALL 1 +/* Do not allow changing LAM mode */ +#define MM_CONTEXT_LOCK_LAM 2 +/* Allow LAM and SVA coexisting */ +#define MM_CONTEXT_FORCE_TAGGED_SVA 3 +/* Tracks mm_cpumask */ +#define MM_CONTEXT_NOTRACK 4 /* * x86 has arch-specific MMU state beyond what lives in mm_struct. @@ -27,14 +39,21 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; #endif -#ifdef CONFIG_X86_64 - /* True if mm supports a task running in 32 bit compatibility mode. */ - unsigned short ia32_compat; + unsigned long flags; + +#ifdef CONFIG_ADDRESS_MASKING + /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */ + unsigned long lam_cr3_mask; + + /* Significant bits of the virtual address. Excludes tag bits. */ + u64 untag_mask; #endif struct mutex lock; @@ -45,22 +64,32 @@ typedef struct { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* * One bit per protection key says whether userspace can - * use it or not. protected by mmap_sem. + * use it or not. protected by mmap_lock. */ u16 pkey_allocation_map; s16 execute_only_pkey; #endif -#ifdef CONFIG_X86_INTEL_MPX - /* address of the bounds directory */ - void __user *bd_addr; + +#ifdef CONFIG_BROADCAST_TLB_FLUSH + /* + * The global ASID will be a non-zero value when the process has + * the same ASID across all CPUs, allowing it to make use of + * hardware-assisted remote TLB invalidation like AMD INVLPGB. + */ + u16 global_asid; + + /* The process is transitioning to a new global ASID number. 
*/ + bool asid_transition; #endif } mm_context_t; #define INIT_MM_CONTEXT(mm) \ .context = { \ .ctx_id = 1, \ + .lock = __MUTEX_INITIALIZER(mm.context.lock), \ } -void leave_mm(int cpu); +void leave_mm(void); +#define leave_mm leave_mm #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 0ca50611e8ce..73bf3b1b44e8 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -2,41 +2,24 @@ #ifndef _ASM_X86_MMU_CONTEXT_H #define _ASM_X86_MMU_CONTEXT_H -#include <asm/desc.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/pkeys.h> #include <trace/events/tlb.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> -#include <asm/mpx.h> +#include <asm/debugreg.h> +#include <asm/gsseg.h> +#include <asm/desc.h> extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) -{ -} -#endif /* !CONFIG_PARAVIRT_XXL */ - #ifdef CONFIG_PERF_EVENTS - +DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); - -static inline void load_mm_cr4(struct mm_struct *mm) -{ - if (static_branch_unlikely(&rdpmc_always_available_key) || - atomic_read(&mm->context.perf_rdpmc_allowed)) - cr4_set_bits(X86_CR4_PCE); - else - cr4_clear_bits(X86_CR4_PCE); -} -#else -static inline void load_mm_cr4(struct mm_struct *mm) {} +void cr4_update_pce(void *ignored); #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL @@ -66,14 +49,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. */ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -96,88 +71,84 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); +#else static inline void load_mm_ldt(struct mm_struct *mm) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); + clear_LDT(); +} +static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); +} +#endif +#ifdef CONFIG_ADDRESS_MASKING +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) +{ /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. + * When switch_mm_irqs_off() is called for a kthread, it may race with + * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two + * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it + * reads a single value for both. 
*/ + return READ_ONCE(mm->context.lam_cr3_mask); +} - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). - */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). - */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) +{ + mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask; + mm->context.untag_mask = oldmm->context.untag_mask; +} + +#define mm_untag_mask mm_untag_mask +static inline unsigned long mm_untag_mask(struct mm_struct *mm) +{ + return mm->context.untag_mask; +} + +static inline void mm_reset_untag_mask(struct mm_struct *mm) +{ + mm->context.untag_mask = -1UL; +} + +#define arch_pgtable_dma_compat arch_pgtable_dma_compat +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) +{ + return !mm_lam_cr3_mask(mm) || + test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); +} #else - clear_LDT(); -#endif + +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) +{ + return 0; } -static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif +} - DEBUG_LOCKS_WARN_ON(preemptible()); +static inline void mm_reset_untag_mask(struct mm_struct *mm) +{ } +#endif + +#define enter_lazy_tlb enter_lazy_tlb +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +#define mm_init_global_asid mm_init_global_asid +extern void mm_init_global_asid(struct mm_struct *mm); +extern void mm_free_global_asid(struct mm_struct *mm); + +/* + * Init a new mm. Used on mm copies, like at fork() + * and on mm's that are brand-new, like at execve(). 
+ */ +#define init_new_context init_new_context static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -185,6 +156,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { @@ -194,12 +166,18 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif + + mm_init_global_asid(mm); + mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; } + +#define destroy_context destroy_context static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); + mm_free_global_asid(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -211,26 +189,42 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev, next) \ do { \ - paravirt_activate_mm((prev), (next)); \ - switch_mm((prev), (next), NULL); \ + paravirt_enter_mmap(next); \ + switch_mm_irqs_off((prev), (next), NULL); \ } while (0); #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - lazy_load_gs(0); \ + loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ do { \ + shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) #endif +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +#endif +} + static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - paravirt_arch_dup_mmap(oldmm, mm); + arch_dup_pkeys(oldmm, mm); + paravirt_enter_mmap(mm); + dup_lam(oldmm, mm); return ldt_dup_context(oldmm, mm); } @@ -244,7 +238,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm) static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || - !(mm->context.ia32_compat == TIF_IA32); + !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags); } #else static inline bool is_64bit_mm(struct mm_struct *mm) @@ -253,34 +247,14 @@ static inline bool is_64bit_mm(struct mm_struct *mm) } #endif -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) +static inline bool is_notrack_mm(struct mm_struct *mm) { - mpx_mm_init(mm); + return test_bit(MM_CONTEXT_NOTRACK, &mm->context.flags); } -static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void set_notrack_mm(struct mm_struct *mm) { - /* - * mpx_notify_unmap() goes and reads a rarely-hot - * cacheline in the mm_struct. That can be expensive - * enough to be seen in profiles. - * - * The mpx_notify_unmap() call and its contents have been - * observed to affect munmap() performance on hardware - * where MPX is not present. - * - * The unlikely() optimizes for the fast case: no MPX - * in the CPU, or no MPX use in the process. 
Even if - * we get this wrong (in the unlikely event that MPX - * is widely enabled on some system) the overhead of - * MPX itself (reading bounds tables) is expected to - * overwhelm the overhead of getting this unlikely() - * consistently wrong. - */ - if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, vma, start, end); + set_bit(MM_CONTEXT_NOTRACK, &mm->context.flags); } /* @@ -292,21 +266,6 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ -static inline bool vma_is_foreign(struct vm_area_struct *vma) -{ - if (!current->mm) - return true; - /* - * Should PKRU be enforced on the access to this VMA? If - * the VMA is from another process, then PKRU has no - * relevance and should not be enforced. - */ - if (current->mm != vma->vm_mm) - return true; - - return false; -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { @@ -319,23 +278,11 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * This can be used from process context to figure out what the value of - * CR3 is without needing to do a (slow) __read_cr3(). - * - * It's intended to be used for code like KVM that sneakily changes CR3 - * and needs to restore it. It needs to be used very carefully. - */ -static inline unsigned long __get_current_cr3_fast(void) -{ - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, - this_cpu_read(cpu_tlbstate.loaded_mm_asid)); +unsigned long __get_current_cr3_fast(void); - /* For now, be very restrictive about when this can be called. */ - VM_WARN_ON(in_nmi() || preemptible()); +#include <asm-generic/mmu_context.h> - VM_BUG_ON(cr3 != __read_cr3()); - return cr3; -} +extern struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm); +extern void unuse_temporary_mm(struct mm_struct *prev_mm); #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmx.h b/arch/x86/include/asm/mmx.h deleted file mode 100644 index f572d0f944bb..000000000000 --- a/arch/x86/include/asm/mmx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MMX_H -#define _ASM_X86_MMX_H - -/* - * MMX 3Dnow! 
helper operations - */ - -#include <linux/types.h> - -extern void *_mmx_memcpy(void *to, const void *from, size_t size); -extern void mmx_clear_page(void *page); -extern void mmx_copy_page(void *to, void *from); - -#endif /* _ASM_X86_MMX_H */ diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h deleted file mode 100644 index c41b41edd691..000000000000 --- a/arch/x86/include/asm/mmzone.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_X86_32 -# include <asm/mmzone_32.h> -#else -# include <asm/mmzone_64.h> -#endif diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h deleted file mode 100644 index 73d8dd14dda2..000000000000 --- a/arch/x86/include/asm/mmzone_32.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002 - * - */ - -#ifndef _ASM_X86_MMZONE_32_H -#define _ASM_X86_MMZONE_32_H - -#include <asm/smp.h> - -#ifdef CONFIG_NUMA -extern struct pglist_data *node_data[]; -#define NODE_DATA(nid) (node_data[nid]) -#endif /* CONFIG_NUMA */ - -#ifdef CONFIG_DISCONTIGMEM - -/* - * generic node memory support, the following assumptions apply: - * - * 1) memory comes in 64Mb contiguous chunks which are either present or not - * 2) we will not have more than 64Gb in total - * - * for now assume that 64Gb is max amount of RAM for whole system - * 64Gb / 4096bytes/page = 16777216 pages - */ -#define MAX_NR_PAGES 16777216 -#define MAX_SECTIONS 1024 -#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS) - -extern s8 physnode_map[]; - -static inline int pfn_to_nid(unsigned long pfn) -{ -#ifdef CONFIG_NUMA - return((int) physnode_map[(pfn) / PAGES_PER_SECTION]); -#else - return 0; -#endif -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#define early_pfn_valid(pfn) pfn_valid((pfn)) - -#endif /* CONFIG_DISCONTIGMEM */ - -#endif /* _ASM_X86_MMZONE_32_H */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h deleted file mode 100644 index 0c585046f744..000000000000 --- a/arch/x86/include/asm/mmzone_64.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* K8 NUMA support */ -/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */ -/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. 
*/ -#ifndef _ASM_X86_MMZONE_64_H -#define _ASM_X86_MMZONE_64_H - -#ifdef CONFIG_NUMA - -#include <linux/mmdebug.h> -#include <asm/smp.h> - -extern struct pglist_data *node_data[]; - -#define NODE_DATA(nid) (node_data[nid]) - -#endif -#endif /* _ASM_X86_MMZONE_64_H */ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 7948a17febb4..3c2de4ce3b10 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,70 +5,20 @@ #include <asm-generic/module.h> #include <asm/orc_types.h> +struct its_array { +#ifdef CONFIG_MITIGATION_ITS + void **pages; + int num; +#endif +}; + struct mod_arch_specific { #ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + struct its_array its_pages; }; -#ifdef CONFIG_X86_64 -/* X86_64 does not define MODULE_PROC_FAMILY */ -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_MATOM -#define MODULE_PROC_FAMILY "ATOM " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_MELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_X86_32 -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY -#endif - #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 606cbaebd336..d593e52e6635 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_MPSPEC_H #define _ASM_X86_MPSPEC_H +#include <linux/types.h> #include <asm/mpspec_def.h> #include <asm/x86_init.h> @@ -15,16 +16,14 @@ extern int pic_mode; * Summit or generic (i.e. installer) kernels need lots of bus entries. * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. 
*/ -#if CONFIG_BASE_SMALL == 0 -# define MAX_MP_BUSSES 260 -#else +#ifdef CONFIG_BASE_SMALL # define MAX_MP_BUSSES 32 +#else +# define MAX_MP_BUSSES 260 #endif #define MAX_IRQ_SOURCES 256 -extern unsigned int def_to_bigsmp; - #else /* CONFIG_X86_64: */ #define MAX_MP_BUSSES 256 @@ -39,9 +38,8 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern unsigned int boot_cpu_physical_apicid; +extern u32 boot_cpu_physical_apicid; extern u8 boot_cpu_apic_version; -extern unsigned long mp_lapic_addr; #ifdef CONFIG_X86_LOCAL_APIC extern int smp_found_config; @@ -49,106 +47,31 @@ extern int smp_found_config; # define smp_found_config 0 #endif -static inline void get_smp_config(void) -{ - x86_init.mpparse.get_smp_config(0); -} - -static inline void early_get_smp_config(void) -{ - x86_init.mpparse.get_smp_config(1); -} - -static inline void find_smp_config(void) -{ - x86_init.mpparse.find_smp_config(); -} - #ifdef CONFIG_X86_MPPARSE extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; -extern int default_mpc_apic_id(struct mpc_cpu *m); -extern void default_smp_read_mpc_oem(struct mpc_table *mpc); -# ifdef CONFIG_X86_IO_APIC -extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); -# else -# define default_mpc_oem_bus_info NULL -# endif -extern void default_find_smp_config(void); -extern void default_get_smp_config(unsigned int early); +extern void mpparse_find_mptable(void); +extern void mpparse_parse_early_smp_config(void); +extern void mpparse_parse_smp_config(void); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } -#define enable_update_mptable 0 -#define default_mpc_apic_id NULL -#define default_smp_read_mpc_oem NULL -#define default_mpc_oem_bus_info NULL -#define default_find_smp_config x86_init_noop -#define default_get_smp_config x86_init_uint_noop +#define enable_update_mptable 0 +#define mpparse_find_mptable x86_init_noop +#define mpparse_parse_early_smp_config x86_init_noop +#define mpparse_parse_smp_config x86_init_noop #endif -int generic_processor_info(int apicid, int version); - -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) - -struct physid_mask { - unsigned long mask[PHYSID_ARRAY_SIZE]; -}; +extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); -typedef struct physid_mask physid_mask_t; - -#define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_clear(physid, map) clear_bit(physid, (map).mask) -#define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physid_test_and_set(physid, map) \ - test_and_set_bit(physid, (map).mask) - -#define physids_and(dst, src1, src2) \ - bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) - -#define physids_or(dst, src1, src2) \ - bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) - -#define physids_clear(map) \ - bitmap_zero((map).mask, MAX_LOCAL_APIC) - -#define physids_complement(dst, src) \ - bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC) - -#define physids_empty(map) \ - bitmap_empty((map).mask, MAX_LOCAL_APIC) - -#define physids_equal(map1, map2) \ - bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC) - -#define physids_weight(map) \ - bitmap_weight((map).mask, MAX_LOCAL_APIC) - -#define physids_shift_right(d, s, n) \ - bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC) - -#define physids_shift_left(d, s, n) \ - bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC) - -static inline unsigned long physids_coerce(physid_mask_t 
*map) -{ - return map->mask[0]; -} - -static inline void physids_promote(unsigned long physids, physid_mask_t *map) +static inline void reset_phys_cpu_present_map(u32 apicid) { - physids_clear(*map); - map->mask[0] = physids; + bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); + set_bit(apicid, phys_cpu_present_map); } -static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +static inline void copy_phys_cpu_present_map(unsigned long *dst) { - physids_clear(*map); - physid_set(physid, *map); + bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC); } -#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } -#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } - -extern physid_mask_t phys_cpu_present_map; - #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h deleted file mode 100644 index d0b1434fb0b6..000000000000 --- a/arch/x86/include/asm/mpx.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MPX_H -#define _ASM_X86_MPX_H - -#include <linux/types.h> -#include <linux/mm_types.h> - -#include <asm/ptrace.h> -#include <asm/insn.h> - -/* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ -#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) -#define MPX_BNDCFG_ENABLE_FLAG 0x1 -#define MPX_BD_ENTRY_VALID_FLAG 0x1 - -/* - * The upper 28 bits [47:20] of the virtual address in 64-bit - * are used to index into bounds directory (BD). - * - * The directory is 2G (2^31) in size, and with 8-byte entries - * it has 2^28 entries. - */ -#define MPX_BD_SIZE_BYTES_64 (1UL<<31) -#define MPX_BD_ENTRY_BYTES_64 8 -#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) - -/* - * The 32-bit directory is 4MB (2^22) in size, and with 4-byte - * entries it has 2^20 entries. - */ -#define MPX_BD_SIZE_BYTES_32 (1UL<<22) -#define MPX_BD_ENTRY_BYTES_32 4 -#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) - -/* - * A 64-bit table is 4MB total in size, and an entry is - * 4 64-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_64 (1UL<<22) -#define MPX_BT_ENTRY_BYTES_64 32 -#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) - -/* - * A 32-bit table is 16kB total in size, and an entry is - * 4 32-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_32 (1UL<<14) -#define MPX_BT_ENTRY_BYTES_32 16 -#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) - -#define MPX_BNDSTA_TAIL 2 -#define MPX_BNDCFG_TAIL 12 -#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BNDSTA_ERROR_CODE 0x3 - -struct mpx_fault_info { - void __user *addr; - void __user *lower; - void __user *upper; -}; - -#ifdef CONFIG_X86_INTEL_MPX -int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -int mpx_handle_bd_fault(void); -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ - /* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. 
- */ - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; -} -void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long start, unsigned long end); - -unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, - unsigned long flags); -#else -static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - return -EINVAL; -} -static inline int mpx_handle_bd_fault(void) -{ - return -EINVAL; -} -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return 0; -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ -} -static inline void mpx_notify_unmap(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline unsigned long mpx_unmapped_area_check(unsigned long addr, - unsigned long len, unsigned long flags) -{ - return addr; -} -#endif /* CONFIG_X86_INTEL_MPX */ - -#endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..eef4c3a5ba28 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -3,162 +3,84 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/atomic.h> #include <linux/nmi.h> -#include <asm/io.h> -#include <asm/hyperv-tlfs.h> +#include <linux/msi.h> +#include <linux/io.h> +#include <linux/static_call.h> #include <asm/nospec-branch.h> +#include <asm/paravirt.h> +#include <asm/msr.h> +#include <hyperv/hvhdk.h> +#include <asm/fpu/types.h> -#define VP_INVAL U32_MAX +/* + * Hyper-V always provides a single IO-APIC at this MMIO address. + * Ideally, the value should be looked up in ACPI tables, but it + * is needed for mapping the IO-APIC early in boot on Confidential + * VMs, before ACPI functions can be used. + */ +#define HV_IOAPIC_BASE_ADDRESS 0xfec00000 -struct ms_hyperv_info { - u32 features; - u32 misc_features; - u32 hints; - u32 nested_features; - u32 max_vp_index; - u32 max_lp_index; -}; +#define HV_VTL_NORMAL 0x0 +#define HV_VTL_SECURE 0x1 +#define HV_VTL_MGMT 0x2 -extern struct ms_hyperv_info ms_hyperv; +union hv_ghcb; +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); +DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); -/* - * Generate the guest ID. - */ - -static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, - __u64 d_info2) -{ - __u64 guest_id = 0; - - guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); - guest_id |= (d_info1 << 48); - guest_id |= (kernel_version << 16); - guest_id |= d_info2; - - return guest_id; -} - +void hyperv_vector_handler(struct pt_regs *regs); -/* Free the message slot and signal end-of-message if required */ -static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) +static inline unsigned char hv_get_nmi_reason(void) { - /* - * On crash we're reading some other CPU's message page and we need - * to be careful: this other CPU may already had cleared the header - * and the host may already had delivered some other message there. - * In case we blindly write msg->header.message_type we're going - * to lose it. We can still lose a message of the same type but - * we count on the fact that there can only be one - * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages - * on crash. 
- */ - if (cmpxchg(&msg->header.message_type, old_msg_type, - HVMSG_NONE) != old_msg_type) - return; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } + return 0; } -#define hv_init_timer(timer, tick) \ - wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick) -#define hv_init_timer_config(timer, val) \ - wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val) - -#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) -#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2); -#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) -#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) - -#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) -#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) - -#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) +#if IS_ENABLED(CONFIG_HYPERV) +extern void *hv_hypercall_pg; -#define hv_get_synint_state(int_num, val) \ - rdmsrl(HV_X64_MSR_SINT0 + int_num, val) -#define hv_set_synint_state(int_num, val) \ - wrmsrl(HV_X64_MSR_SINT0 + int_num, val) +extern union hv_ghcb * __percpu *hv_ghcb_pg; -#define hv_get_crash_ctl(val) \ - rdmsrl(HV_X64_MSR_CRASH_CTL, val) +bool hv_isolation_type_snp(void); +bool hv_isolation_type_tdx(void); -void hyperv_callback_vector(void); -void hyperv_reenlightenment_vector(void); -#ifdef CONFIG_TRACING -#define trace_hyperv_callback_vector hyperv_callback_vector +#ifdef CONFIG_X86_64 +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall); #endif -void hyperv_vector_handler(struct pt_regs *regs); -void hv_setup_vmbus_irq(void (*handler)(void)); -void hv_remove_vmbus_irq(void); - -void hv_setup_kexec_handler(void (*handler)(void)); -void hv_remove_kexec_handler(void); -void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); -void hv_remove_crash_handler(void); /* - * Routines for stimer0 Direct Mode handling. - * On x86/x64, there are no percpu actions to take. + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA + * to start AP in enlightened SEV guest. */ -void hv_stimer0_vector_handler(struct pt_regs *regs); -void hv_stimer0_callback_vector(void); -int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); -void hv_remove_stimer0_irq(int irq); - -static inline void hv_enable_stimer0_percpu_irq(int irq) {} -static inline void hv_disable_stimer0_percpu_irq(int irq) {} - - -#if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; -extern void *hv_hypercall_pg; -extern void __percpu **hyperv_pcpu_input_arg; +#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL +#define HV_AP_SEGMENT_LIMIT 0xffffffff +/* + * If the hypercall involves no input or output parameters, the hypervisor + * ignores the corresponding GPA pointer. + */ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? 
virt_to_phys(output) : 0; - u64 hv_status; #ifdef CONFIG_X86_64 - if (!hv_hypercall_pg) - return U64_MAX; - - __asm__ __volatile__("mov %4, %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input_address) - : "r" (output_address), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "memory", "r8", "r9", "r10", "r11"); + return static_call_mod(hv_hypercall)(control, input_address, output_address); #else u32 input_address_hi = upper_32_bits(input_address); u32 input_address_lo = lower_32_bits(input_address); u32 output_address_hi = upper_32_bits(output_address); u32 output_address_lo = lower_32_bits(output_address); + u64 hv_status; if (!hv_hypercall_pg) return U64_MAX; @@ -171,115 +93,69 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) "D"(output_address_hi), "S"(output_address_lo), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); -#endif /* !x86_64 */ return hv_status; +#endif /* !x86_64 */ } /* Fast hypercall with 8 bytes of input and no output */ -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; - #ifdef CONFIG_X86_64 - { - __asm__ __volatile__(CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, 0); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), - ASM_CALL_CONSTRAINT - : "A" (control), - "b" (input1_hi), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "edi", "esi"); - } + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "edi", "esi"); + return hv_status; #endif - return hv_status; } -/* Fast hypercall with 16 bytes of input */ -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) { - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT; + return _hv_do_fast_hypercall8(control, input1); +} + +/* Fast hypercall with 16 bytes of input */ +static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) +{ #ifdef CONFIG_X86_64 - { - __asm__ __volatile__("mov %4, %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : "r" (input2), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, input2); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - u32 input2_hi = upper_32_bits(input2); - u32 input2_lo = lower_32_bits(input2); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), ASM_CALL_CONSTRAINT - : "A" (control), "b" (input1_hi), - "D"(input2_hi), "S"(input2_lo), - THUNK_TARGET(hv_hypercall_pg) - : "cc"); - } -#endif + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + 
"+c"(input1_lo), ASM_CALL_CONSTRAINT + : "A" (control), "b" (input1_hi), + "D"(input2_hi), "S"(input2_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); return hv_status; +#endif } -/* - * Rep hypercalls. Callers of this functions are supposed to ensure that - * rep_count and varhead_size comply with Hyper-V hypercall definition. - */ -static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, - void *input, void *output) +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) { - u64 control = code; - u64 status; - u16 rep_comp; - - control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; - control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; - - do { - status = hv_do_hypercall(control, input, output); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) - return status; - - /* Bits 32-43 of status have 'Reps completed' data. */ - rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >> - HV_HYPERCALL_REP_COMP_OFFSET; - - control &= ~HV_HYPERCALL_REP_START_MASK; - control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET; - - touch_nmi_watchdog(); - } while (rep_comp < rep_count); + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT; - return status; + return _hv_do_fast_hypercall16(control, input1, input2); } -/* - * Hypervisor's notion of virtual processor ID is different from - * Linux' notion of CPU ID. This information can only be retrieved - * in the context of the calling CPU. Setup a map for easy access - * to this information. - */ -extern u32 *hv_vp_index; -extern u32 hv_max_vp_index; extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -290,65 +166,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) return hv_vp_assist_page[cpu]; } -/** - * hv_cpu_number_to_vp_number() - Map CPU to VP. - * @cpu_number: CPU number in Linux terms - * - * This function returns the mapping between the Linux processor - * number and the hypervisor's virtual processor number, useful - * in making hypercalls and such that talk about specific - * processors. - * - * Return: Virtual processor number in Hyper-V terms - */ -static inline int hv_cpu_number_to_vp_number(int cpu_number) -{ - return hv_vp_index[cpu_number]; -} - -static inline int cpumask_to_vpset(struct hv_vpset *vpset, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - vpset->bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. 
- */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - if (vcpu == VP_INVAL) - return -1; - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &vpset->bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - return nr_bank; -} - void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyperv_report_panic(struct pt_regs *regs, long err); -void hyperv_report_panic_msg(phys_addr_t pa, size_t size); -bool hv_is_hyperv_initialized(void); -void hyperv_cleanup(void); - -void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); @@ -358,6 +177,8 @@ int hyperv_flush_guest_mapping_range(u64 as, int hyperv_fill_flush_guest_mapping_list( struct hv_guest_mapping_flush_list *flush, u64 start_gfn, u64 end_gfn); +void hv_sleep_notifiers_register(void); +void hv_machine_power_off(void); #ifdef CONFIG_X86_64 void hv_apic_init(void); @@ -367,10 +188,69 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif +struct irq_domain *hv_create_pci_msi_domain(void); + +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry); +int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, + struct hv_interrupt_entry *entry); +int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); + +#ifdef CONFIG_AMD_MEM_ENCRYPT +bool hv_ghcb_negotiate_protocol(void); +void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); +#else +static inline bool hv_ghcb_negotiate_protocol(void) { return false; } +static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, + unsigned int cpu) { return 0; } +#endif + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_vtom_init(void); +void hv_ivm_msr_write(u64 msr, u64 value); +void hv_ivm_msr_read(u64 msr, u64 *value); +#else +static inline void hv_vtom_init(void) {} +static inline void hv_ivm_msr_write(u64 msr, u64 value) {} +static inline void hv_ivm_msr_read(u64 msr, u64 *value) {} +#endif + +static inline bool hv_is_synic_msr(unsigned int reg) +{ + return (reg >= HV_X64_MSR_SCONTROL) && + (reg <= HV_X64_MSR_SINT15); +} + +static inline bool hv_is_sint_msr(unsigned int reg) +{ + return (reg >= HV_X64_MSR_SINT0) && + (reg <= HV_X64_MSR_SINT15); +} + +u64 hv_get_msr(unsigned int reg); +void hv_set_msr(unsigned int reg, u64 value); +u64 hv_get_non_nested_msr(unsigned int reg); +void hv_set_non_nested_msr(unsigned int reg, u64 value); + +static __always_inline u64 hv_raw_get_msr(unsigned int reg) +{ + return native_rdmsrq(reg); +} +int hv_apicid_to_vp_index(u32 apic_id); + +#if IS_ENABLED(CONFIG_MSHV_ROOT) && IS_ENABLED(CONFIG_CRASH_DUMP) +void hv_root_crash_init(void); +void hv_crash_asm32(void); +void hv_crash_asm64(void); +void hv_crash_asm_end(void); +#else /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */ +static inline void hv_root_crash_init(void) {} +#endif /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */ + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} -static inline bool hv_is_hyperv_initialized(void) { return false; } -static inline void hyperv_cleanup(void) {} static inline void 
hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} @@ -385,75 +265,55 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, { return -1; } +static inline void hv_set_msr(unsigned int reg, u64 value) { } +static inline u64 hv_get_msr(unsigned int reg) { return 0; } +static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { } +static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } +static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. 
- */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} +struct mshv_vtl_cpu_context { + union { + struct { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 cr2; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + }; + u64 gp_regs[16]; + }; + + struct fxregs_state fx_state; +}; +#ifdef CONFIG_HYPERV_VTL_MODE +void __init hv_vtl_init_platform(void); +int __init hv_vtl_early_init(void); +void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0); +void mshv_vtl_return_call_init(u64 vtl_return_offset); +void mshv_vtl_return_hypercall(void); +void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0); #else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} +static inline void __init hv_vtl_init_platform(void) {} +static inline int __init hv_vtl_early_init(void) { return 0; } +static inline void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {} +static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {} +static inline void mshv_vtl_return_hypercall(void) {} +static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {} #endif + +#include <asm-generic/mshyperv.h> + #endif diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index 25ddd0916bb2..935c6d470341 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -9,6 +9,63 @@ typedef struct irq_alloc_info msi_alloc_info_t; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); -void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc); +/* Structs and defines for the X86 specific MSI message format */ + +typedef struct x86_msi_data { + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#define arch_msi_msg_data x86_msi_data + +typedef struct x86_msi_addr_lo { + union { + struct { + u32 reserved_0 : 2, + dest_mode_logical : 1, + redirect_hint : 1, + reserved_1 : 1, + virt_destid_8_14 : 7, + destid_0_7 : 8, + base_address : 12; + }; + struct { + u32 dmar_reserved_0 : 2, + dmar_index_15 : 1, + dmar_subhandle_valid : 1, + dmar_format : 1, + dmar_index_0_14 : 15, + dmar_base_address : 12; + }; + }; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#define arch_msi_msg_addr_lo x86_msi_addr_lo + +#define X86_MSI_BASE_ADDRESS_LOW (0xfee00000 >> 20) + +typedef struct x86_msi_addr_hi { + u32 reserved : 8, + destid_8_31 : 24; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#define arch_msi_msg_addr_hi x86_msi_addr_hi + +#define X86_MSI_BASE_ADDRESS_HIGH (0) + +struct msi_msg; +u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid); + +#define X86_VECTOR_MSI_FLAGS_SUPPORTED \ + (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX | MSI_FLAG_PCI_MSIX_ALLOC_DYN) + +#define X86_VECTOR_MSI_FLAGS_REQUIRED \ + (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) #endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h deleted file mode 100644 index 
ee2f8ccc32d0..000000000000 --- a/arch/x86/include/asm/msidef.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MSIDEF_H -#define _ASM_X86_MSIDEF_H - -/* - * Constants for Intel APIC based MSI messages. - */ - -/* - * Shifts for MSI data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR_MASK 0x000000ff -#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \ - MSI_DATA_VECTOR_MASK) - -#define MSI_DATA_DELIVERY_MODE_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for msi address - */ - -#define MSI_ADDR_BASE_HI 0 -#define MSI_ADDR_BASE_LO 0xfee00000 - -#define MSI_ADDR_DEST_MODE_SHIFT 2 -#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT) -#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT) - -#define MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) - /* dedicated cpu */ -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) - /* lowest priority */ - -#define MSI_ADDR_DEST_ID_SHIFT 12 -#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 -#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ - MSI_ADDR_DEST_ID_MASK) -#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00) - -#define MSI_ADDR_IR_EXT_INT (1 << 4) -#define MSI_ADDR_IR_SHV (1 << 3) -#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) -#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) -#endif /* _ASM_X86_MSIDEF_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8e40c2446fd1..3d0a0950d20a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,12 +2,9 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H -/* - * CPU model specific register (MSR) numbers. - * - * Do not add new entries to this file unless the definitions are shared - * between multiple compilation units. - */ +#include <linux/bits.h> + +/* CPU model specific register (MSR) numbers. */ /* x86-64 specific MSRs */ #define MSR_EFER 0xc0000080 /* extended feature register */ @@ -28,6 +25,8 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_TCE 15 /* Enable Translation Cache Extensions */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -36,18 +35,59 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_TCE (1<<_EFER_TCE) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) + +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. 
Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + +/* FRED MSRs */ +#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ +#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ +#define MSR_IA32_FRED_RSP2 0x1ce /* Level 2 stack pointer */ +#define MSR_IA32_FRED_RSP3 0x1cf /* Level 3 stack pointer */ +#define MSR_IA32_FRED_STKLVLS 0x1d0 /* Exception stack levels */ +#define MSR_IA32_FRED_SSP0 MSR_IA32_PL0_SSP /* Level 0 shadow stack pointer */ +#define MSR_IA32_FRED_SSP1 0x1d1 /* Level 1 shadow stack pointer */ +#define MSR_IA32_FRED_SSP2 0x1d2 /* Level 2 shadow stack pointer */ +#define MSR_IA32_FRED_SSP3 0x1d3 /* Level 3 shadow stack pointer */ +#define MSR_IA32_FRED_CONFIG 0x1d4 /* Entrypoint and interrupt stack level */ /* Intel MSRs. Some also available on other CPUs */ +#define MSR_TEST_CTRL 0x00000033 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT) #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) + +/* A mask for bits which the kernel toggles when controlling mitigations */ +#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -59,6 +99,22 @@ #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. 
+ */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + +/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ +#define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT) +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) @@ -69,24 +125,125 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_MCU_ENUM BIT(16) /* + * Indicates the presence of microcode update + * feature enumeration and status information. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ +#define ARCH_CAP_XAPIC_DISABLE BIT(21) /* + * IA32_XAPIC_DISABLE_STATUS MSR + * supported + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). 
+ */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ +#define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -94,6 +251,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f #define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 @@ -102,8 +260,25 @@ #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af +#define MSR_SNOOP_RSP_0 0x00001328 +#define MSR_SNOOP_RSP_1 0x00001329 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + +/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ +#define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SBAF_BIT 8 +#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT) +#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -113,13 +288,69 @@ #define LBR_INFO_MISPRED BIT_ULL(63) #define LBR_INFO_IN_TX BIT_ULL(62) #define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60) #define LBR_INFO_CYCLES 0xffff +#define LBR_INFO_BR_TYPE_OFFSET 56 +#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) +#define LBR_INFO_BR_CNTR_OFFSET 32 +#define LBR_INFO_BR_CNTR_NUM 4 +#define LBR_INFO_BR_CNTR_BITS 2 +#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0) +#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0) + +#define MSR_ARCH_LBR_CTL 0x000014ce +#define ARCH_LBR_CTL_LBREN BIT(0) +#define ARCH_LBR_CTL_CPL_OFFSET 1 +#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET) +#define ARCH_LBR_CTL_STACK_OFFSET 3 +#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET) +#define ARCH_LBR_CTL_FILTER_OFFSET 16 +#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET) +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 
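
/*
 * An aside, not part of the patch: a minimal sketch of how the packed
 * LBR_INFO_* fields defined above are intended to be decoded from a raw
 * MSR_ARCH_LBR_INFO_x / MSR_LBR_INFO_x value. The helper names are
 * hypothetical; the bit positions follow directly from the definitions
 * above (branch type in bits 59:56, four 2-bit branch counters packed
 * starting at bit 32, cycle count in bits 15:0 qualified by bit 60).
 */
static inline unsigned int lbr_info_br_type(u64 info)
{
	return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
}

static inline u64 lbr_info_br_cntr(u64 info, unsigned int cntr)
{
	/* cntr must be below LBR_INFO_BR_CNTR_NUM (4) */
	return (info >> (LBR_INFO_BR_CNTR_OFFSET +
			 cntr * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
}

static inline u64 lbr_info_cycles(u64 info)
{
	/* The cycle count is only meaningful when the valid bit is set. */
	return (info & LBR_INFO_CYC_CNT_VALID) ? (info & LBR_INFO_CYCLES) : 0;
}
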
+#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 #define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 +#define PERF_CAP_LBR_FMT 0x3f +#define PERF_CAP_PEBS_TRAP BIT_ULL(6) +#define PERF_CAP_ARCH_REG BIT_ULL(7) +#define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_FW_WRITES BIT_ULL(13) +#define PERF_CAP_PEBS_BASELINE BIT_ULL(14) +#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17) +#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ + PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \ + PERF_CAP_PEBS_TIMING_INFO) + +/* Arch PEBS */ +#define MSR_IA32_PEBS_BASE 0x000003f4 +#define MSR_IA32_PEBS_INDEX 0x000003f5 +#define ARCH_PEBS_OFFSET_MASK 0x7fffff +#define ARCH_PEBS_INDEX_WR_SHIFT 4 + +#define ARCH_PEBS_RELOAD 0xffffffff +#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35) +#define ARCH_PEBS_CNTR_GP BIT_ULL(36) +#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37) +#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38) +#define ARCH_PEBS_LBR_SHIFT 40 +#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT) +#define ARCH_PEBS_VECR_XMM BIT_ULL(49) +#define ARCH_PEBS_GPR BIT_ULL(61) +#define ARCH_PEBS_AUX BIT_ULL(62) +#define ARCH_PEBS_EN BIT_ULL(63) +#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \ + ARCH_PEBS_CNTR_METRICS) + #define MSR_IA32_RTIT_CTL 0x00000570 #define RTIT_CTL_TRACEEN BIT(0) #define RTIT_CTL_CYCLEACC BIT(1) @@ -135,6 +366,8 @@ #define RTIT_CTL_DISRETC BIT(11) #define RTIT_CTL_PTW_EN BIT(12) #define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_EVENT_EN BIT(31) +#define RTIT_CTL_NOTNT BIT_ULL(55) #define RTIT_CTL_MTC_RANGE_OFFSET 14 #define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) #define RTIT_CTL_CYC_THRESH_OFFSET 19 @@ -185,16 +418,27 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) @@ -204,11 +448,10 @@ #define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) +#define DEBUGCTLMSR_RTM_DEBUG BIT(15) #define MSR_PEBS_FRONTEND 0x000003f7 -#define MSR_IA32_POWER_CTL 0x000001fc - #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 @@ -238,6 +481,7 @@ /* Run Time Average Power Limiting (RAPL) Interface */ +#define 
MSR_VR_CURRENT_CONFIG 0x00000601 #define MSR_RAPL_POWER_UNIT 0x00000606 #define MSR_PKG_POWER_LIMIT 0x00000610 @@ -259,6 +503,10 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 +#define MSR_AMD_CORE_ENERGY_STATUS 0xc001029a +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b + /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 @@ -267,6 +515,7 @@ #define MSR_TURBO_ACTIVATION_RATIO 0x0000064C #define MSR_PLATFORM_ENERGY_STATUS 0x0000064D +#define MSR_SECONDARY_TURBO_RATIO_LIMIT 0x00000650 #define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 #define MSR_PKG_ANY_CORE_C0_RES 0x00000659 @@ -284,11 +533,29 @@ #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Control-flow Enforcement Technology MSRs */ +#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f @@ -296,7 +563,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -313,16 +580,16 @@ #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -358,17 +625,55 @@ /* Alternative perfctr range with full access. */ #define MSR_IA32_PMC0 0x000004c1 -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. 
*/ +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 + +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 +#define MSR_IA32_PMC_V6_STEP 4 +/* KeyID partitioning between MKTME and TDX */ +#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 + +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. + */ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f #define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD_PPIN_CTL 0xc00102f0 +#define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 +#define MSR_AMD64_CPUID_FN_1 0xc0011004 + +#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT) + #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_TW_CFG 0xc0011023 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) +#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9 + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -386,13 +691,111 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +#define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +#define MSR_AMD64_SEV_ES_ENABLED_BIT 1 +#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 +#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) +#define MSR_AMD64_SNP_VTOM_BIT 3 +#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT) +#define MSR_AMD64_SNP_REFLECT_VC_BIT 4 +#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT) +#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5 +#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT) +#define MSR_AMD64_SNP_ALT_INJ_BIT 6 +#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT) +#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7 +#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT) +#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8 +#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT) +#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9 +#define MSR_AMD64_SNP_BTB_ISOLATION 
BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT) +#define MSR_AMD64_SNP_VMPL_SSS_BIT 10 +#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT) +#define MSR_AMD64_SNP_SECURE_TSC_BIT 11 +#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT) +#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12 +#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT) +#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) +#define MSR_AMD64_SNP_IBS_VIRT_BIT 14 +#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT) +#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) +#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16 +#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) +#define MSR_AMD64_SNP_SMT_PROT_BIT 17 +#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 +#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SAVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SAVIC_EN_BIT 0 +#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) +#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) +#define MSR_AMD64_RMP_BASE 0xc0010132 +#define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) + +#define MSR_SVSM_CAA 0xc001f000 + +/* AMD Collaborative Processor Performance Control MSRs */ +#define MSR_AMD_CPPC_CAP1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define MSR_AMD_CPPC_CAP2 0xc00102b2 +#define MSR_AMD_CPPC_REQ 0xc00102b3 +#define MSR_AMD_CPPC_STATUS 0xc00102b4 + +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) + +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) + +/* AMD Performance Counter Global Status and Control MSRs */ +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 +#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 + +/* AMD Hardware Feedback Support MSRs */ +#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 +#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501 +#define MSR_AMD_WORKLOAD_HRST 0xc0000502 + +/* AMD Last Branch Record MSRs */ +#define MSR_AMD64_LBR_SELECT 0xc000010e + +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 + +/* Fam 19h MSRs */ +#define MSR_F19H_UMC_PERF_CTL 0xc0010800 +#define MSR_F19H_UMC_PERF_CTR 0xc0010801 + +/* Zen 2 */ +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -406,6 +809,8 @@ #define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ +#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a +#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b #define MSR_F15H_PERF_CTL 
0xc0010200 #define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL #define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) @@ -436,16 +841,20 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) + #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 @@ -468,8 +877,13 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) +#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -514,38 +928,55 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. 
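
The AMD_CPPC_*_MASK definitions earlier in this hunk are plain GENMASK()s, so FIELD_PREP() from <linux/bitfield.h> is the natural way to build a request. A hedged sketch; the perf numbers are made up, not taken from this patch:

	#include <linux/bitfield.h>

	static void cppc_req_example(void)
	{
		u64 req = FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, 255) |
			  FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, 16) |
			  FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0) |
			  FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, 0x80);

		wrmsrq(MSR_AMD_CPPC_REQ, req);
	}
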
*/ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLED BIT(17) +#define FEAT_CTL_SGX_ENABLED BIT(18) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) -#define MSR_IA32_TSCDEADLINE 0x000006e0 - #define MSR_IA32_UCODE_WRITE 0x00000079 + +#define MSR_IA32_MCU_ENUMERATION 0x0000007b +#define MCU_STAGING BIT(4) + #define MSR_IA32_UCODE_REV 0x0000008b +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F + #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b #define MSR_IA32_SMBASE 0x0000009e #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff -#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 -#define MSR_AMD_PERF_STATUS 0xc0010063 -#define MSR_AMD_PERF_CTL 0xc0010062 + +/* AMD Branch Sampling configuration */ +#define MSR_AMD_DBG_EXTN_CFG 0xc000010f +#define MSR_AMD_SAMP_BR_FROM 0xc0010300 + +#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6) #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 @@ -577,6 +1008,7 @@ #define ENERGY_PERF_BIAS_PERFORMANCE 0 #define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 #define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_NORMAL_POWERSAVE 7 #define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 #define ENERGY_PERF_BIAS_POWERSAVE 15 @@ -584,12 +1016,14 @@ #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) +#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26) #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) +#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25) /* Thermal Thresholds Support */ #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) @@ -666,6 +1100,16 @@ #define MSR_IA32_TSC_DEADLINE 0x000006E0 + +#define MSR_TSX_FORCE_ABORT 0x0000010F + +#define MSR_TFA_RTM_FORCE_ABORT_BIT 0 +#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) +#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1 +#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT) +#define MSR_TFA_SDV_ENABLE_RTM_BIT 2 +#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT) + /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 #define MSR_IA32_MCG_EBX 0x00000181 @@ -770,11 +1214,22 @@ #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a #define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 +#define MSR_PERF_METRICS 
0x00000329 + +/* PERF_GLOBAL_OVF_CTL bits */ +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT) + /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 @@ -797,24 +1252,48 @@ #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 + +#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5 + +/* Resctrl MSRs: */ +/* - Intel: */ +#define MSR_IA32_L3_QOS_CFG 0xc81 +#define MSR_IA32_L2_QOS_CFG 0xc82 +#define MSR_IA32_QM_EVTSEL 0xc8d +#define MSR_IA32_QM_CTR 0xc8e +#define MSR_IA32_PQR_ASSOC 0xc8f +#define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 +#define MSR_IA32_L2_CBM_BASE 0xd10 +#define MSR_IA32_MBA_THRTL_BASE 0xd50 + +/* - AMD: */ +#define MSR_IA32_MBA_BW_BASE 0xc0000200 +#define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff +#define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F /* AMD-V MSRs */ - #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + +/* Hardware Feedback Interface */ +#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 +#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 + +/* x2APIC locked status */ +#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD +#define LEGACY_XAPIC_DISABLED BIT(0) /* + * x2APIC mode is locked and + * disabling x2APIC will cause + * a #GP + */ + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 91e4cf189914..9c2ea29e12a9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -4,28 +4,22 @@ #include "msr-index.h" -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/errno.h> #include <asm/cpumask.h> #include <uapi/asm/msr.h> +#include <asm/shared/msr.h> -struct msr { - union { - struct { - u32 l; - u32 h; - }; - u64 q; - }; -}; +#include <linux/types.h> +#include <linux/percpu.h> struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; + u32 msr_no; + struct msr reg; + struct msr __percpu *msrs; + int err; }; struct msr_regs_info { @@ -44,41 +38,22 @@ struct saved_msrs { }; /* - * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" - * constraint has different meanings. For i386, "A" means exactly - * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, - * it means rax *or* rdx. 
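
A hedged sketch of probing the x2APIC lock status defined at the end of the msr-index.h hunk above. The helper name is hypothetical; rdmsrq_safe() is introduced in the msr.h hunk below and tolerates the MSR being absent:

	static bool xapic_locked_example(void)
	{
		u64 status;

		if (rdmsrq_safe(MSR_IA32_XAPIC_DISABLE_STATUS, &status))
			return false;	/* MSR not implemented on this part */

		return status & LEGACY_XAPIC_DISABLED;
	}
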
- */ -#ifdef CONFIG_X86_64 -/* Using 64-bit values saves one instruction clearing the high half of low */ -#define DECLARE_ARGS(val, low, high) unsigned long low, high -#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) -#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) -#else -#define DECLARE_ARGS(val, low, high) unsigned long long val -#define EAX_EDX_VAL(val, low, high) (val) -#define EAX_EDX_RET(val, low, high) "=A" (val) -#endif - -#ifdef CONFIG_TRACEPOINTS -/* * Be very careful with includes. This header is prone to include loops. */ #include <asm/atomic.h> #include <linux/tracepoint-defs.h> -extern struct tracepoint __tracepoint_read_msr; -extern struct tracepoint __tracepoint_write_msr; -extern struct tracepoint __tracepoint_rdpmc; -#define msr_tracepoint_active(t) static_key_false(&(t).key) -extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); -extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); -extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); +#ifdef CONFIG_TRACEPOINTS +DECLARE_TRACEPOINT(read_msr); +DECLARE_TRACEPOINT(write_msr); +DECLARE_TRACEPOINT(rdpmc); +extern void do_trace_write_msr(u32 msr, u64 val, int failed); +extern void do_trace_read_msr(u32 msr, u64 val, int failed); +extern void do_trace_rdpmc(u32 msr, u64 val, int failed); #else -#define msr_tracepoint_active(t) false -static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} -static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} -static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} +static inline void do_trace_write_msr(u32 msr, u64 val, int failed) {} +static inline void do_trace_read_msr(u32 msr, u64 val, int failed) {} +static inline void do_trace_rdpmc(u32 msr, u64 val, int failed) {} #endif /* @@ -88,24 +63,24 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. 
*/ -static inline unsigned long long notrace __rdmsr(unsigned int msr) +static __always_inline u64 __rdmsr(u32 msr) { - DECLARE_ARGS(val, low, high); + EAX_EDX_DECLARE_ARGS(val, low, high); asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } -static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +static __always_inline void __wrmsrq(u32 msr, u64 val) { asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) - : : "c" (msr), "a"(low), "d" (high) : "memory"); + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) + : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory"); } #define native_rdmsr(msr, val1, val2) \ @@ -115,129 +90,81 @@ do { \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) +static __always_inline u64 native_rdmsrq(u32 msr) +{ + return __rdmsr(msr); +} + #define native_wrmsr(msr, low, high) \ - __wrmsr(msr, low, high) + __wrmsrq((msr), (u64)(high) << 32 | (low)) -#define native_wrmsrl(msr, val) \ - __wrmsr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) +#define native_wrmsrq(msr, val) \ + __wrmsrq((msr), (val)) -static inline unsigned long long native_read_msr(unsigned int msr) +static inline u64 native_read_msr(u32 msr) { - unsigned long long val; + u64 val; val = __rdmsr(msr); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, val, 0); return val; } -static inline unsigned long long native_read_msr_safe(unsigned int msr, - int *err) +static inline int native_read_msr_safe(u32 msr, u64 *p) { - DECLARE_ARGS(val, low, high); - - asm volatile("2: rdmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err]\n\t" - "xorl %%eax, %%eax\n\t" - "xorl %%edx, %%edx\n\t" - "jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) - : [err] "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), [fault] "i" (-EIO)); - if (msr_tracepoint_active(__tracepoint_read_msr)) - do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); - return EAX_EDX_VAL(val, low, high); + int err; + EAX_EDX_DECLARE_ARGS(val, low, high); + + asm volatile("1: rdmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err]) + : [err] "=r" (err), EAX_EDX_RET(val, low, high) + : "c" (msr)); + if (tracepoint_enabled(read_msr)) + do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), err); + + *p = EAX_EDX_VAL(val, low, high); + + return err; } /* Can be uninlined because referenced by paravirt */ -static inline void notrace -native_write_msr(unsigned int msr, u32 low, u32 high) +static inline void notrace native_write_msr(u32 msr, u64 val) { - __wrmsr(msr, low, high); + native_wrmsrq(msr, val); - if (msr_tracepoint_active(__tracepoint_write_msr)) - do_trace_write_msr(msr, ((u64)high << 32 | low), 0); + if (tracepoint_enabled(write_msr)) + do_trace_write_msr(msr, val, 0); } /* Can be uninlined because referenced by paravirt */ -static inline int notrace -native_write_msr_safe(unsigned int msr, u32 low, u32 high) +static inline int notrace native_write_msr_safe(u32 msr, u64 val) { int err; - asm volatile("2: wrmsr ; xor %[err],%[err]\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: mov %[fault],%[err] ; jmp 1b\n\t" - ".previous\n\t" - _ASM_EXTABLE(2b, 3b) + asm volatile("1: wrmsr ; xor %[err],%[err]\n" + "2:\n\t" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) : [err] 
"=a" (err) - : "c" (msr), "0" (low), "d" (high), - [fault] "i" (-EIO) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32)) : "memory"); - if (msr_tracepoint_active(__tracepoint_write_msr)) - do_trace_write_msr(msr, ((u64)high << 32 | low), err); + if (tracepoint_enabled(write_msr)) + do_trace_write_msr(msr, val, err); return err; } extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -/** - * rdtsc() - returns the current TSC without ordering constraints - * - * rdtsc() returns the result of RDTSC as a 64-bit integer. The - * only ordering constraint it supplies is the ordering implied by - * "asm volatile": it will put the RDTSC in the place you expect. The - * CPU can and will speculatively execute that RDTSC, though, so the - * results can be non-monotonic if compared on different CPUs. - */ -static __always_inline unsigned long long rdtsc(void) +static inline u64 native_read_pmc(int counter) { - DECLARE_ARGS(val, low, high); - - asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - - return EAX_EDX_VAL(val, low, high); -} - -/** - * rdtsc_ordered() - read the current TSC in program order - * - * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. - * It is ordered like a load to a global in-memory counter. It should - * be impossible to observe non-monotonic rdtsc_unordered() behavior - * across multiple CPUs as long as the TSC is synced. - */ -static __always_inline unsigned long long rdtsc_ordered(void) -{ - /* - * The RDTSC instruction is not ordered relative to memory - * access. The Intel SDM and the AMD APM are both vague on this - * point, but empirically an RDTSC instruction can be - * speculatively executed before prior loads. An RDTSC - * immediately after an appropriate barrier appears to be - * ordered as a normal load, that is, it provides the same - * ordering guarantees as reading from a global memory location - * that some other imaginary CPU is updating continuously with a - * time stamp. 
- */ - barrier_nospec(); - return rdtsc(); -} - -static inline unsigned long long native_read_pmc(int counter) -{ - DECLARE_ARGS(val, low, high); + EAX_EDX_DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); - if (msr_tracepoint_active(__tracepoint_rdpmc)) + if (tracepoint_enabled(rdpmc)) do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } @@ -259,82 +186,86 @@ do { \ (void)((high) = (u32)(__val >> 32)); \ } while (0) -static inline void wrmsr(unsigned int msr, u32 low, u32 high) +static inline void wrmsr(u32 msr, u32 low, u32 high) { - native_write_msr(msr, low, high); + native_write_msr(msr, (u64)high << 32 | low); } -#define rdmsrl(msr, val) \ +#define rdmsrq(msr, val) \ ((val) = native_read_msr((msr))) -static inline void wrmsrl(unsigned int msr, u64 val) +static inline void wrmsrq(u32 msr, u64 val) { - native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); + native_write_msr(msr, val); } /* wrmsr with exception handling */ -static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high) +static inline int wrmsrq_safe(u32 msr, u64 val) { - return native_write_msr_safe(msr, low, high); + return native_write_msr_safe(msr, val); } /* rdmsr with exception handling */ #define rdmsr_safe(msr, low, high) \ ({ \ - int __err; \ - u64 __val = native_read_msr_safe((msr), &__err); \ + u64 __val; \ + int __err = native_read_msr_safe((msr), &__val); \ (*low) = (u32)__val; \ (*high) = (u32)(__val >> 32); \ __err; \ }) -static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p) +static inline int rdmsrq_safe(u32 msr, u64 *p) { - int err; + return native_read_msr_safe(msr, p); +} - *p = native_read_msr_safe(msr, &err); - return err; +static __always_inline u64 rdpmc(int counter) +{ + return native_read_pmc(counter); } -#define rdpmc(counter, low, high) \ -do { \ - u64 _l = native_read_pmc((counter)); \ - (low) = (u32)_l; \ - (high) = (u32)(_l >> 32); \ -} while (0) +#endif /* !CONFIG_PARAVIRT_XXL */ -#define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ +#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) -#endif /* !CONFIG_PARAVIRT_XXL */ +/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */ +static __always_inline void wrmsrns(u32 msr, u64 val) +{ + /* + * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant + * DS prefix to avoid a trailing NOP. 
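
Taken together, the msr.h hunks above change the primary C interface: the "l"-suffixed helpers become "q"-suffixed, the safe variants return the error code and hand the value back through a pointer, and rdpmc() returns the full 64-bit count. A sketch of the resulting usage; the MSR and counter choices here are arbitrary:

	static void msr_interface_example(void)
	{
		u64 adj, rev, count;

		rdmsrq(MSR_IA32_TSC_ADJUST, adj);	/* was rdmsrl() */
		wrmsrq(MSR_IA32_TSC_ADJUST, adj);	/* was wrmsrl() */

		if (rdmsrq_safe(MSR_IA32_UCODE_REV, &rev))	/* 0 on success */
			rev = 0;

		count = rdpmc(0);	/* previously filled two u32 halves */
	}
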
+ */ + asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS) + "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) + : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32))); +} /* - * 64-bit version of wrmsr_safe(): + * Dual u32 version of wrmsrq_safe(): */ -static inline int wrmsrl_safe(u32 msr, u64 val) +static inline int wrmsr_safe(u32 msr, u32 low, u32 high) { - return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); + return wrmsrq_safe(msr, (u64)high << 32 | low); } -#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high)) - -#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) - -struct msr *msrs_alloc(void); -void msrs_free(struct msr *msrs); +struct msr __percpu *msrs_alloc(void); +void msrs_free(struct msr __percpu *msrs); int msr_set_bit(u32 msr, u8 bit); int msr_clear_bit(u32 msr, u8 bit); #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); -int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); -int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); +int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); +int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ @@ -348,25 +279,25 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +static inline int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { - rdmsrl(msr_no, *q); + rdmsrq(msr_no, *q); return 0; } -static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +static inline int wrmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { - wrmsrl(msr_no, q); + wrmsrq(msr_no, q); return 0; } static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, - struct msr *msrs) + struct msr __percpu *msrs) { - rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); + rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h)); } static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, - struct msr *msrs) + struct msr __percpu *msrs) { - wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); + wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h)); } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) @@ -377,13 +308,13 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } -static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) +static inline int rdmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { - return rdmsrl_safe(msr_no, q); + return rdmsrq_safe(msr_no, q); } -static inline 
int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) +static inline int wrmsrq_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { - return wrmsrl_safe(msr_no, q); + return wrmsrq_safe(msr_no, q); } static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { @@ -394,5 +325,11 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ + +/* Compatibility wrappers: */ +#define rdmsrl(msr, val) rdmsrq(msr, val) +#define wrmsrl(msr, val) wrmsrq(msr, val) +#define rdmsrl_on_cpu(cpu, msr, q) rdmsrq_on_cpu(cpu, msr, q) + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_MSR_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..76b95bd1a405 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.0+ */ /* Generic MTRR (Memory Type Range Register) ioctls. Copyright (C) 1997-1999 Richard Gooch - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. @@ -23,15 +10,43 @@ #ifndef _ASM_X86_MTRR_H #define _ASM_X86_MTRR_H +#include <linux/bits.h> #include <uapi/asm/mtrr.h> -#include <asm/pat.h> +/* Defines for hardware MTRR registers. */ +#define MTRR_CAP_VCNT GENMASK(7, 0) +#define MTRR_CAP_FIX BIT_MASK(8) +#define MTRR_CAP_WC BIT_MASK(10) + +#define MTRR_DEF_TYPE_TYPE GENMASK(7, 0) +#define MTRR_DEF_TYPE_FE BIT_MASK(10) +#define MTRR_DEF_TYPE_E BIT_MASK(11) + +#define MTRR_DEF_TYPE_ENABLE (MTRR_DEF_TYPE_FE | MTRR_DEF_TYPE_E) +#define MTRR_DEF_TYPE_DISABLE ~(MTRR_DEF_TYPE_TYPE | MTRR_DEF_TYPE_ENABLE) + +#define MTRR_PHYSBASE_TYPE GENMASK(7, 0) +#define MTRR_PHYSBASE_RSVD GENMASK(11, 8) + +#define MTRR_PHYSMASK_RSVD GENMASK(10, 0) +#define MTRR_PHYSMASK_V BIT_MASK(11) + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + bool have_fixed; + mtrr_type def_type; +}; /* * The following functions are for use by other drivers that cannot use * arch_phys_wc_add and arch_phys_wc_del. 
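
A hedged sketch of the new MTRR register macros above; MSR_MTRRcap is assumed to come from msr-index.h, and the helper is hypothetical:

	static void mtrr_cap_example(void)
	{
		u64 cap;

		rdmsrq(MSR_MTRRcap, cap);
		pr_info("MTRR: %llu variable ranges, fixed %s, WC %s\n",
			cap & MTRR_CAP_VCNT,
			(cap & MTRR_CAP_FIX) ? "yes" : "no",
			(cap & MTRR_CAP_WC) ? "yes" : "no");
	}
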
*/ # ifdef CONFIG_MTRR +void mtrr_bp_init(void); +void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type); extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); @@ -41,21 +56,27 @@ extern int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, bool increment); extern int mtrr_del(int reg, unsigned long base, unsigned long size); extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); -extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); -extern void mtrr_ap_init(void); -extern void mtrr_bp_init(void); -extern void set_mtrr_aps_delayed_init(void); -extern void mtrr_aps_init(void); -extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); +void mtrr_disable(void); +void mtrr_enable(void); +void mtrr_generic_set_state(void); # else +static inline void guest_force_mtrr_state(struct mtrr_var_range *var, + unsigned int num_var, + mtrr_type def_type) +{ +} + static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* - * Return no-MTRRs: + * Return the default MTRR type, without any known other types in + * that range. */ - return MTRR_TYPE_INVALID; + *uniform = 1; + + return MTRR_TYPE_UNCACHABLE; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) @@ -81,18 +102,10 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) { return 0; } -static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ -} -static inline void mtrr_bp_init(void) -{ - pat_disable("MTRRs disabled, skipping PAT initialization too."); -} - -#define mtrr_ap_init() do {} while (0) -#define set_mtrr_aps_delayed_init() do {} while (0) -#define mtrr_aps_init() do {} while (0) -#define mtrr_bp_restore() do {} while (0) +#define mtrr_bp_init() do {} while (0) +#define mtrr_disable() do {} while (0) +#define mtrr_enable() do {} while (0) +#define mtrr_generic_set_state() do {} while (0) # endif #ifdef CONFIG_COMPAT @@ -127,7 +140,8 @@ struct mtrr_gentry32 { #endif /* CONFIG_COMPAT */ /* Bit fields for enabled in struct mtrr_state_type */ -#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 -#define MTRR_STATE_MTRR_ENABLED 0x02 +#define MTRR_STATE_SHIFT 10 +#define MTRR_STATE_MTRR_FIXED_ENABLED (MTRR_DEF_TYPE_FE >> MTRR_STATE_SHIFT) +#define MTRR_STATE_MTRR_ENABLED (MTRR_DEF_TYPE_E >> MTRR_STATE_SHIFT) #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..e4815e15dc9a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,43 +6,46 @@ #include <linux/sched/idle.h> #include <asm/cpufeature.h> +#include <asm/nospec-branch.h> #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 #define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) #define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) +#define MWAIT_C1_SUBSTATE_MASK 0xf0 -#define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 #define CPUID5_ECX_INTERRUPT_BREAK 0x2 #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) -#define MWAITX_MAX_LOOPS ((u32)-1) -#define MWAITX_DISABLE_CSTATES 0xf +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX +#define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 -static inline void __monitor(const 
void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx) { - /* "monitor %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc8;" - :: "a" (eax), "c" (ecx), "d"(edx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("monitor" :: "a" (eax), "c" (ecx), "d" (edx)); } -static inline void __monitorx(const void *eax, unsigned long ecx, - unsigned long edx) +static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) { - /* "monitorx %eax, %ecx, %edx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfa;" - :: "a" (eax), "c" (ecx), "d"(edx)); + asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __mwait(u32 eax, u32 ecx) { - /* "mwait %eax, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + /* + * Use the instruction mnemonic with implicit operands, as the LLVM + * assembler fails to assemble the mnemonic with explicit operands: + */ + asm volatile("mwait" :: "a" (eax), "c" (ecx)); } /* @@ -71,20 +74,26 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) * EAX (logical) address to monitor * ECX #GP if not zero */ -static inline void __mwaitx(unsigned long eax, unsigned long ebx, - unsigned long ecx) +static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* "mwaitx %eax, %ebx, %ecx;" */ - asm volatile(".byte 0x0f, 0x01, 0xfb;" - :: "a" (eax), "b" (ebx), "c" (ecx)); + /* No need for TSA buffer clearing on AMD */ + + asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx)); } -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +/* + * Re-enable interrupts right upon calling mwait in such a way that + * no interrupt can fire _before_ the execution of mwait, ie: no + * instruction must be placed between "sti" and "mwait". + * + * This is necessary because if an interrupt queues a timer before + * executing mwait, it would otherwise go unnoticed and the next tick + * would not be reprogrammed accordingly before mwait ever wakes up. + */ +static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - trace_hardirqs_on(); - /* "mwait %eax, %ecx;" */ - asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" - :: "a" (eax), "c" (ecx)); + + asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } /* @@ -97,20 +106,45 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. 
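
The MWAIT_HINT2CSTATE()/MWAIT_HINT2SUBSTATE() macros near the top of this file split an MWAIT hint into its C-state and substate nibbles. A small illustrative decode; the hint value is hypothetical:

	static u32 mwait_hint_example(void)
	{
		u32 hint = 0x21;	/* encodes C-state 2, substate 1 */

		/* MWAIT_HINT2CSTATE(hint) == 2, MWAIT_HINT2SUBSTATE(hint) == 1 */
		return MWAIT_HINT2CSTATE(hint);
	}
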
*/ -static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { - if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { - mb(); - clflush((void *)&current_thread_info()->flags); - mb(); - } + const void *addr = &current_thread_info()->flags; + + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); - __monitor((void *)&current_thread_info()->flags, 0, 0); - if (!need_resched()) + if (need_resched()) + goto out; + + if (ecx & 1) { __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); + } } + +out: current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx" */ + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1" + :: "c" (ecx), "d" (edx), "a" (eax)); +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..79d88d12c8fb 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -9,21 +9,31 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); -struct ctl_table; -extern int proc_nmi_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int unknown_nmi_panic; - #endif /* CONFIG_X86_LOCAL_APIC */ +extern int unknown_nmi_panic; +extern int panic_on_unrecovered_nmi; +extern int panic_on_io_nmi; + +/* NMI handler flags */ #define NMI_FLAG_FIRST 1 +/** + * enum - NMI types. + * @NMI_LOCAL: Local NMI, CPU-specific NMI generated by the Local APIC. + * @NMI_UNKNOWN: Unknown NMI, the source of the NMI may not be identified. + * @NMI_SERR: System Error NMI, typically triggered by PCI errors. + * @NMI_IO_CHECK: I/O Check NMI, related to I/O errors. + * @NMI_MAX: Maximum value for NMI types. + * + * NMI types are used to categorize NMIs and to dispatch them to the + * appropriate handler. + */ enum { NMI_LOCAL=0, NMI_UNKNOWN, @@ -32,6 +42,7 @@ enum { NMI_MAX }; +/* NMI handler return values */ #define NMI_DONE 0 #define NMI_HANDLED 1 @@ -41,14 +52,33 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; +/** + * register_nmi_handler - Register a handler for a specific NMI type + * @t: NMI type (e.g. NMI_LOCAL) + * @fn: The NMI handler + * @fg: Flags associated with the NMI handler + * @n: Name of the NMI handler + * @init: Optional __init* attributes for struct nmiaction + * + * Adds the provided handler to the list of handlers for the specified + * NMI type. Handlers flagged with NMI_FLAG_FIRST will be executed first. + * + * Sometimes the source of an NMI can't be reliably determined, which + * results in an NMI being tagged as "unknown". Register an additional + * handler using the NMI_UNKNOWN type to handle such cases. 
The + * caller would get one last chance to assume responsibility for the + * NMI. + * + * Return: 0 on success, or an error code on failure. + */ #define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ static struct nmiaction init fn##_na = { \ + .list = LIST_HEAD_INIT(fn##_na.list), \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ @@ -58,7 +88,18 @@ struct nmiaction { int __register_nmi_handler(unsigned int, struct nmiaction *); -void unregister_nmi_handler(unsigned int, const char *); +/** + * unregister_nmi_handler - Unregister a handler for a specific NMI type + * @type: NMI type (e.g. NMI_LOCAL) + * @name: Name of the NMI handler used during registration + * + * Removes the handler associated with the specified NMI type from the + * NMI handler list. The "name" is used as a lookup key to identify the + * handler. + */ +void unregister_nmi_handler(unsigned int type, const char *name); + +void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 12f12b5cf2ca..cd94221d8335 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -2,146 +2,88 @@ #ifndef _ASM_X86_NOPS_H #define _ASM_X86_NOPS_H +#include <asm/asm.h> + /* * Define nops for use with alternative() and for tracing. - * - * *_NOP5_ATOMIC must be a single instruction. */ -#define NOP_DS_PREFIX 0x3e +#ifndef CONFIG_64BIT -/* generic versions from gas - 1: nop - the following instructions are NOT nops in 64-bit mode, - for 64-bit mode use K8 or P6 nops instead - 2: movl %esi,%esi - 3: leal 0x00(%esi),%esi - 4: leal 0x00(,%esi,1),%esi - 6: leal 0x00000000(%esi),%esi - 7: leal 0x00000000(,%esi,1),%esi -*/ -#define GENERIC_NOP1 0x90 -#define GENERIC_NOP2 0x89,0xf6 -#define GENERIC_NOP3 0x8d,0x76,0x00 -#define GENERIC_NOP4 0x8d,0x74,0x26,0x00 -#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4 -#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 -#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 -#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7 -#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4 +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. 
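
A hedged usage sketch for the NMI handler API documented in the nmi.h hunk above; the handler and its name are hypothetical:

	static int example_nmi_handler(unsigned int type, struct pt_regs *regs)
	{
		/* Return NMI_HANDLED to claim the NMI, NMI_DONE to pass it on. */
		return NMI_DONE;
	}

	static void example_nmi_setup(void)
	{
		register_nmi_handler(NMI_UNKNOWN, example_nmi_handler, 0, "example");
		/* ... and on teardown: */
		unregister_nmi_handler(NMI_UNKNOWN, "example");
	}
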
+ */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 -/* Opteron 64bit nops - 1: nop - 2: osp nop - 3: osp osp nop - 4: osp osp osp nop -*/ -#define K8_NOP1 GENERIC_NOP1 -#define K8_NOP2 0x66,K8_NOP1 -#define K8_NOP3 0x66,K8_NOP2 -#define K8_NOP4 0x66,K8_NOP3 -#define K8_NOP5 K8_NOP3,K8_NOP2 -#define K8_NOP6 K8_NOP3,K8_NOP3 -#define K8_NOP7 K8_NOP4,K8_NOP3 -#define K8_NOP8 K8_NOP4,K8_NOP4 -#define K8_NOP5_ATOMIC 0x66,K8_NOP4 +#define ASM_NOP_MAX 8 -/* K7 nops - uses eax dependencies (arbitrary choice) - 1: nop - 2: movl %eax,%eax - 3: leal (,%eax,1),%eax - 4: leal 0x00(,%eax,1),%eax - 6: leal 0x00000000(%eax),%eax - 7: leal 0x00000000(,%eax,1),%eax -*/ -#define K7_NOP1 GENERIC_NOP1 -#define K7_NOP2 0x8b,0xc0 -#define K7_NOP3 0x8d,0x04,0x20 -#define K7_NOP4 0x8d,0x44,0x20,0x00 -#define K7_NOP5 K7_NOP4,K7_NOP1 -#define K7_NOP6 0x8d,0x80,0,0,0,0 -#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0 -#define K7_NOP8 K7_NOP7,K7_NOP1 -#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4 +#else -/* P6 nops - uses eax dependencies (Intel-recommended choice) - 1: nop - 2: osp nop - 3: nopl (%eax) - 4: nopl 0x00(%eax) - 5: nopl 0x00(%eax,%eax,1) - 6: osp nopl 0x00(%eax,%eax,1) - 7: nopl 0x00000000(%eax) - 8: nopl 0x00000000(%eax,%eax,1) - Note: All the above are assumed to be a single instruction. - There is kernel code that depends on this. -*/ -#define P6_NOP1 GENERIC_NOP1 -#define P6_NOP2 0x66,0x90 -#define P6_NOP3 0x0f,0x1f,0x00 -#define P6_NOP4 0x0f,0x1f,0x40,0 -#define P6_NOP5 0x0f,0x1f,0x44,0x00,0 -#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0 -#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0 -#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 -#define P6_NOP5_ATOMIC P6_NOP5 +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x00000000(%eax) + * 8: nopl 0x00000000(%eax,%eax,1) + * 9: cs nopl 0x00000000(%eax,%eax,1) + * 10: osp cs nopl 0x00000000(%eax,%eax,1) + * 11: osp osp cs nopl 0x00000000(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +#define BYTES_NOP9 0x2e,BYTES_NOP8 +#define BYTES_NOP10 0x66,BYTES_NOP9 +#define BYTES_NOP11 0x66,BYTES_NOP10 -#ifdef __ASSEMBLY__ -#define _ASM_MK_NOP(x) .byte x -#else -#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" -#endif +#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9) +#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10) +#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11) -#if defined(CONFIG_MK7) -#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC) -#elif defined(CONFIG_X86_P6_NOP) -#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3) -#define ASM_NOP4 
_ASM_MK_NOP(P6_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC) -#elif defined(CONFIG_X86_64) -#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC) -#else -#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8) -#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC) -#endif -#define ASM_NOP_MAX 8 -#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */ +#endif /* CONFIG_64BIT */ + +#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1) +#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2) +#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3) +#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4) +#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5) +#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6) +#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) +#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#ifndef __ASSEMBLY__ -extern const unsigned char * const *ideal_nops; -extern void arch_init_ideal_nops(void); +#ifndef __ASSEMBLER__ +extern const unsigned char * const x86_nops[]; #endif #endif /* _ASM_X86_NOPS_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dad12b767ba0..4f4b5e8a1574 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -4,11 +4,102 @@ #define _ASM_X86_NOSPEC_BRANCH_H_ #include <linux/static_key.h> +#include <linux/objtool.h> +#include <linux/linkage.h> #include <asm/alternative.h> -#include <asm/alternative-asm.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> +#include <asm/unwind_hints.h> +#include <asm/percpu.h> + +/* + * Call depth tracking for Intel SKL CPUs to address the RSB underflow + * issue in software. + * + * The tracking does not use a counter. It uses arithmetic shift + * right on call entry and logical shift left on return. + * + * The depth tracking variable is initialized to 0x8000.... when the call + * depth is zero. The arithmetic shift right sign extends the MSB and + * saturates after the 12th call. The shift count is 5 for both directions + * so the tracking covers 12 nested calls. + * + * Call + * 0: 0x8000000000000000 0x0000000000000000 + * 1: 0xfc00000000000000 0xf000000000000000 + * ... + * 11: 0xfffffffffffffff8 0xfffffffffffffc00 + * 12: 0xffffffffffffffff 0xffffffffffffffe0 + * + * After a return buffer fill the depth is credited 12 calls before the + * next stuffing has to take place. + * + * There is an inaccuracy for situations like this: + * + * 10 calls + * 5 returns + * 3 calls + * 4 returns + * 3 calls + * .... + * + * The shift count might cause this to be off by one in either direction, + * but there is still a cushion vs. the RSB depth. 
The algorithm does not + * claim to be perfect and it can be speculated around by the CPU, but it + * is considered that it obfuscates the problem enough to make exploitation + * extremely difficult. + */ +#define RET_DEPTH_SHIFT 5 +#define RSB_RET_STUFF_LOOPS 16 +#define RET_DEPTH_INIT 0x8000000000000000ULL +#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL +#define RET_DEPTH_CREDIT 0xffffffffffffffffULL + +#ifdef CONFIG_CALL_THUNKS_DEBUG +# define CALL_THUNKS_DEBUG_INC_CALLS \ + incq PER_CPU_VAR(__x86_call_count); +# define CALL_THUNKS_DEBUG_INC_RETS \ + incq PER_CPU_VAR(__x86_ret_count); +# define CALL_THUNKS_DEBUG_INC_STUFFS \ + incq PER_CPU_VAR(__x86_stuffs_count); +# define CALL_THUNKS_DEBUG_INC_CTXSW \ + incq PER_CPU_VAR(__x86_ctxsw_count); +#else +# define CALL_THUNKS_DEBUG_INC_CALLS +# define CALL_THUNKS_DEBUG_INC_RETS +# define CALL_THUNKS_DEBUG_INC_STUFFS +# define CALL_THUNKS_DEBUG_INC_CTXSW +#endif + +#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) + +#include <asm/asm-offsets.h> + +#define CREDIT_CALL_DEPTH \ + movq $-1, PER_CPU_VAR(__x86_call_depth); + +#define RESET_CALL_DEPTH \ + xor %eax, %eax; \ + bts $63, %rax; \ + movq %rax, PER_CPU_VAR(__x86_call_depth); + +#define RESET_CALL_DEPTH_FROM_CALL \ + movb $0xfc, %al; \ + shl $56, %rax; \ + movq %rax, PER_CPU_VAR(__x86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS + +#define INCREMENT_CALL_DEPTH \ + sarq $5, PER_CPU_VAR(__x86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS + +#else +#define CREDIT_CALL_DEPTH +#define RESET_CALL_DEPTH +#define RESET_CALL_DEPTH_FROM_CALL +#define INCREMENT_CALL_DEPTH +#endif /* * Fill the CPU return stack buffer. @@ -27,112 +118,121 @@ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. */ +#define RETPOLINE_THUNK_SIZE 32 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ /* + * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. + */ +#define __FILL_RETURN_SLOT \ + ANNOTATE_INTRA_FUNCTION_CALL; \ + call 772f; \ + int3; \ +772: + +/* + * Stuff the entire RSB. + * * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation + * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - -#ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_64 +#define __FILL_RETURN_BUFFER(reg, nr) \ + mov $(nr/2), reg; \ +771: \ + __FILL_RETURN_SLOT \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ + dec reg; \ + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; \ + CREDIT_CALL_DEPTH \ + CALL_THUNKS_DEBUG_INC_CTXSW +#else +/* + * i386 doesn't unconditionally have LFENCE, as such it can't + * do a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr) \ + .rept nr; \ + __FILL_RETURN_SLOT; \ + .endr; \ + add $(BITS_PER_LONG/8) * nr, %_ASM_SP; +#endif /* - * This should be used immediately before a retpoline alternative. It tells - * objtool where the retpolines are so that it can make sense of the control - * flow by just reading the original instruction(s) and ignoring the - * alternatives. 
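
An illustrative plain-C model of the saturating shift arithmetic described in the comment above. This is a sketch for intuition only; the real accounting is done by the INCREMENT_CALL_DEPTH/CREDIT_CALL_DEPTH asm that follows:

	static u64 depth_after_call(u64 depth)
	{
		/* sarq $5: sign-extends the MSB, saturating at call depth 12 */
		return (u64)((s64)depth >> RET_DEPTH_SHIFT);
	}

	static u64 depth_after_return(u64 depth)
	{
		/* shlq $5: credits one call back */
		return depth << RET_DEPTH_SHIFT;
	}
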
+ * Stuff a single RSB slot. + * + * To mitigate Post-Barrier RSB speculation, one CALL instruction must be + * forced to retire before letting a RET instruction execute. + * + * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed + * before this point. */ -.macro ANNOTATE_NOSPEC_ALTERNATIVE - .Lannotate_\@: - .pushsection .discard.nospec - .long .Lannotate_\@ - . - .popsection -.endm +#define __FILL_ONE_RETURN \ + __FILL_RETURN_SLOT \ + add $(BITS_PER_LONG/8), %_ASM_SP; \ + lfence; + +#ifdef __ASSEMBLER__ /* - * This should be used immediately before an indirect jump/call. It tells - * objtool the subsequent indirect jump/call is vouched safe for retpoline - * builds. + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. */ -.macro ANNOTATE_RETPOLINE_SAFE - .Lannotate_\@: - .pushsection .discard.retpoline_safe - _ASM_PTR .Lannotate_\@ - .popsection -.endm +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE /* - * These are the bare retpoline primitives for indirect jmp and call. - * Do not use these directly; they only exist to make the ALTERNATIVE - * invocation below less ugly. + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into its own annotation. */ -.macro RETPOLINE_JMP reg:req - call .Ldo_rop_\@ -.Lspec_trap_\@: - pause - lfence - jmp .Lspec_trap_\@ -.Ldo_rop_\@: - mov \reg, (%_ASM_SP) - ret +.macro VALIDATE_UNRET_END +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) + ANNOTATE_RETPOLINE_SAFE + nop +#endif .endm /* - * This is a wrapper around RETPOLINE_JMP so the called function in reg - * returns to the instruction after the macro. + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. */ -.macro RETPOLINE_CALL reg:req - jmp .Ldo_call_\@ -.Ldo_retpoline_jmp_\@: - RETPOLINE_JMP \reg -.Ldo_call_\@: - call .Ldo_retpoline_jmp_\@ +.macro __CS_PREFIX reg:req + .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 + .ifc \reg,\rs + .byte 0x2e + .endif + .endr .endm /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. + * + * NOTE: these do not take kCFI into account and are thus not comparable to C + * indirect calls, take care when using. The target of these should be an ENDBR + * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ - __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#ifdef CONFIG_MITIGATION_RETPOLINE + __CS_PREFIX \reg + jmp __x86_indirect_thunk_\reg #else - jmp *\reg + jmp *%\reg + int3 #endif .endm .macro CALL_NOSPEC reg:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ - __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD +#ifdef CONFIG_MITIGATION_RETPOLINE + __CS_PREFIX \reg + call __x86_indirect_thunk_\reg #else - call *\reg + call *%\reg #endif .endm @@ -140,48 +240,222 @@ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. 
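
For C code, an indirect call can be routed through the same thunks. A hedged sketch that assumes the C-side CALL_NOSPEC string and [thunk_target] operand from the non-__ASSEMBLER__ half of this header; the callback pointer is hypothetical:

	static void indirect_call_example(void (*callback)(void))
	{
		asm volatile(CALL_NOSPEC
			     : /* no outputs */
			     : [thunk_target] "r" (callback)
			     : "memory");
	}
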
*/ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ + __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2 + .Lskip_rsb_\@: +.endm + +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS +#endif +.endm + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, + * write_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro __UNTRAIN_RET ibpb_feature, call_depth_insns +#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) + VALIDATE_UNRET_END + CALL_UNTRAIN_RET + ALTERNATIVE_2 "", \ + "call write_ibpb", \ibpb_feature, \ + __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH +#endif +.endm + +#define UNTRAIN_RET \ + __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) + +#define UNTRAIN_RET_VM \ + __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) + +#define UNTRAIN_RET_FROM_CALL \ + __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) + + +.macro CALL_DEPTH_ACCOUNT +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING + ALTERNATIVE "", \ + __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +#endif +.endm + +/* + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +#ifdef CONFIG_X86_64 +#define VERW verw x86_verw_sel(%rip) +#else +/* + * In 32bit mode, the memory operand must be a %cs reference. The data segments + * may not be usable (vm86 mode), and the stack segment may not be flat (ESPFIX32). + */ +#define VERW verw %cs:x86_verw_sel #endif + +/* + * Provide a stringified VERW macro for simple usage, and a non-stringified + * VERW macro for use in more elaborate sequences, e.g. to encode a conditional + * VERW within an ALTERNATIVE. + */ +#define __CLEAR_CPU_BUFFERS __stringify(VERW) + +/* If necessary, emit VERW on exit-to-userspace to clear CPU buffers. 
*/ +#define CLEAR_CPU_BUFFERS \ + ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF + +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT .endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + +#else /* __ASSEMBLER__ */ + +#define ITS_THUNK_SIZE 64 + +typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +typedef u8 its_thunk_t[ITS_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +extern its_thunk_t __x86_indirect_its_thunk_array[]; + +#ifdef CONFIG_MITIGATION_RETHUNK +extern void __x86_return_thunk(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + +#ifdef CONFIG_MITIGATION_UNRET_ENTRY +extern void retbleed_return_thunk(void); +#else +static inline void retbleed_return_thunk(void) {} +#endif + +extern void srso_alias_untrain_ret(void); + +#ifdef CONFIG_MITIGATION_SRSO +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); +#else +static inline void srso_return_thunk(void) {} +static inline void srso_alias_return_thunk(void) {} +#endif + +#ifdef CONFIG_MITIGATION_ITS +extern void its_return_thunk(void); +#else +static inline void its_return_thunk(void) {} +#endif + +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + +extern void entry_untrain_ret(void); +extern void write_ibpb(void); + +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + +extern void (*x86_return_thunk)(void); + +extern void __warn_thunk(void); + +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING +extern void call_depth_return_thunk(void); + +#define CALL_DEPTH_ACCOUNT \ + ALTERNATIVE("", \ + __stringify(INCREMENT_CALL_DEPTH), \ + X86_FEATURE_CALL_DEPTH) + +DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); + +#ifdef CONFIG_CALL_THUNKS_DEBUG +DECLARE_PER_CPU(u64, __x86_call_count); +DECLARE_PER_CPU(u64, __x86_ret_count); +DECLARE_PER_CPU(u64, __x86_stuffs_count); +DECLARE_PER_CPU(u64, __x86_ctxsw_count); +#endif +#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ + +static inline void call_depth_return_thunk(void) {} +#define CALL_DEPTH_ACCOUNT "" + +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ + +#ifdef CONFIG_MITIGATION_RETPOLINE -#else /* __ASSEMBLY__ */ +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN -#define ANNOTATE_NOSPEC_ALTERNATIVE \ - "999:\n\t" \ - ".pushsection .discard.nospec\n\t" \ - ".long 999b - .\n\t" \ - ".popsection\n\t" +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN -#define ANNOTATE_RETPOLINE_SAFE \ - "999:\n\t" \ - ".pushsection .discard.retpoline_safe\n\t" \ - _ASM_PTR " 999b\n\t" \ - ".popsection\n\t" +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN -#ifdef CONFIG_RETPOLINE #ifdef CONFIG_X86_64 /* + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. 
+ */ +#define __CS_PREFIX(reg) \ + ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\rs," reg "\n" \ + ".byte 0x2e\n" \ + ".endif\n" \ + ".endr\n" + +/* * Inline asm uses the %V modifier which is only in newer GCC - * which is ensured when CONFIG_RETPOLINE is defined. + * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ -# define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ - ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE, \ - "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) +#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ + "call __x86_indirect_thunk_%V[thunk_target]\n" + # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -191,9 +465,8 @@ * here, anyway. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ + ANNOTATE_RETPOLINE_SAFE "\n" \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ @@ -202,16 +475,16 @@ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ - "903: addl $4, %%esp;\n" \ + "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ + ANNOTATE_RETPOLINE_SAFE "\n" \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -223,9 +496,12 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -240,35 +516,12 @@ enum spectre_v2_user_mitigation { /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_AUTO, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, SPEC_STORE_BYPASS_SECCOMP, }; -extern char __indirect_thunk_start[]; -extern char __indirect_thunk_end[]; - -/* - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. Both - * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ALL *might* it be avoided. 
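
A typical call site for the C-level CALL_NOSPEC above passes the target via THUNK_TARGET. A minimal sketch modeled on existing users follows; the function name and single-argument convention are hypothetical, and a real site must also account for any caller-saved registers the callee clobbers:

#include <asm/nospec-branch.h>

static inline long invoke_nospec(long (*fn)(long), long arg)
{
	long ret;

	/* expands to 'call __x86_indirect_thunk_<reg>' with retpolines on */
	asm volatile(CALL_NOSPEC
		     : "=a" (ret), ASM_CALL_CONSTRAINT
		     : THUNK_TARGET(fn), "D" (arg)
		     : "memory");
	return ret;
}
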
- */ -static inline void vmexit_fill_RSB(void) -{ -#ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); -#endif -} - static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { @@ -280,15 +533,20 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); + static inline void indirect_branch_prediction_barrier(void) { - u64 val = PRED_CMD_IBPB; - - alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) + : ASM_CALL_CONSTRAINT + :: "rax", "rcx", "rdx", "memory"); } /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void update_spec_ctrl_cond(u64 val); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -298,18 +556,18 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -318,65 +576,51 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -#endif /* __ASSEMBLY__ */ +DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -/* - * Below is used in the eBPF JIT compiler and emits the byte sequence - * for the following assembly: - * - * With retpolines configured: - * - * callq do_rop - * spec_trap: - * pause - * lfence - * jmp spec_trap - * do_rop: - * mov %rax,(%rsp) for x86_64 - * mov %edx,(%esp) for x86_32 - * retq +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); + +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); + +extern u16 x86_verw_sel; + +#include <asm/segment.h> + +/** + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * - * Without retpolines configured: + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static __always_inline void x86_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. 
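
The start/end pair above is meant to bracket calls into firmware, with preemption held off across the whole region (note the preempt_disable()/preempt_enable() placement in the macros). A sketch of the expected shape; efi_call_example and fw_fn are hypothetical:

#include <asm/nospec-branch.h>

static unsigned long efi_call_example(unsigned long (*fw_fn)(void))
{
	unsigned long ret;

	firmware_restrict_branch_speculation_start(); /* IBRS on + IBPB, preempt off */
	ret = fw_fn();		/* firmware code runs with restricted prediction */
	firmware_restrict_branch_speculation_end();   /* restore, preempt on */

	return ret;
}
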
* - * jmp *%rax for x86_64 - * jmp *%edx for x86_32 + * Clear CPU buffers if the corresponding static key is enabled */ -#ifdef CONFIG_RETPOLINE -# ifdef CONFIG_X86_64 -# define RETPOLINE_RAX_BPF_JIT_SIZE 17 -# define RETPOLINE_RAX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* callq do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ - EMIT1(0xC3); /* retq */ \ -} while (0) -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* call do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ - EMIT1(0xC3); /* ret */ \ -} while (0) -# endif -#else /* !CONFIG_RETPOLINE */ -# ifdef CONFIG_X86_64 -# define RETPOLINE_RAX_BPF_JIT_SIZE 2 -# define RETPOLINE_RAX_BPF_JIT() \ - EMIT2(0xFF, 0xE0); /* jmp *%rax */ -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ - EMIT2(0xFF, 0xE2) /* jmp *%edx */ -# endif -#endif +static __always_inline void x86_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); +} + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index bbfde3d2662f..53ba39ce010c 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -3,21 +3,13 @@ #define _ASM_X86_NUMA_H #include <linux/nodemask.h> +#include <linux/errno.h> #include <asm/topology.h> #include <asm/apicdef.h> #ifdef CONFIG_NUMA -#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) - -/* - * Too small node sizes may confuse the VM badly. Usually they - * result from BIOS bugs. 
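
x86_idle_clear_cpu_buffers() above is the static-branch-gated flavor for the idle path; an idle routine would use it along these lines (a sketch, with the MWAIT details elided and idle_enter_sketch hypothetical):

#include <asm/nospec-branch.h>

static __always_inline void idle_enter_sketch(unsigned long eax, unsigned long ecx)
{
	/*
	 * Compiles down to a patched-out branch unless cpu_buf_idle_clear
	 * was enabled during mitigation selection, so unaffected CPUs pay
	 * nothing here.
	 */
	x86_idle_clear_cpu_buffers();

	/* ... then enter the idle state, e.g. via MWAIT ... */
}
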
So dont recognize nodes as standalone - * NUMA entities that have less than this amount of RAM listed: - */ -#define NODE_MIN_SIZE (4*1024*1024) - extern int numa_off; /* @@ -31,9 +23,6 @@ extern int numa_off; extern s16 __apicid_to_node[MAX_LOCAL_APIC]; extern nodemask_t numa_nodes_parsed __initdata; -extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); -extern void __init numa_set_distance(int from, int to, int distance); - static inline void set_apicid_to_node(int apicid, s16 node) { __apicid_to_node[apicid] = node; @@ -52,32 +41,24 @@ static inline int numa_cpu_node(int cpu) } #endif /* CONFIG_NUMA */ -#ifdef CONFIG_X86_32 -# include <asm/numa_32.h> -#endif - #ifdef CONFIG_NUMA extern void numa_set_node(int cpu, int node); extern void numa_clear_node(int cpu); extern void __init init_cpu_to_node(void); -extern void numa_add_cpu(int cpu); -extern void numa_remove_cpu(int cpu); +extern void numa_add_cpu(unsigned int cpu); +extern void numa_remove_cpu(unsigned int cpu); +extern void init_gi_nodes(void); #else /* CONFIG_NUMA */ static inline void numa_set_node(int cpu, int node) { } static inline void numa_clear_node(int cpu) { } static inline void init_cpu_to_node(void) { } -static inline void numa_add_cpu(int cpu) { } -static inline void numa_remove_cpu(int cpu) { } +static inline void numa_add_cpu(unsigned int cpu) { } +static inline void numa_remove_cpu(unsigned int cpu) { } +static inline void init_gi_nodes(void) { } #endif /* CONFIG_NUMA */ #ifdef CONFIG_DEBUG_PER_CPU_MAPS -void debug_cpumask_set_cpu(int cpu, int node, bool enable); +void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable); #endif -#ifdef CONFIG_NUMA_EMU -#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) -#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) -void numa_emu_cmdline(char *); -#endif /* CONFIG_NUMA_EMU */ - #endif /* _ASM_X86_NUMA_H */ diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h deleted file mode 100644 index 9c8e9e85be77..000000000000 --- a/arch/x86/include/asm/numa_32.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_NUMA_32_H -#define _ASM_X86_NUMA_32_H - -#ifdef CONFIG_HIGHMEM -extern void set_highmem_pages_init(void); -#else -static inline void set_highmem_pages_init(void) -{ -} -#endif - -#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index c2bf1de5d901..6fe76282aceb 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -9,12 +9,10 @@ struct olpc_platform_t { int flags; uint32_t boardrev; - int ecver; }; #define OLPC_F_PRESENT 0x01 #define OLPC_F_DCON 0x02 -#define OLPC_F_EC_WIDE_SCI 0x04 #ifdef CONFIG_OLPC @@ -64,13 +62,6 @@ static inline int olpc_board_at_least(uint32_t rev) return olpc_platform_info.boardrev >= rev; } -extern void olpc_ec_wakeup_set(u16 value); -extern void olpc_ec_wakeup_clear(u16 value); -extern bool olpc_ec_wakeup_available(void); - -extern int olpc_ec_mask_write(u16 bits); -extern int olpc_ec_sci_query(u16 *sci_value); - #else static inline int machine_is_olpc(void) @@ -83,14 +74,6 @@ static inline int olpc_has_dcon(void) return 0; } -static inline void olpc_ec_wakeup_set(u16 value) { } -static inline void olpc_ec_wakeup_clear(u16 value) { } - -static inline bool olpc_ec_wakeup_available(void) -{ - return false; -} - #endif #ifdef CONFIG_OLPC_XO1_PM @@ -101,20 +84,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value); extern int pci_olpc_init(void); -/* SCI source values */ - -#define 
EC_SCI_SRC_EMPTY 0x00 -#define EC_SCI_SRC_GAME 0x01 -#define EC_SCI_SRC_BATTERY 0x02 -#define EC_SCI_SRC_BATSOC 0x04 -#define EC_SCI_SRC_BATERR 0x08 -#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */ -#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */ -#define EC_SCI_SRC_ACPWR 0x40 -#define EC_SCI_SRC_BATCRIT 0x80 -#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */ -#define EC_SCI_SRC_ALL 0x1FF - /* GPIO assignments */ #define OLPC_GPIO_MIC_AC 1 diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h new file mode 100644 index 000000000000..07bacf3e160e --- /dev/null +++ b/arch/x86/include/asm/orc_header.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/orc_hash.h> + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h index 91c8d868424d..241631282e43 100644 --- a/arch/x86/include/asm/orc_lookup.h +++ b/arch/x86/include/asm/orc_lookup.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _ORC_LOOKUP_H #define _ORC_LOOKUP_H diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 46f516dd80ce..e0125afa53fb 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _ORC_TYPES_H @@ -51,29 +39,15 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 -/* - * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the - * caller's SP right before it made the call). Used for all callable - * functions, i.e. all C code and all callable asm functions. 
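
ORC_HEADER above is intended to be dropped at file scope in the unwinder code, so the kernel carries the same format hash that objtool embedded and any mismatch is caught. Usage is just:

#include <asm/orc_header.h>

ORC_HEADER;	/* places the 20-byte hash array into the .orc_header section */
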
- * - * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points - * to a fully populated pt_regs from a syscall, interrupt, or exception. - * - * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset - * points to the iret return frame. - * - * The UNWIND_HINT macros are used only for the unwind_hint struct. They - * aren't used in struct orc_entry due to size and complexity constraints. - * Objtool converts them to real types when it converts the hints to orc - * entries. - */ -#define ORC_TYPE_CALL 0 -#define ORC_TYPE_REGS 1 -#define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 +#define ORC_TYPE_UNDEFINED 0 +#define ORC_TYPE_END_OF_STACK 1 +#define ORC_TYPE_CALL 2 +#define ORC_TYPE_REGS 3 +#define ORC_TYPE_REGS_PARTIAL 4 + +#ifndef __ASSEMBLER__ +#include <asm/byteorder.h> -#ifndef __ASSEMBLY__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -85,25 +59,20 @@ struct orc_entry { s16 sp_offset; s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; - unsigned type:2; - unsigned end:1; + unsigned type:3; + unsigned signal:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:4; + unsigned signal:1; + unsigned type:3; +#endif } __packed; -/* - * This struct is used by asm and inline asm code to manually annotate the - * location of registers on the stack for the ORC unwinder. - * - * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. - */ -struct unwind_hint { - u32 ip; - s16 sp_offset; - u8 sp_reg; - u8 type; - u8 end; -}; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 7555b48803a8..9265f2fca99a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -14,7 +14,7 @@ #include <asm/page_32.h> #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct page; @@ -34,9 +34,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, copy_page(to, from); } -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +#define vma_alloc_zeroed_movable_folio(vma, vaddr) \ + vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr) #ifndef __pa #define __pa(x) __phys_addr((unsigned long)(x)) @@ -67,11 +66,25 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, * virt_addr_valid(kaddr) returns true. 
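
The mirrored field order in the two bitfield variants above keeps the packed byte layout identical on either endianness (objtool writes the entries on the build host). If I am counting the widths right, both variants fit in two bytes after the two s16 offsets, which a build-time assertion could pin down:

#include <linux/build_bug.h>
#include <asm/orc_types.h>

/* 2 x s16 offsets + 12 (LE) or 16 (BE) bits of flags, __packed */
static_assert(sizeof(struct orc_entry) == 6, "unexpected ORC entry size");
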
*/ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) extern bool __virt_addr_valid(unsigned long kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) -#endif /* __ASSEMBLY__ */ +static __always_inline void *pfn_to_kaddr(unsigned long pfn) +{ + return __va(pfn << PAGE_SHIFT); +} + +static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return __canonical_address(vaddr, vaddr_bits) == vaddr; +} + +#endif /* __ASSEMBLER__ */ #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 94dbd51df58f..0c623706cb7e 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -4,7 +4,7 @@ #include <asm/page_32_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL @@ -15,23 +15,6 @@ extern unsigned long __phys_addr(unsigned long); #define __phys_addr_symbol(x) __phys_addr(x) #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_FLATMEM */ - -#ifdef CONFIG_X86_USE_3DNOW -#include <asm/mmx.h> - -static inline void clear_page(void *page) -{ - mmx_clear_page(page); -} - -static inline void copy_page(void *to, void *from) -{ - mmx_copy_page(to, from); -} -#else /* !CONFIG_X86_USE_3DNOW */ #include <linux/string.h> static inline void clear_page(void *page) @@ -43,7 +26,6 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* CONFIG_X86_3DNOW */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0d5c739eebd7..623f1e9f493e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -11,8 +11,8 @@ * a virtual address space of one gigabyte, which limits the * amount of physical memory you can use to about 950MB. * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. + * If you want more physical memory than this then see the CONFIG_VMSPLIT_2G + * and CONFIG_HIGHMEM4G options in the kernel configuration. */ #define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL) #define __PAGE_OFFSET __PAGE_OFFSET_BASE @@ -22,11 +22,9 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 0 -#define DEBUG_STACK 0 -#define MCE_STACK 0 -#define N_EXCEPTION_STACKS 1 +#define IRQ_STACK_SIZE THREAD_SIZE + +#define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE /* @@ -44,11 +42,28 @@ #endif /* CONFIG_X86_PAE */ /* - * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) + * User space process size: 3GB (default). + */ +#define IA32_PAGE_OFFSET __PAGE_OFFSET +#define TASK_SIZE __PAGE_OFFSET +#define TASK_SIZE_LOW TASK_SIZE +#define TASK_SIZE_MAX TASK_SIZE +#define DEFAULT_MAP_WINDOW TASK_SIZE +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +/* + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual + * address for the kernel image, rather than the limit on the size itself. 
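
__canonical_address() above is a sign extension from bit (vaddr_bits - 1). With 48-bit virtual addresses (4-level paging) the shifts work out as follows; canonical_examples() is purely illustrative:

#include <linux/types.h>
#include <asm/page.h>

static void canonical_examples(void)
{
	/* 64 - 48 = 16: shift the tag bits out, arithmetic-shift back in */
	u64 a = __canonical_address(0x0000ffffffffffffULL, 48);
	/* a == 0xffffffffffffffffULL: bit 47 was set, bits 48..63 follow it */

	u64 b = __is_canonical_address(0x00007fffffffffffULL, 48);
	/* b == 1: highest user address, already canonical */

	u64 c = __is_canonical_address(0x0000800000000000ULL, 48);
	/* c == 0: bit 47 set while 48..63 are clear -> non-canonical hole */

	(void)a; (void)b; (void)c;
}
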
On + * 32-bit, this is not a strict limit, but this value is used to limit the + * link-time virtual address range of the kernel, and by KASLR to limit the + * randomized address from which the kernel is executed. A relocatable kernel + * can be loaded somewhat higher than KERNEL_IMAGE_SIZE as long as enough space + * remains for the vmalloc area. */ #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This much address space is reserved for vmalloc() and iomap() @@ -58,8 +73,7 @@ extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); -extern void setup_bootmem_allocator(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_32_DEFS_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 939b1cff4a7b..2f0e47be79a4 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -4,9 +4,13 @@ #include <asm/page_64_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <asm/cpufeatures.h> #include <asm/alternative.h> +#include <linux/kmsan-checks.h> +#include <linux/mmdebug.h> + /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; extern unsigned long phys_base; @@ -14,8 +18,9 @@ extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; +extern unsigned long direct_map_physmem_end; -static inline unsigned long __phys_addr_nodebug(unsigned long x) +static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; @@ -27,36 +32,79 @@ static inline unsigned long __phys_addr_nodebug(unsigned long x) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); -extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) -#define __phys_addr_symbol(x) \ - ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif -#define __phys_reloc_hide(x) (x) +static inline unsigned long __phys_addr_symbol(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_pfn) -#endif + /* only check upper bounds since lower bounds will trigger carry */ + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + + return y + phys_base; +} + +#define __phys_reloc_hide(x) (x) void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); +KCFI_REFERENCE(clear_page_orig); +KCFI_REFERENCE(clear_page_rep); +KCFI_REFERENCE(clear_page_erms); static inline void clear_page(void *page) { + /* + * Clean up KMSAN metadata for the page being cleared. The assembly call + * below clobbers @page, so we perform unpoisoning before it. + */ + kmsan_unpoison_memory(page, PAGE_SIZE); alternative_call_2(clear_page_orig, clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), - "0" (page) - : "cc", "memory", "rax", "rcx"); + "D" (page), + "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); +KCFI_REFERENCE(copy_page); + +/* + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. 
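
The alternative_call_2() in clear_page() above patches the call target at boot; the effective behavior, with the feature tests written out for illustration, is roughly:

#include <asm/page.h>
#include <asm/cpufeature.h>
#include <linux/kmsan-checks.h>

static inline void clear_page_sketch(void *page)
{
	/* must precede the call: the patched asm clobbers @page */
	kmsan_unpoison_memory(page, PAGE_SIZE);

	if (cpu_feature_enabled(X86_FEATURE_ERMS))
		clear_page_erms(page);		/* rep stosb */
	else if (cpu_feature_enabled(X86_FEATURE_REP_GOOD))
		clear_page_rep(page);		/* rep stosq */
	else
		clear_page_orig(page);		/* unrolled stores */
}
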
+ * We avoid this particular problem by preventing anything + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] + */ +static __always_inline unsigned long task_size_max(void) +{ + unsigned long ret; + + alternative_io("movq %[small],%0","movq %[large],%0", + X86_FEATURE_LA57, + "=r" (ret), + [small] "i" ((1ul << 47)-PAGE_SIZE), + [large] "i" ((1ul << 56)-PAGE_SIZE)); + + return ret; +} -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_VSYSCALL_EMULATION # define __HAVE_ARCH_GATE_AREA 1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599..7400dab373fe 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/kaslr.h> #endif @@ -14,22 +14,21 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define CURRENT_MASK (~(THREAD_SIZE - 1)) -#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) -#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) - #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 2 -#define DEBUG_STACK 3 -#define MCE_STACK 4 -#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ +/* + * The index for the tss.ist[] array. The hardware limit is 7 entries. + */ +#define IST_INDEX_DF 0 +#define IST_INDEX_NMI 1 +#define IST_INDEX_DB 2 +#define IST_INDEX_MCE 3 +#define IST_INDEX_VC 4 /* * Set __PAGE_OFFSET to the most negative possible address + @@ -42,27 +41,39 @@ #define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL) #define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL) -#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base -#else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 -#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 52 - -#ifdef CONFIG_X86_5LEVEL #define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47) -#else -#define __VIRTUAL_MASK_SHIFT 47 -#endif + +#define TASK_SIZE_MAX task_size_max() +#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ + 0xc0000000 : 0xFFFFe000) + +#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? 
\ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) + +#define STACK_TOP TASK_SIZE_LOW +#define STACK_TOP_MAX TASK_SIZE_MAX /* - * Maximum kernel image size is limited to 1 GiB, due to the fixmap living - * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual + * address for the kernel image, rather than the limit on the size itself. + * This can be at most 1 GiB, due to the fixmap living in the next 1 GiB (see + * level2_kernel_pgt in arch/x86/kernel/head_64.S). * * On KASLR use 1 GiB by default, leaving 1 GiB for modules once the * page tables are fully set up. diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..018a8d906ca3 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -6,25 +6,16 @@ #include <linux/types.h> #include <linux/mem_encrypt.h> -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) +#include <vdso/page.h> #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) @@ -35,14 +26,12 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC -#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ - CONFIG_PHYSICAL_ALIGN) +/* Physical address where kernel should be loaded. 
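
LOAD_PHYSICAL_ADDR above rounds CONFIG_PHYSICAL_START up to the CONFIG_PHYSICAL_ALIGN boundary via the usual mask trick. Worked out with the common x86-64 defaults (an assumption; configs vary):

/* as defined in uapi/linux/const.h */
#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))

/* CONFIG_PHYSICAL_START=0x1000000, CONFIG_PHYSICAL_ALIGN=0x200000 (2 MiB) */
static const unsigned long load_pa =
	__ALIGN_KERNEL_MASK(0x1000000UL, 0x200000UL - 1);  /* 0x1000000, already aligned */
static const unsigned long load_pa_bumped =
	__ALIGN_KERNEL_MASK(0x1080000UL, 0x200000UL - 1);  /* rounds up to 0x1200000 */
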
*/ +#define LOAD_PHYSICAL_ADDR __ALIGN_KERNEL_MASK(CONFIG_PHYSICAL_START, CONFIG_PHYSICAL_ALIGN - 1) -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR) #ifdef CONFIG_X86_64 #include <asm/page_64_types.h> @@ -52,7 +41,7 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif /* CONFIG_X86_64 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK extern phys_addr_t physical_mask; @@ -73,11 +62,8 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a97f28d914d5..b5e59a7ba0d0 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,22 +4,35 @@ /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ +#include <asm/paravirt_types.h> + +#ifndef __ASSEMBLER__ +struct mm_struct; +#endif + #ifdef CONFIG_PARAVIRT #include <asm/pgtable_types.h> #include <asm/asm.h> #include <asm/nospec-branch.h> -#include <asm/paravirt_types.h> - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> +#include <linux/static_call_types.h> #include <asm/frame.h> -static inline unsigned long long paravirt_sched_clock(void) +u64 dummy_steal_clock(int cpu); +u64 dummy_sched_clock(void); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); +DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); + +void paravirt_set_sched_clock(u64 (*func)(void)); + +static __always_inline u64 paravirt_sched_clock(void) { - return PVOP_CALL0(unsigned long long, time.sched_clock); + return static_call(pv_sched_clock)(); } struct static_key; @@ -33,21 +46,31 @@ bool pv_is_native_vcpu_is_preempted(void); static inline u64 paravirt_steal_clock(int cpu) { - return PVOP_CALL1(u64, time.steal_clock, cpu); + return static_call(pv_steal_clock)(cpu); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init paravirt_set_cap(void); +#endif + /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { - pv_ops.cpu.io_delay(); + PVOP_VCALL0(cpu.io_delay); #ifdef REALLY_SLOW_IO - pv_ops.cpu.io_delay(); - pv_ops.cpu.io_delay(); - pv_ops.cpu.io_delay(); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); #endif } -static inline void __flush_tlb(void) +void native_flush_tlb_local(void); +void native_flush_tlb_global(void); +void native_flush_tlb_one_user(unsigned long addr); +void native_flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +static inline void __flush_tlb_local(void) { PVOP_VCALL0(mmu.flush_tlb_user); } @@ -62,20 +85,31 @@ static inline void __flush_tlb_one_user(unsigned long addr) PVOP_VCALL1(mmu.flush_tlb_one_user, addr); } -static inline void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) +static inline void __flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { - PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); + PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); } -static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +static inline void 
paravirt_arch_exit_mmap(struct mm_struct *mm) { - PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); + PVOP_VCALL1(mmu.exit_mmap, mm); } -static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +static inline void notify_page_enc_status_changed(unsigned long pfn, + int npages, bool enc) { - PVOP_VCALL1(mmu.exit_mmap, mm); + PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); +} + +static __always_inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); } #ifdef CONFIG_PARAVIRT_XXL @@ -94,12 +128,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, /* * These special macros can be used to get or set a debugging register */ -static inline unsigned long paravirt_get_debugreg(int reg) +static __always_inline unsigned long paravirt_get_debugreg(int reg) { return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) -static inline void set_debugreg(unsigned long val, int reg) +static __always_inline void set_debugreg(unsigned long val, int reg) { PVOP_VCALL2(cpu.set_debugreg, reg, val); } @@ -114,24 +148,26 @@ static inline void write_cr0(unsigned long x) PVOP_VCALL1(cpu.write_cr0, x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr2); + return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, + "mov %%cr2, %%rax;", ALT_NOT_XEN); } -static inline void write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr3); + return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, + "mov %%cr3, %%rax;", ALT_NOT_XEN); } static inline void write_cr3(unsigned long x) { - PVOP_VCALL1(mmu.write_cr3, x); + PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); } static inline void __write_cr4(unsigned long x) @@ -139,55 +175,24 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -#ifdef CONFIG_X86_64 -static inline unsigned long read_cr8(void) -{ - return PVOP_CALL0(unsigned long, cpu.read_cr8); -} - -static inline void write_cr8(unsigned long x) -{ - PVOP_VCALL1(cpu.write_cr8, x); -} -#endif - -static inline void arch_safe_halt(void) -{ - PVOP_VCALL0(irq.safe_halt); -} - -static inline void halt(void) -{ - PVOP_VCALL0(irq.halt); -} - -static inline void wbinvd(void) -{ - PVOP_VCALL0(cpu.wbinvd); -} - -#define get_kernel_rpl() (pv_info.kernel_rpl) - -static inline u64 paravirt_read_msr(unsigned msr) +static inline u64 paravirt_read_msr(u32 msr) { return PVOP_CALL1(u64, cpu.read_msr, msr); } -static inline void paravirt_write_msr(unsigned msr, - unsigned low, unsigned high) +static inline void paravirt_write_msr(u32 msr, u64 val) { - PVOP_VCALL3(cpu.write_msr, msr, low, high); + PVOP_VCALL2(cpu.write_msr, msr, val); } -static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) +static inline int paravirt_read_msr_safe(u32 msr, u64 *val) { - return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); + return PVOP_CALL2(int, cpu.read_msr_safe, msr, val); } -static inline int paravirt_write_msr_safe(unsigned msr, - unsigned low, unsigned high) +static inline int paravirt_write_msr_safe(u32 msr, u64 val) { - return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); + return PVOP_CALL2(int, cpu.write_msr_safe, msr, val); } #define rdmsr(msr, val1, val2) \ @@ 
-197,55 +202,46 @@ do { \ val2 = _l >> 32; \ } while (0) -#define wrmsr(msr, val1, val2) \ -do { \ - paravirt_write_msr(msr, val1, val2); \ -} while (0) +static __always_inline void wrmsr(u32 msr, u32 low, u32 high) +{ + paravirt_write_msr(msr, (u64)high << 32 | low); +} -#define rdmsrl(msr, val) \ +#define rdmsrq(msr, val) \ do { \ val = paravirt_read_msr(msr); \ } while (0) -static inline void wrmsrl(unsigned msr, u64 val) +static inline void wrmsrq(u32 msr, u64 val) { - wrmsr(msr, (u32)val, (u32)(val>>32)); + paravirt_write_msr(msr, val); } -#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) +static inline int wrmsrq_safe(u32 msr, u64 val) +{ + return paravirt_write_msr_safe(msr, val); +} /* rdmsr with exception handling */ #define rdmsr_safe(msr, a, b) \ ({ \ - int _err; \ - u64 _l = paravirt_read_msr_safe(msr, &_err); \ + u64 _l; \ + int _err = paravirt_read_msr_safe((msr), &_l); \ (*a) = (u32)_l; \ - (*b) = _l >> 32; \ + (*b) = (u32)(_l >> 32); \ _err; \ }) -static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) +static __always_inline int rdmsrq_safe(u32 msr, u64 *p) { - int err; - - *p = paravirt_read_msr_safe(msr, &err); - return err; + return paravirt_read_msr_safe(msr, p); } -static inline unsigned long long paravirt_read_pmc(int counter) +static __always_inline u64 rdpmc(int counter) { return PVOP_CALL1(u64, cpu.read_pmc, counter); } -#define rdpmc(counter, low, high) \ -do { \ - u64 _l = paravirt_read_pmc(counter); \ - low = (u32)_l; \ - high = _l >> 32; \ -} while (0) - -#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) - static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); @@ -283,12 +279,10 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu) PVOP_VCALL2(cpu.load_tls, t, cpu); } -#ifdef CONFIG_X86_64 static inline void load_gs_index(unsigned int gs) { PVOP_VCALL1(cpu.load_gs_index, gs); } -#endif static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) @@ -306,21 +300,22 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } -static inline void set_iopl_mask(unsigned mask) + +#ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_invalidate_io_bitmap(void) { - PVOP_VCALL1(cpu.set_iopl_mask, mask); + PVOP_VCALL0(cpu.invalidate_io_bitmap); } -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) +static inline void tss_update_io_bitmap(void) { - PVOP_VCALL2(mmu.activate_mm, prev, next); + PVOP_VCALL0(cpu.update_io_bitmap); } +#endif -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_enter_mmap(struct mm_struct *next) { - PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); + PVOP_VCALL1(mmu.enter_mmap, next); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) @@ -373,157 +368,92 @@ static inline void paravirt_release_p4d(unsigned long pfn) static inline pte_t __pte(pteval_t val) { - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val); - - return (pte_t) { .pte = ret }; + return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pteval_t pte_val(pte_t pte) { - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pteval_t, mmu.pte_val, 
- pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte); - - return ret; + return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline pgd_t __pgd(pgdval_t val) { - pgdval_t ret; - - if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val); - - return (pgd_t) { ret }; + return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pgdval_t pgd_val(pgd_t pgd) { - pgdval_t ret; - - if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); - else - ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd); - - return ret; + return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, + "mov %%rdi, %%rax", ALT_NOT_XEN); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION -static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, +static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pteval_t ret; - ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep); + ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep); return (pte_t) { .pte = ret }; } -static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) { - if (sizeof(pteval_t) > sizeof(long)) - /* 5 arg words */ - pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte); - else - PVOP_VCALL4(mmu.ptep_modify_prot_commit, - mm, addr, ptep, pte.pte); -} -static inline void set_pte(pte_t *ptep, pte_t pte) -{ - if (sizeof(pteval_t) > sizeof(long)) - PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32); - else - PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); + PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void set_pte(pte_t *ptep, pte_t pte) { - if (sizeof(pteval_t) > sizeof(long)) - /* 5 arg words */ - pv_ops.mmu.set_pte_at(mm, addr, ptep, pte); - else - PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte); + PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - pmdval_t val = native_pmd_val(pmd); - - if (sizeof(pmdval_t) > sizeof(long)) - PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32); - else - PVOP_VCALL2(mmu.set_pmd, pmdp, val); + PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)); } -#if CONFIG_PGTABLE_LEVELS >= 3 static inline pmd_t __pmd(pmdval_t val) { - pmdval_t ret; - - if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32); - else - ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val); - - return (pmd_t) { ret }; + return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pmdval_t pmd_val(pmd_t pmd) { - pmdval_t ret; - - if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); - else - ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd); - - return ret; + return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void set_pud(pud_t *pudp, pud_t pud) { - pudval_t val = 
native_pud_val(pud); - - if (sizeof(pudval_t) > sizeof(long)) - PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32); - else - PVOP_VCALL2(mmu.set_pud, pudp, val); + PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud)); } -#if CONFIG_PGTABLE_LEVELS >= 4 + static inline pud_t __pud(pudval_t val) { pudval_t ret; - ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val); + ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, + "mov %%rdi, %%rax", ALT_NOT_XEN); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { - return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud); + return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void pud_clear(pud_t *pudp) { - set_pud(pudp, __pud(0)); + set_pud(pudp, native_make_pud(0)); } static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) @@ -533,18 +463,18 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) PVOP_VCALL2(mmu.set_p4d, p4dp, val); } -#if CONFIG_PGTABLE_LEVELS >= 5 - static inline p4d_t __p4d(p4dval_t val) { - p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val); + p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, + "mov %%rdi, %%rax", ALT_NOT_XEN); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { - return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d); + return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) @@ -560,40 +490,15 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) } while (0) #define pgd_clear(pgdp) do { \ - if (pgtable_l5_enabled()) \ - set_pgd(pgdp, __pgd(0)); \ + if (pgtable_l5_enabled()) \ + set_pgd(pgdp, native_make_pgd(0)); \ } while (0) -#endif /* CONFIG_PGTABLE_LEVELS == 5 */ - static inline void p4d_clear(p4d_t *p4dp) { - set_p4d(p4dp, __p4d(0)); + set_p4d(p4dp, native_make_p4d(0)); } -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ - -#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ - -#ifdef CONFIG_X86_PAE -/* Special-case pte-setting operations for PAE, which can't update a - 64-bit pte atomically */ -static inline void set_pte_atomic(pte_t *ptep, pte_t pte) -{ - PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32); -} - -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep); -} - -static inline void pmd_clear(pmd_t *pmdp) -{ - PVOP_VCALL1(mmu.pmd_clear, pmdp); -} -#else /* !CONFIG_X86_PAE */ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { set_pte(ptep, pte); @@ -602,14 +507,13 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - set_pte_at(mm, addr, ptep, __pte(0)); + set_pte(ptep, native_make_pte(0)); } static inline void pmd_clear(pmd_t *pmdp) { - set_pmd(pmdp, __pmd(0)); + set_pmd(pmdp, native_make_pmd(0)); } -#endif /* CONFIG_X86_PAE */ #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) @@ -655,7 +559,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { - PVOP_VCALLEE1(lock.queued_spin_unlock, lock); + PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock, + "movb $0, (%%" _ASM_ARG1 ");", + ALT_NOT(X86_FEATURE_PVUNLOCK)); } static __always_inline void pv_wait(u8 *ptr, u8 val) @@ -670,7 +576,9 @@ static __always_inline void pv_kick(int cpu) static __always_inline bool pv_vcpu_is_preempted(long 
cpu) { - return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu); + return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu, + "xor %%" _ASM_AX ", %%" _ASM_AX ";", + ALT_NOT(X86_FEATURE_VCPUPREEMPT)); } void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); @@ -679,16 +587,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" -#define PV_RESTORE_REGS "popl %edx; popl %ecx;" - /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" #define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" - -#define PV_FLAGS_ARG "0" -#define PV_EXTRA_CLOBBERS -#define PV_VEXTRA_CLOBBERS #else /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS \ @@ -709,14 +610,6 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); "pop %rsi;" \ "pop %rdx;" \ "pop %rcx;" - -/* We save some registers, but all of them, that's too much. We clobber all - * caller saved registers but the argument parameter */ -#define PV_SAVE_REGS "pushq %%rdi;" -#define PV_RESTORE_REGS "popq %%rdi;" -#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi" -#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi" -#define PV_FLAGS_ARG "D" #endif /* @@ -732,21 +625,27 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); * functions. */ #define PV_THUNK_NAME(func) "__raw_callee_save_" #func -#define PV_CALLEE_SAVE_REGS_THUNK(func) \ +#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ extern typeof(func) __raw_callee_save_##func; \ \ - asm(".pushsection .text;" \ + asm(".pushsection " section ", \"ax\";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ + ASM_FUNC_ALIGN \ PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ - "ret;" \ + ASM_RET \ + ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") + /* Get a reference to a callee-save function */ #define PV_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { __raw_callee_save_##func }) @@ -756,27 +655,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ((struct paravirt_callee_save) { func }) #ifdef CONFIG_PARAVIRT_XXL -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { - return PVOP_CALLEE0(unsigned long, irq.save_fl); + return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", + ALT_NOT_XEN); } -static inline notrace void arch_local_irq_restore(unsigned long f) +static __always_inline void arch_local_irq_disable(void) { - PVOP_VCALLEE1(irq.restore_fl, f); + PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN); } -static inline notrace void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_enable(void) { - PVOP_VCALLEE0(irq.irq_disable); + PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN); } -static inline notrace void arch_local_irq_enable(void) -{ - PVOP_VCALLEE0(irq.irq_enable); -} - -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long f; @@ -803,144 +698,42 @@ static inline notrace unsigned long arch_local_irq_save(void) #undef PVOP_CALL4 extern void 
default_banner(void); +void native_pv_lock_init(void) __init; -#else /* __ASSEMBLY__ */ - -#define _PVSITE(ptype, ops, word, algn) \ -771:; \ - ops; \ -772:; \ - .pushsection .parainstructions,"a"; \ - .align algn; \ - word 771b; \ - .byte ptype; \ - .byte 772b-771b; \ - .popsection - - -#define COND_PUSH(set, mask, reg) \ - .if ((~(set)) & mask); push %reg; .endif -#define COND_POP(set, mask, reg) \ - .if ((~(set)) & mask); pop %reg; .endif - -#ifdef CONFIG_X86_64 - -#define PV_SAVE_REGS(set) \ - COND_PUSH(set, CLBR_RAX, rax); \ - COND_PUSH(set, CLBR_RCX, rcx); \ - COND_PUSH(set, CLBR_RDX, rdx); \ - COND_PUSH(set, CLBR_RSI, rsi); \ - COND_PUSH(set, CLBR_RDI, rdi); \ - COND_PUSH(set, CLBR_R8, r8); \ - COND_PUSH(set, CLBR_R9, r9); \ - COND_PUSH(set, CLBR_R10, r10); \ - COND_PUSH(set, CLBR_R11, r11) -#define PV_RESTORE_REGS(set) \ - COND_POP(set, CLBR_R11, r11); \ - COND_POP(set, CLBR_R10, r10); \ - COND_POP(set, CLBR_R9, r9); \ - COND_POP(set, CLBR_R8, r8); \ - COND_POP(set, CLBR_RDI, rdi); \ - COND_POP(set, CLBR_RSI, rsi); \ - COND_POP(set, CLBR_RDX, rdx); \ - COND_POP(set, CLBR_RCX, rcx); \ - COND_POP(set, CLBR_RAX, rax) - -#define PARA_PATCH(off) ((off) / 8) -#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) -#define PARA_INDIRECT(addr) *addr(%rip) -#else -#define PV_SAVE_REGS(set) \ - COND_PUSH(set, CLBR_EAX, eax); \ - COND_PUSH(set, CLBR_EDI, edi); \ - COND_PUSH(set, CLBR_ECX, ecx); \ - COND_PUSH(set, CLBR_EDX, edx) -#define PV_RESTORE_REGS(set) \ - COND_POP(set, CLBR_EDX, edx); \ - COND_POP(set, CLBR_ECX, ecx); \ - COND_POP(set, CLBR_EDI, edi); \ - COND_POP(set, CLBR_EAX, eax) - -#define PARA_PATCH(off) ((off) / 4) -#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4) -#define PARA_INDIRECT(addr) *%cs:addr -#endif - -#ifdef CONFIG_PARAVIRT_XXL -#define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(PV_CPU_iret), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);) - -#define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) - -#define ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) -#endif +#else /* __ASSEMBLER__ */ #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL -/* - * If swapgs is used while the userspace stack is still current, - * there's no way to call a pvop. The PV replacement *must* be - * inlined, or the swapgs instruction must be trapped and emulated. - */ -#define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) - -/* - * Note: swapgs is very special, and in practise is either going to be - * implemented with a single "swapgs" instruction or something very - * special. Either way, we don't need to save any registers for - * it. 
- */ -#define SWAPGS \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ - ) -#endif +#ifdef CONFIG_DEBUG_ENTRY -#define GET_CR2_INTO_RAX \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); +#define PARA_INDIRECT(addr) *addr(%rip) -#ifdef CONFIG_PARAVIRT_XXL -#define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) +.macro PARA_IRQ_save_fl + ANNOTATE_RETPOLINE_SAFE; + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); +.endm -#ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(clobbers) \ - PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \ + "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \ + "pushf; pop %rax;", ALT_NOT_XEN #endif -#endif - -#endif /* CONFIG_X86_32 */ +#endif /* CONFIG_PARAVIRT_XXL */ +#endif /* CONFIG_X86_64 */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop + +#ifndef __ASSEMBLER__ +static inline void native_pv_lock_init(void) +{ +} +#endif #endif /* !CONFIG_PARAVIRT */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_enter_mmap(struct mm_struct *mm) { } #endif @@ -950,5 +743,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } #endif -#endif /* __ASSEMBLY__ */ + +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void paravirt_set_cap(void) +{ +} +#endif +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/x86/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 488c59686a73..3502939415ad 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,46 +2,12 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H -/* Bitmask of what can be clobbered: usually at least eax. */ -#define CLBR_NONE 0 -#define CLBR_EAX (1 << 0) -#define CLBR_ECX (1 << 1) -#define CLBR_EDX (1 << 2) -#define CLBR_EDI (1 << 3) +#ifdef CONFIG_PARAVIRT -#ifdef CONFIG_X86_32 -/* CLBR_ANY should match all regs platform has. 
For i386, that's just it */ -#define CLBR_ANY ((1 << 4) - 1) - -#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) -#define CLBR_SCRATCH (0) -#else -#define CLBR_RAX CLBR_EAX -#define CLBR_RCX CLBR_ECX -#define CLBR_RDX CLBR_EDX -#define CLBR_RDI CLBR_EDI -#define CLBR_RSI (1 << 4) -#define CLBR_R8 (1 << 5) -#define CLBR_R9 (1 << 6) -#define CLBR_R10 (1 << 7) -#define CLBR_R11 (1 << 8) - -#define CLBR_ANY ((1 << 9) - 1) - -#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ - CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX) -#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) - -#endif /* X86_64 */ - -#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/types.h> #include <asm/desc_defs.h> -#include <asm/kmap_types.h> #include <asm/pgtable_types.h> #include <asm/nospec-branch.h> @@ -55,6 +21,7 @@ struct task_struct; struct cpumask; struct flush_tlb_info; struct mmu_gather; +struct vm_area_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -67,30 +34,12 @@ struct paravirt_callee_save { /* general info */ struct pv_info { #ifdef CONFIG_PARAVIRT_XXL - unsigned int kernel_rpl; - int shared_kernel_pmd; - -#ifdef CONFIG_X86_64 u16 extra_user_64bit_cs; /* __USER_CS if none */ #endif -#endif const char *name; }; -struct pv_init_ops { - /* - * Patch may replace one of the defined code sequences with - * arbitrary code, subject to the same register constraints. - * This generally means the code is not free to clobber any - * registers other than EAX. The patch function should return - * the number of bytes of code generated, as we nop pad the - * rest in generic code. - */ - unsigned (*patch)(u8 type, void *insnbuf, - unsigned long addr, unsigned len); -} __no_randomize_layout; - #ifdef CONFIG_PARAVIRT_XXL struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ @@ -100,11 +49,6 @@ struct pv_lazy_ops { } __no_randomize_layout; #endif -struct pv_time_ops { - unsigned long long (*sched_clock)(void); - unsigned long long (*steal_clock)(int cpu); -} __no_randomize_layout; - struct pv_cpu_ops { /* hooks for various privileged instructions */ void (*io_delay)(void); @@ -118,11 +62,6 @@ struct pv_cpu_ops { void (*write_cr4)(unsigned long); -#ifdef CONFIG_X86_64 - unsigned long (*read_cr8)(void); - void (*write_cr8)(unsigned long); -#endif - /* Segment descriptor handling */ void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); @@ -130,9 +69,7 @@ struct pv_cpu_ops { void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); -#ifdef CONFIG_X86_64 void (*load_gs_index)(unsigned int idx); -#endif void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, const void *desc); void (*write_gdt_entry)(struct desc_struct *, @@ -144,41 +81,28 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); - void (*set_iopl_mask)(unsigned mask); - - void (*wbinvd)(void); +#ifdef CONFIG_X86_IOPL_IOPERM + void (*invalidate_io_bitmap)(void); + void (*update_io_bitmap)(void); +#endif /* cpuid emulation, mostly so that caps bits can be disabled */ void (*cpuid)(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); /* Unsafe MSR operations. These will warn or panic on failure. 
*/ - u64 (*read_msr)(unsigned int msr); - void (*write_msr)(unsigned int msr, unsigned low, unsigned high); + u64 (*read_msr)(u32 msr); + void (*write_msr)(u32 msr, u64 val); /* * Safe MSR operations. - * read sets err to 0 or -EIO. write returns 0 or -EIO. + * Returns 0 or -EIO. */ - u64 (*read_msr_safe)(unsigned int msr, int *err); - int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); + int (*read_msr_safe)(u32 msr, u64 *val); + int (*write_msr_safe)(u32 msr, u64 val); u64 (*read_pmc)(int counter); - /* - * Switch to usermode gs and return to 64-bit usermode using - * sysret. Only used in 64-bit kernels to return to 64-bit - * processes. Usermode register state, including %rsp, must - * already be restored. - */ - void (*usergs_sysret64)(void); - - /* Normal iret. Jump to this with the standard iret stack - frame set up. */ - void (*iret)(void); - - void (*swapgs)(void); - void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); #endif @@ -187,22 +111,18 @@ struct pv_cpu_ops { struct pv_irq_ops { #ifdef CONFIG_PARAVIRT_XXL /* - * Get/set interrupt state. save_fl and restore_fl are only - * expected to use X86_EFLAGS_IF; all other bits - * returned from save_fl are undefined, and may be ignored by - * restore_fl. + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. * * NOTE: These functions callers expect the callee to preserve * more registers than the standard C calling convention. */ struct paravirt_callee_save save_fl; - struct paravirt_callee_save restore_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; - +#endif void (*safe_halt)(void); void (*halt)(void); -#endif } __no_randomize_layout; struct pv_mmu_ops { @@ -210,26 +130,22 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_one_user)(unsigned long addr); - void (*flush_tlb_others)(const struct cpumask *cpus, - const struct flush_tlb_info *info); - - void (*tlb_remove_table)(struct mmu_gather *tlb, void *table); + void (*flush_tlb_multi)(const struct cpumask *cpus, + const struct flush_tlb_info *info); /* Hook for intercepting the destruction of an mm_struct. */ void (*exit_mmap)(struct mm_struct *mm); + void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc); #ifdef CONFIG_PARAVIRT_XXL - unsigned long (*read_cr2)(void); + struct paravirt_callee_save read_cr2; void (*write_cr2)(unsigned long); unsigned long (*read_cr3)(void); void (*write_cr3)(unsigned long); - /* Hooks for intercepting the creation/use of an mm_struct. */ - void (*activate_mm)(struct mm_struct *prev, - struct mm_struct *next); - void (*dup_mmap)(struct mm_struct *oldmm, - struct mm_struct *mm); + /* Hook for intercepting the creation/use of an mm_struct. 
*/ + void (*enter_mmap)(struct mm_struct *mm); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); @@ -250,13 +166,11 @@ struct pv_mmu_ops { /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); - void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); - pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, + pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); - void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + void (*ptep_modify_prot_commit)(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte); struct paravirt_callee_save pte_val; @@ -265,36 +179,20 @@ struct pv_mmu_ops { struct paravirt_callee_save pgd_val; struct paravirt_callee_save make_pgd; -#if CONFIG_PGTABLE_LEVELS >= 3 -#ifdef CONFIG_X86_PAE - void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); - void (*pte_clear)(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); - void (*pmd_clear)(pmd_t *pmdp); - -#endif /* CONFIG_X86_PAE */ - void (*set_pud)(pud_t *pudp, pud_t pudval); struct paravirt_callee_save pmd_val; struct paravirt_callee_save make_pmd; -#if CONFIG_PGTABLE_LEVELS >= 4 struct paravirt_callee_save pud_val; struct paravirt_callee_save make_pud; void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval); -#if CONFIG_PGTABLE_LEVELS >= 5 struct paravirt_callee_save p4d_val; struct paravirt_callee_save make_p4d; void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval); -#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ - -#endif /* CONFIG_PGTABLE_LEVELS >= 4 */ - -#endif /* CONFIG_PGTABLE_LEVELS >= 3 */ struct pv_lazy_ops lazy_mode; @@ -328,8 +226,6 @@ struct pv_lock_ops { * number for each function using the offset which we use to indicate * what to patch. */ struct paravirt_patch_template { - struct pv_init_ops init; - struct pv_time_ops time; struct pv_cpu_ops cpu; struct pv_irq_ops irq; struct pv_mmu_ops mmu; @@ -339,60 +235,22 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -#define PARAVIRT_PATCH(x) \ - (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) - -#define paravirt_type(op) \ - [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(pv_ops.op)) -#define paravirt_clobber(clobber) \ - [paravirt_clobber] "i" (clobber) - -/* - * Generate some code, and mark it as patchable by the - * apply_paravirt() alternate instruction patcher. - */ -#define _paravirt_alt(insn_string, type, clobber) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR " 771b\n" \ - " .byte " type "\n" \ - " .byte 772b-771b\n" \ - " .short " clobber "\n" \ - ".popsection\n" - -/* Generate patchable code, with the default asm parameters. */ -#define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") - -/* Simple instruction patching code. 
*/ -#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" - -#define DEF_NATIVE(ops, name, code) \ - __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) - -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_default(u8 type, void *insnbuf, - unsigned long addr, unsigned len); - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end); - -unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len); - -int paravirt_disable_iospace(void); +#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op) /* * This generates an indirect call based on the operation type number. - * The type number, computed in PARAVIRT_PATCH, is derived from the - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. + * + * Since alternatives run after enabling CET/IBT -- the latter setting/clearing + * capabilities and the former requiring all capabilities being finalized -- + * these indirect calls are subject to IBT and the paravirt stubs should have + * ENDBR on. + * + * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs + * don't need to bother with CFI prefixes. */ #define PARAVIRT_CALL \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%c[paravirt_opptr];" + ANNOTATE_RETPOLINE_SAFE "\n\t" \ + "call *%[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt @@ -405,7 +263,7 @@ int paravirt_disable_iospace(void); * Unfortunately, this is a relatively slow operation for modern CPUs, * because it cannot necessarily determine what the destination * address is. In this case, the address is a runtime constant, so at - * the very least we can patch the call to e a simple direct call, or + * the very least we can patch the call to a simple direct call, or, * ideally, patch an inline implementation into the callsite. (Direct * calls are essentially free, because the call and return addresses * are completely predictable.) @@ -416,18 +274,12 @@ int paravirt_disable_iospace(void); * on the stack. All caller-save registers (eax,edx,ecx) are expected * to be modified (either clobbered or used for return values). * X86_64, on the other hand, already specifies a register-based calling - * conventions, returning at %rax, with parameteres going on %rdi, %rsi, + * conventions, returning at %rax, with parameters going in %rdi, %rsi, * %rdx, and %rcx. Note that for this reason, x86_64 does not need any * special handling for dealing with 4 arguments, unlike i386. - * However, x86_64 also have to clobber all caller saved registers, which + * However, x86_64 also has to clobber all caller saved registers, which * unfortunately, are quite a bit (r8 - r11) * - * The call instruction itself is marked by placing its start address - * and size into the .parainstructions section, so that - * apply_paravirt() in arch/i386/kernel/alternative.c can do the - * appropriate patching under the control of the backend pv_init_ops - * implementation. - * * Unfortunately there's no way to get gcc to generate the args setup * for the call, and then allow the call itself to be generated by an * inline asm. Because of this, we must do the complete arg setup and @@ -437,33 +289,31 @@ int paravirt_disable_iospace(void); * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 
* It could be extended to more arguments, but there would be little * to be gained from that. For each number of arguments, there are - * the two VCALL and CALL variants for void and non-void functions. + * two VCALL and CALL variants for void and non-void functions. * * When there is a return value, the invoker of the macro must specify * the return type. The macro then uses sizeof() on that type to - * determine whether its a 32 or 64 bit value, and places the return + * determine whether it's a 32 or 64 bit value and places the return * in the right register(s) (just %eax for 32-bit, and %edx:%eax for - * 64-bit). For x86_64 machines, it just returns at %rax regardless of + * 64-bit). For x86_64 machines, it just returns in %rax regardless of * the return value size. * - * 64-bit arguments are passed as a pair of adjacent 32-bit arguments + * 64-bit arguments are passed as a pair of adjacent 32-bit arguments; * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments * in low,high order * * Small structures are passed and returned in registers. The macro * calling convention can't directly deal with this, so the wrapper - * functions must do this. + * functions must do it. * * These PVOP_* macros are only defined within this header. This * means that all uses must be wrapped in inline functions. This also * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS \ +#define PVOP_CALL_ARGS \ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) @@ -479,12 +329,10 @@ int paravirt_disable_iospace(void); #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ /* [re]ax isn't an arg, but the return val */ -#define PVOP_VCALL_ARGS \ +#define PVOP_CALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ __edx = __edx, __ecx = __ecx, __eax = __eax; -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS - #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) @@ -495,8 +343,17 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) -/* void functions are still allowed [re]ax for scratch */ +/* + * void functions are still allowed [re]ax for scratch. + * + * The ZERO_CALL_USED REGS feature may end up zeroing out callee-saved + * registers. Make sure we model this with the appropriate clobbers. + */ +#ifdef CONFIG_ZERO_CALL_USED_REGS +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), PVOP_VCALL_CLOBBERS +#else #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#endif #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" @@ -509,183 +366,165 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)pv_ops.op) #endif -#define PVOP_RETMASK(rettype) \ +#define PVOP_RETVAL(rettype) \ ({ unsigned long __mask = ~0UL; \ + BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \ switch (sizeof(rettype)) { \ case 1: __mask = 0xffUL; break; \ case 2: __mask = 0xffffUL; break; \ case 4: __mask = 0xffffffffUL; break; \ default: break; \ } \ - __mask; \ + __mask & __eax; \ }) - -#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ - pre, post, ...) 
\ +/* + * Use alternative patching for paravirt calls: + * - For replacing an indirect call with a direct one, use the "normal" + * ALTERNATIVE() macro with the indirect call as the initial code sequence, + * which will be replaced with the related direct call by using the + * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. + * - In case the replacement is either a direct call or a short code sequence + * depending on a feature bit, the ALTERNATIVE_2() macro is being used. + * The indirect call is the initial code sequence again, while the special + * code sequence is selected with the specified feature bit. In case the + * feature is not active, the direct call is used as above via the + * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. + */ +#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) \ ({ \ - rettype __ret; \ PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - /* This is 32-bit specific, but is okay in 64-bit */ \ - /* since this condition will never hold */ \ - if (sizeof(rettype) > sizeof(unsigned long)) { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)((((u64)__edx) << 32) | __eax); \ - } else { \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ - : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ - ##__VA_ARGS__ \ - : "memory", "cc" extra_clbr); \ - __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ - } \ - __ret; \ + asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \ + ALT_CALL_ALWAYS) \ + : call_clbr, ASM_CALL_CONSTRAINT \ + : paravirt_ptr(op), \ + ##__VA_ARGS__ \ + : "memory", "cc" extra_clbr); \ + ret; \ }) -#define __PVOP_CALL(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ - EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) - -#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_CALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) - - -#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ +#define ____PVOP_ALT_CALL(ret, op, alt, cond, call_clbr, \ + extra_clbr, ...) \ ({ \ - PVOP_VCALL_ARGS; \ + PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - asm volatile(pre \ - paravirt_alt(PARAVIRT_CALL) \ - post \ + asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \ + ALT_CALL_INSTR, ALT_CALL_ALWAYS, \ + alt, cond) \ : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ - paravirt_clobber(clbr), \ + : paravirt_ptr(op), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ + ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ - ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ - VEXTRA_CLOBBERS, \ - pre, post, ##__VA_ARGS__) +#define __PVOP_CALL(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op, \ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__) -#define __PVOP_VCALLEESAVE(op, pre, post, ...) \ - ____PVOP_VCALL(op.func, CLBR_RET_REG, \ - PVOP_VCALLEE_CLOBBERS, , \ - pre, post, ##__VA_ARGS__) +#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, \ + PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \ + ##__VA_ARGS__) +#define __PVOP_CALLEESAVE(rettype, op, ...) \ + ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, \ + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) 
\ + ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \ + PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__) + + +#define __PVOP_VCALL(op, ...) \ + (void)____PVOP_CALL(, op, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, ##__VA_ARGS__) + +#define __PVOP_ALT_VCALL(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op, alt, cond, \ + PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \ + ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(op, ...) \ + (void)____PVOP_CALL(, op.func, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) + +#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \ + (void)____PVOP_ALT_CALL(, op.func, alt, cond, \ + PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__) #define PVOP_CALL0(rettype, op) \ - __PVOP_CALL(rettype, op, "", "") + __PVOP_CALL(rettype, op) #define PVOP_VCALL0(op) \ - __PVOP_VCALL(op, "", "") + __PVOP_VCALL(op) +#define PVOP_ALT_CALL0(rettype, op, alt, cond) \ + __PVOP_ALT_CALL(rettype, op, alt, cond) +#define PVOP_ALT_VCALL0(op, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond) #define PVOP_CALLEE0(rettype, op) \ - __PVOP_CALLEESAVE(rettype, op, "", "") + __PVOP_CALLEESAVE(rettype, op) #define PVOP_VCALLEE0(op) \ - __PVOP_VCALLEESAVE(op, "", "") + __PVOP_VCALLEESAVE(op) +#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond) +#define PVOP_ALT_VCALLEE0(op, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond) #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1)) #define PVOP_CALLEE1(rettype, op, arg1) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1)) #define PVOP_VCALLEE1(op, arg1) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \ + __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1)) +#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \ + __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1)) #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - -#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ - __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) -#define PVOP_VCALLEE2(op, arg1, arg2) \ - __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ - PVOP_CALL_ARG2(arg2)) - + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2)) #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) -/* This is the only difference in x86_64. 
We can make it much simpler */ -#ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ - PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) -#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, \ - "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) -#else -#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", \ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", \ - PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -#endif - -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - -enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_start_context_switch(struct task_struct *prev); -void paravirt_end_context_switch(struct task_struct *next); -void paravirt_enter_lazy_mmu(void); -void paravirt_leave_lazy_mmu(void); -void paravirt_flush_lazy_mmu(void); - -void _paravirt_nop(void); +unsigned long paravirt_ret0(void); +#ifdef CONFIG_PARAVIRT_XXL u64 _paravirt_ident_64(u64); +unsigned long pv_native_save_fl(void); +void pv_native_irq_disable(void); +void pv_native_irq_enable(void); +unsigned long pv_native_read_cr2(void); +#endif -#define paravirt_nop ((void *)_paravirt_nop) - -/* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 instrtype; /* type of this instruction */ - u8 len; /* length of original instruction */ -}; +#define paravirt_nop ((void *)nop_func) -extern struct paravirt_patch_site __parainstructions[], - __parainstructions_end[]; +#endif /* __ASSEMBLER__ */ -#endif /* __ASSEMBLY__ */ +#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV) +#endif /* CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 92015c65fa2a..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include <linux/types.h> -#include <asm/pgtable_types.h> - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int reserve_memtype(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); - -extern int kernel_map_sync_memtype(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int io_reserve_memtype(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void io_free_memtype(resource_size_t start, resource_size_t end); - -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pc-conf-reg.h b/arch/x86/include/asm/pc-conf-reg.h new file mode 100644 index 000000000000..56bceceacf5f --- /dev/null +++ b/arch/x86/include/asm/pc-conf-reg.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for the configuration register space at port I/O locations 
+ * 0x22 and 0x23 variously used by PC architectures, e.g. the MP Spec, + * Cyrix CPUs, numerous chipsets. + */ +#ifndef _ASM_X86_PC_CONF_REG_H +#define _ASM_X86_PC_CONF_REG_H + +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define PC_CONF_INDEX 0x22 +#define PC_CONF_DATA 0x23 + +#define PC_CONF_MPS_IMCR 0x70 + +extern raw_spinlock_t pc_conf_lock; + +static inline u8 pc_conf_get(u8 reg) +{ + outb(reg, PC_CONF_INDEX); + return inb(PC_CONF_DATA); +} + +static inline void pc_conf_set(u8 reg, u8 data) +{ + outb(reg, PC_CONF_INDEX); + outb(data, PC_CONF_DATA); +} + +#endif /* _ASM_X86_PC_CONF_REG_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 662963681ea6..b3ab80a03365 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -7,11 +7,9 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/scatterlist.h> +#include <linux/numa.h> #include <asm/io.h> -#include <asm/pat.h> -#include <asm/x86_init.h> - -#ifdef __KERNEL__ +#include <asm/memtype.h> struct pci_sysdata { int domain; /* PCI domain */ @@ -22,11 +20,11 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI void *fwnode; /* IRQ domain for MSI assignment */ #endif #if IS_ENABLED(CONFIG_VMD) - bool vmd_domain; /* True if in Intel VMD domain */ + struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ #endif }; @@ -34,14 +32,17 @@ extern int pci_routeirq; extern int noioapicquirk; extern int noioapicreroute; +static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus) +{ + return bus->sysdata; +} + #ifdef CONFIG_PCI #ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->domain; + return to_pci_sysdata(bus)->domain; } static inline int pci_proc_domain(struct pci_bus *bus) @@ -50,27 +51,23 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->fwnode; + return to_pci_sysdata(bus)->fwnode; } #define pci_root_bus_fwnode _pci_root_bus_fwnode #endif +#if IS_ENABLED(CONFIG_VMD) static inline bool is_vmd(struct pci_bus *bus) { -#if IS_ENABLED(CONFIG_VMD) - struct pci_sysdata *sd = bus->sysdata; - - return sd->vmd_domain; -#else - return false; -#endif + return to_pci_sysdata(bus)->vmd_dev != NULL; } +#else +#define is_vmd(bus) false +#endif /* CONFIG_VMD */ /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes @@ -94,6 +91,7 @@ void pcibios_scan_root(int bus); struct irq_routing_table *pcibios_get_irq_routing_table(void); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev); #define HAVE_PCI_MMAP #define arch_can_pci_mmap_wc() pat_enabled() @@ -107,32 +105,11 @@ static inline void early_quirks(void) { } extern void pci_iommu_alloc(void); -#ifdef CONFIG_PCI_MSI -/* implemented in arch/x86/kernel/apic/io_apic. 
*/ -struct msi_desc; -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -void native_teardown_msi_irq(unsigned int irq); -void native_restore_msi_irqs(struct pci_dev *dev); -#else -#define native_setup_msi_irqs NULL -#define native_teardown_msi_irq NULL -#endif -#endif /* __KERNEL__ */ - -#ifdef CONFIG_X86_64 -#include <asm/pci_64.h> -#endif - -/* generic pci stuff */ -#include <asm-generic/pci.h> - #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ static inline int __pcibus_to_node(const struct pci_bus *bus) { - const struct pci_sysdata *sd = bus->sysdata; - - return sd->node; + return to_pci_sysdata(bus)->node; } static inline const struct cpumask * @@ -141,21 +118,9 @@ cpumask_of_pcibus(const struct pci_bus *bus) int node; node = __pcibus_to_node(bus); - return (node == -1) ? cpu_online_mask : + return (node == NUMA_NO_NODE) ? cpu_online_mask : cpumask_of_node(node); } #endif -struct pci_setup_rom { - struct setup_data data; - uint16_t vendor; - uint16_t devid; - uint64_t pcilen; - unsigned long segment; - unsigned long bus; - unsigned long device; - unsigned long function; - uint8_t romdata[0]; -}; - #endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h deleted file mode 100644 index f5411de0ae11..000000000000 --- a/arch/x86/include/asm/pci_64.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PCI_64_H -#define _ASM_X86_PCI_64_H - -#ifdef __KERNEL__ - -#ifdef CONFIG_CALGARY_IOMMU -static inline void *pci_iommu(struct pci_bus *bus) -{ - struct pci_sysdata *sd = bus->sysdata; - return sd->iommu; -} - -static inline void set_pci_iommu(struct pci_bus *bus, void *val) -{ - struct pci_sysdata *sd = bus->sysdata; - sd->iommu = val; -} -#endif /* CONFIG_CALGARY_IOMMU */ - -extern int (*pci_config_read)(int seg, int bus, int dev, int fn, - int reg, int len, u32 *value); -extern int (*pci_config_write)(int seg, int bus, int dev, int fn, - int reg, int len, u32 value); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 73bb404f4d2a..70533fdcbf02 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -5,7 +5,10 @@ * (c) 1999 Martin Mares <mj@ucw.cz> */ +#include <linux/errno.h> +#include <linux/init.h> #include <linux/ioport.h> +#include <linux/spinlock.h> #undef DEBUG @@ -39,6 +42,8 @@ do { \ #define PCI_ROOT_NO_CRS 0x100000 #define PCI_NOASSIGN_BARS 0x200000 #define PCI_BIG_ROOT_WINDOW 0x400000 +#define PCI_USE_E820 0x800000 +#define PCI_NO_E820 0x1000000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; @@ -64,6 +69,8 @@ void pcibios_scan_specific_bus(int busn); /* pci-irq.c */ +struct pci_dev; + struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { @@ -87,7 +94,16 @@ struct irq_routing_table { u32 miniport_data; /* Crap */ u8 rfu[11]; u8 checksum; /* Modulo 256 checksum must give 0 */ - struct irq_info slots[0]; + struct irq_info slots[]; +} __attribute__((packed)); + +struct irt_routing_table { + u32 signature; /* IRT_SIGNATURE should be here */ + u8 size; /* Number of entries provided */ + u8 used; /* Number of entries actually used */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + struct irq_info slots[]; } __attribute__((packed)); extern unsigned int pcibios_irq_mask; @@ -114,9 +130,20 @@ extern const struct pci_raw_ops pci_direct_conf1; extern bool port_cf9_safe; /* arch_initcall level */ 
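The pc_conf_get()/pc_conf_set() helpers added in pc-conf-reg.h above only issue the raw index/data port pair accesses; the two accesses are not atomic together, so callers are expected to serialize on the exported pc_conf_lock themselves. A minimal sketch of such a caller (illustrative only; the function name and the value written are assumptions, not taken from this diff):

	/*
	 * Illustrative only -- not part of this diff.  Any user of
	 * pc_conf_get()/pc_conf_set() must hold pc_conf_lock across
	 * the index/data access pair.
	 */
	static void example_write_imcr(u8 val)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&pc_conf_lock, flags);
		pc_conf_set(PC_CONF_MPS_IMCR, val);
		raw_spin_unlock_irqrestore(&pc_conf_lock, flags);
	}
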
+#ifdef CONFIG_PCI_DIRECT
 extern int pci_direct_probe(void);
 extern void pci_direct_init(int type);
+#else
+static inline int pci_direct_probe(void) { return -1; }
+static inline void pci_direct_init(int type) { }
+#endif
+
+#ifdef CONFIG_PCI_BIOS
 extern void pci_pcbios_init(void);
+#else
+static inline void pci_pcbios_init(void) { }
+#endif
+
 extern void __init dmi_check_pciprobe(void);
 extern void __init dmi_check_skip_isa_align(void);
@@ -221,3 +248,9 @@ static inline void mmio_config_writel(void __iomem *pos, u32 val)
 # define x86_default_pci_init_irq	NULL
 # define x86_default_pci_fixup_irqs	NULL
 #endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
+extern bool pci_use_e820;
+#else
+#define pci_use_e820 false
+#endif
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..725d0eff7acd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -3,550 +3,604 @@
 #define _ASM_X86_PERCPU_H
 
 #ifdef CONFIG_X86_64
-#define __percpu_seg		gs
-#define __percpu_mov_op		movq
+# define __percpu_seg		gs
+# define __percpu_rel		(%rip)
 #else
-#define __percpu_seg		fs
-#define __percpu_mov_op		movl
+# define __percpu_seg		fs
+# define __percpu_rel
 #endif
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
 #ifdef CONFIG_SMP
-#define PER_CPU(var, reg)						\
-	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
-	lea var(reg), reg
-#define PER_CPU_VAR(var)	%__percpu_seg:var
-#else /* ! SMP */
-#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
-#define PER_CPU_VAR(var)	var
-#endif	/* SMP */
-
-#ifdef CONFIG_X86_64_SMP
-#define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
+# define __percpu		%__percpu_seg:
 #else
-#define INIT_PER_CPU_VAR(var)  var
+# define __percpu
 #endif
 
-#else /* ...!ASSEMBLY */
+#define PER_CPU_VAR(var)	__percpu(var)__percpu_rel
 
-#include <linux/kernel.h>
+#else /* !__ASSEMBLER__: */
+
+#include <linux/args.h>
+#include <linux/bits.h>
+#include <linux/build_bug.h>
 #include <linux/stringify.h>
+#include <asm/asm.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		this_cpu_read(this_cpu_off)
+
+#define __force_percpu_prefix	"%%"__stringify(__percpu_seg)":"
+
+#ifdef CONFIG_CC_HAS_NAMED_AS
+
+#ifdef __CHECKER__
+# define __seg_gs		__attribute__((address_space(__seg_gs)))
+# define __seg_fs		__attribute__((address_space(__seg_fs)))
+#endif
+
+#define __percpu_prefix
+#define __percpu_seg_override	CONCATENATE(__seg_, __percpu_seg)
+
+#else /* !CONFIG_CC_HAS_NAMED_AS: */
+
+#define __percpu_prefix		__force_percpu_prefix
+#define __percpu_seg_override
+
+#endif /* CONFIG_CC_HAS_NAMED_AS */
 
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
  */
-#define arch_raw_cpu_ptr(ptr)				\
-({							\
-	unsigned long tcp_ptr__;			\
-	asm volatile("add " __percpu_arg(1) ", %0"	\
-		     : "=r" (tcp_ptr__)			\
-		     : "m" (this_cpu_off), "0" (ptr));	\
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
+#define __my_cpu_offset		this_cpu_read(this_cpu_off)
+
+/*
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becomes 4. 
+ */ +#ifndef BUILD_VDSO32_64 +#define arch_raw_cpu_ptr(_ptr) \ +({ \ + unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \ + \ + tcp_ptr__ += (__force unsigned long)(_ptr); \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__; \ }) #else -#define __percpu_prefix "" +#define arch_raw_cpu_ptr(_ptr) \ +({ \ + BUILD_BUG(); \ + (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)0; \ +}) #endif +#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel + +#else /* !CONFIG_SMP: */ + +#define __force_percpu_prefix +#define __percpu_prefix +#define __percpu_seg_override + +#define PER_CPU_VAR(var) (var)__percpu_rel + +#endif /* CONFIG_SMP */ + +#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL) +# define __my_cpu_type(var) typeof(var) +# define __my_cpu_ptr(ptr) (ptr) +# define __my_cpu_var(var) (var) + +# define __percpu_qual __percpu_seg_override +#else +# define __my_cpu_type(var) typeof(var) __percpu_seg_override +# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr) +# define __my_cpu_var(var) (*__my_cpu_ptr(&(var))) +#endif + +#define __force_percpu_arg(x) __force_percpu_prefix "%" #x #define __percpu_arg(x) __percpu_prefix "%" #x /* - * Initialized pointers to per-cpu variables needed for the boot - * processor need to use these macros to get the proper address - * offset from __per_cpu_load on SMP. - * - * There also must be an entry in vmlinux_64.lds.S + * For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ -#define DECLARE_INIT_PER_CPU(var) \ - extern typeof(var) init_per_cpu_var(var) -#ifdef CONFIG_X86_64_SMP -#define init_per_cpu_var(var) init_per_cpu__##var -#else -#define init_per_cpu_var(var) var -#endif +#define __pcpu_type_1 u8 +#define __pcpu_type_2 u16 +#define __pcpu_type_4 u32 +#define __pcpu_type_8 u64 -/* For arch-specific code, we can use direct single-insn ops (they - * don't give an lvalue though). 
*/
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op, var, val)			\
-do {							\
-	typedef typeof(var) pto_T__;			\
-	if (0) {					\
-		pto_T__ pto_tmp__;			\
-		pto_tmp__ = (val);			\
-		(void)pto_tmp__;			\
-	}						\
-	switch (sizeof(var)) {				\
-	case 1:						\
-		asm(op "b %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "qi" ((pto_T__)(val)));		\
-		break;					\
-	case 2:						\
-		asm(op "w %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "ri" ((pto_T__)(val)));		\
-		break;					\
-	case 4:						\
-		asm(op "l %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "ri" ((pto_T__)(val)));		\
-		break;					\
-	case 8:						\
-		asm(op "q %1,"__percpu_arg(0)		\
-		    : "+m" (var)			\
-		    : "re" ((pto_T__)(val)));		\
-		break;					\
-	default: __bad_percpu_size();			\
-	}						\
+#define __pcpu_cast_1(val)	((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val)	((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val)	((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val)	((u64)(val))
+
+#define __pcpu_op_1(op)		op "b "
+#define __pcpu_op_2(op)		op "w "
+#define __pcpu_op_4(op)		op "l "
+#define __pcpu_op_8(op)		op "q "
+
+#define __pcpu_reg_1(mod, x)	mod "q" (x)
+#define __pcpu_reg_2(mod, x)	mod "r" (x)
+#define __pcpu_reg_4(mod, x)	mod "r" (x)
+#define __pcpu_reg_8(mod, x)	mod "r" (x)
+
+#define __pcpu_reg_imm_1(x)	"qi" (x)
+#define __pcpu_reg_imm_2(x)	"ri" (x)
+#define __pcpu_reg_imm_4(x)	"ri" (x)
+#define __pcpu_reg_imm_8(x)	"re" (x)
+
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+
+#define __raw_cpu_read(size, qual, pcp)					\
+({									\
+	*(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp));		\
+})
+
+#define __raw_cpu_write(size, qual, pcp, val)				\
+do {									\
+	*(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val);	\
+} while (0)
+
+#define __raw_cpu_read_const(pcp)	__raw_cpu_read(, , pcp)
+
+#else /* !CONFIG_USE_X86_SEG_SUPPORT: */
+
+#define __raw_cpu_read(size, qual, _var)				\
+({									\
+	__pcpu_type_##size pfo_val__;					\
+									\
+	asm qual (__pcpu_op_##size("mov")				\
+		  __percpu_arg([var]) ", %[val]"			\
+	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
+	    : [var] "m" (__my_cpu_var(_var)));				\
+									\
+	(typeof(_var))(unsigned long) pfo_val__;			\
+})
+
+#define __raw_cpu_write(size, qual, _var, _val)				\
+do {									\
+	__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);	\
+									\
+	if (0) {							\
+		TYPEOF_UNQUAL(_var) pto_tmp__;				\
+		pto_tmp__ = (_val);					\
+		(void)pto_tmp__;					\
+	}								\
+	asm qual (__pcpu_op_##size("mov") "%[val], "			\
+		  __percpu_arg([var])					\
+	    : [var] "=m" (__my_cpu_var(_var))				\
+	    : [val] __pcpu_reg_imm_##size(pto_val__));			\
 } while (0)
 
 /*
- * Generate a percpu add to memory instruction and optimize code
+ * The generic per-CPU infrastructure is not suitable for
+ * reading const-qualified variables. 
+ */ +#define __raw_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) + +#endif /* CONFIG_USE_X86_SEG_SUPPORT */ + +#define __raw_cpu_read_stable(size, _var) \ +({ \ + __pcpu_type_##size pfo_val__; \ + \ + asm(__pcpu_op_##size("mov") \ + __force_percpu_arg(a[var]) ", %[val]" \ + : [val] __pcpu_reg_##size("=", pfo_val__) \ + : [var] "i" (&(_var))); \ + \ + (typeof(_var))(unsigned long) pfo_val__; \ +}) + +#define percpu_unary_op(size, qual, op, _var) \ +({ \ + asm qual (__pcpu_op_##size(op) __percpu_arg([var]) \ + : [var] "+m" (__my_cpu_var(_var))); \ +}) + +#define percpu_binary_op(size, qual, op, _var, _val) \ +do { \ + __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \ + \ + if (0) { \ + TYPEOF_UNQUAL(_var) pto_tmp__; \ + pto_tmp__ = (_val); \ + (void)pto_tmp__; \ + } \ + asm qual (__pcpu_op_##size(op) "%[val], " __percpu_arg([var]) \ + : [var] "+m" (__my_cpu_var(_var)) \ + : [val] __pcpu_reg_imm_##size(pto_val__)); \ +} while (0) + +/* + * Generate a per-CPU add to memory instruction and optimize code * if one is added or subtracted. */ -#define percpu_add_op(var, val) \ +#define percpu_add_op(size, qual, var, val) \ do { \ - typedef typeof(var) pao_T__; \ - const int pao_ID__ = (__builtin_constant_p(val) && \ - ((val) == 1 || (val) == -1)) ? \ - (int)(val) : 0; \ + const int pao_ID__ = \ + (__builtin_constant_p(val) && \ + ((val) == 1 || \ + (val) == (typeof(val))-1)) ? (int)(val) : 0; \ + \ if (0) { \ - pao_T__ pao_tmp__; \ + TYPEOF_UNQUAL(var) pao_tmp__; \ pao_tmp__ = (val); \ (void)pao_tmp__; \ } \ - switch (sizeof(var)) { \ - case 1: \ - if (pao_ID__ == 1) \ - asm("incb "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decb "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addb %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "qi" ((pao_T__)(val))); \ - break; \ - case 2: \ - if (pao_ID__ == 1) \ - asm("incw "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decw "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addw %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "ri" ((pao_T__)(val))); \ - break; \ - case 4: \ - if (pao_ID__ == 1) \ - asm("incl "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decl "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addl %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "ri" ((pao_T__)(val))); \ - break; \ - case 8: \ - if (pao_ID__ == 1) \ - asm("incq "__percpu_arg(0) : "+m" (var)); \ - else if (pao_ID__ == -1) \ - asm("decq "__percpu_arg(0) : "+m" (var)); \ - else \ - asm("addq %1, "__percpu_arg(0) \ - : "+m" (var) \ - : "re" ((pao_T__)(val))); \ - break; \ - default: __bad_percpu_size(); \ - } \ + if (pao_ID__ == 1) \ + percpu_unary_op(size, qual, "inc", var); \ + else if (pao_ID__ == -1) \ + percpu_unary_op(size, qual, "dec", var); \ + else \ + percpu_binary_op(size, qual, "add", var, val); \ } while (0) -#define percpu_from_op(op, var) \ -({ \ - typeof(var) pfo_ret__; \ - switch (sizeof(var)) { \ - case 1: \ - asm volatile(op "b "__percpu_arg(1)",%0"\ - : "=q" (pfo_ret__) \ - : "m" (var)); \ - break; \ - case 2: \ - asm volatile(op "w "__percpu_arg(1)",%0"\ - : "=r" (pfo_ret__) \ - : "m" (var)); \ - break; \ - case 4: \ - asm volatile(op "l "__percpu_arg(1)",%0"\ - : "=r" (pfo_ret__) \ - : "m" (var)); \ - break; \ - case 8: \ - asm volatile(op "q "__percpu_arg(1)",%0"\ - : "=r" (pfo_ret__) \ - : "m" (var)); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pfo_ret__; \ -}) - -#define percpu_stable_op(op, var) \ -({ \ - typeof(var) pfo_ret__; \ - switch (sizeof(var)) { \ - case 1: \ 
- asm(op "b "__percpu_arg(P1)",%0" \ - : "=q" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - case 2: \ - asm(op "w "__percpu_arg(P1)",%0" \ - : "=r" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - case 4: \ - asm(op "l "__percpu_arg(P1)",%0" \ - : "=r" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - case 8: \ - asm(op "q "__percpu_arg(P1)",%0" \ - : "=r" (pfo_ret__) \ - : "p" (&(var))); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pfo_ret__; \ -}) - -#define percpu_unary_op(op, var) \ -({ \ - switch (sizeof(var)) { \ - case 1: \ - asm(op "b "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - case 2: \ - asm(op "w "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - case 4: \ - asm(op "l "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - case 8: \ - asm(op "q "__percpu_arg(0) \ - : "+m" (var)); \ - break; \ - default: __bad_percpu_size(); \ - } \ -}) - /* * Add return operation */ -#define percpu_add_return_op(var, val) \ +#define percpu_add_return_op(size, qual, _var, _val) \ ({ \ - typeof(var) paro_ret__ = val; \ - switch (sizeof(var)) { \ - case 1: \ - asm("xaddb %0, "__percpu_arg(1) \ - : "+q" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 2: \ - asm("xaddw %0, "__percpu_arg(1) \ - : "+r" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 4: \ - asm("xaddl %0, "__percpu_arg(1) \ - : "+r" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 8: \ - asm("xaddq %0, "__percpu_arg(1) \ - : "+re" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - paro_ret__ += val; \ - paro_ret__; \ + __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \ + \ + asm qual (__pcpu_op_##size("xadd") "%[tmp], " \ + __percpu_arg([var]) \ + : [tmp] __pcpu_reg_##size("+", paro_tmp__), \ + [var] "+m" (__my_cpu_var(_var)) \ + : : "memory"); \ + (typeof(_var))(unsigned long) (paro_tmp__ + _val); \ }) /* - * xchg is implemented using cmpxchg without a lock prefix. xchg is - * expensive due to the implied lock prefix. The processor cannot prefetch - * cachelines if xchg is used. + * raw_cpu_xchg() can use a load-store since + * it is not required to be IRQ-safe. */ -#define percpu_xchg_op(var, nval) \ +#define raw_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(var) pxo_ret__; \ - typeof(var) pxo_new__ = (nval); \ - switch (sizeof(var)) { \ - case 1: \ - asm("\n\tmov "__percpu_arg(1)",%%al" \ - "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "q" (pxo_new__) \ - : "memory"); \ - break; \ - case 2: \ - asm("\n\tmov "__percpu_arg(1)",%%ax" \ - "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "r" (pxo_new__) \ - : "memory"); \ - break; \ - case 4: \ - asm("\n\tmov "__percpu_arg(1)",%%eax" \ - "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "r" (pxo_new__) \ - : "memory"); \ - break; \ - case 8: \ - asm("\n\tmov "__percpu_arg(1)",%%rax" \ - "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ - "\n\tjnz 1b" \ - : "=&a" (pxo_ret__), "+m" (var) \ - : "r" (pxo_new__) \ - : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pxo_ret__; \ + TYPEOF_UNQUAL(_var) pxo_old__ = raw_cpu_read(_var); \ + \ + raw_cpu_write(_var, _nval); \ + \ + pxo_old__; \ }) /* - * cmpxchg has no such implied lock semantics as a result it is much - * more efficient for cpu local operations. + * this_cpu_xchg() is implemented using CMPXCHG without a LOCK prefix. + * XCHG is expensive due to the implied LOCK prefix. 
The processor + * cannot prefetch cachelines if XCHG is used. */ -#define percpu_cmpxchg_op(var, oval, nval) \ +#define this_percpu_xchg_op(_var, _nval) \ ({ \ - typeof(var) pco_ret__; \ - typeof(var) pco_old__ = (oval); \ - typeof(var) pco_new__ = (nval); \ - switch (sizeof(var)) { \ - case 1: \ - asm("cmpxchgb %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "q" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - case 2: \ - asm("cmpxchgw %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "r" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - case 4: \ - asm("cmpxchgl %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "r" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - case 8: \ - asm("cmpxchgq %2, "__percpu_arg(1) \ - : "=a" (pco_ret__), "+m" (var) \ - : "r" (pco_new__), "0" (pco_old__) \ - : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - pco_ret__; \ + TYPEOF_UNQUAL(_var) pxo_old__ = this_cpu_read(_var); \ + \ + do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \ + \ + pxo_old__; \ }) /* - * this_cpu_read() makes gcc load the percpu variable every time it is - * accessed while this_cpu_read_stable() allows the value to be cached. - * this_cpu_read_stable() is more efficient and can be used if its value - * is guaranteed to be valid across cpus. The current users include - * get_current() and get_thread_info() both of which are actually - * per-thread variables implemented as per-cpu variables and thus - * stable for the duration of the respective task. + * CMPXCHG has no such implied lock semantics as a result it is much + * more efficient for CPU-local operations. */ -#define this_cpu_read_stable(var) percpu_stable_op("mov", var) - -#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) - -#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) - -#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_and_2(pcp, val) percpu_to_op("and", 
(pcp), val) -#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) - -#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#ifdef CONFIG_X86_CMPXCHG64 -#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ +#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \ ({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - asm volatile("cmpxchg8b "__percpu_arg(1) \ - CC_SET(z) \ - : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ - : "b" (__n1), "c" (__n2)); \ - __ret; \ + __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \ + __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ + \ + asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ + __percpu_arg([var]) \ + : [oval] "+a" (pco_old__), \ + [var] "+m" (__my_cpu_var(_var)) \ + : [nval] __pcpu_reg_##size(, pco_new__) \ + : "memory"); \ + \ + (typeof(_var))(unsigned long) pco_old__; \ }) -#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#endif /* CONFIG_X86_CMPXCHG64 */ +#define percpu_try_cmpxchg_op(size, qual, _var, _ovalp, _nval) \ +({ \ + bool success; \ + __pcpu_type_##size *pco_oval__ = (__pcpu_type_##size *)(_ovalp); \ + __pcpu_type_##size pco_old__ = *pco_oval__; \ + __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \ + \ + asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ + __percpu_arg([var]) \ + : "=@ccz" (success), \ + [oval] "+a" (pco_old__), \ + [var] "+m" (__my_cpu_var(_var)) \ + : [nval] __pcpu_reg_##size(, pco_new__) \ + : "memory"); \ + if (unlikely(!success)) \ + *pco_oval__ = pco_old__; \ + \ + likely(success); \ +}) + +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ +({ \ + union { \ + u64 var; \ + struct { \ + u32 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + \ + old__.var; \ +}) + +#define raw_cpu_cmpxchg64(pcp, oval, 
nval) percpu_cmpxchg64_op(8, , pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval) + +#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \ +({ \ + bool success; \ + u64 *_oval = (u64 *)(_ovalp); \ + union { \ + u64 var; \ + struct { \ + u32 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = *_oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("=@ccz" (success), \ + [var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + if (unlikely(!success)) \ + *_oval = old__.var; \ + \ + likely(success); \ +}) + +#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval) +#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval) + +#endif /* defined(CONFIG_X86_32) && !defined(CONFIG_UML) */ -/* - * Per cpu atomic 64 bit operations are only available under 64 bit. - * 32 bit must fall back to generic operations. - */ #ifdef CONFIG_X86_64 -#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval); +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval); -/* - * Pretty complex macro to generate cmpxchg16 instruction. The instruction - * is not supported on early AMD64 processors so we must be able to emulate - * it in software. The address used in the cmpxchg16 instruction must be - * aligned to a 16 byte boundary. 
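
The 32-bit cmpxchg64 helpers lean on the CMPXCHG8B register convention:
comparand in EDX:EAX, replacement in ECX:EBX, which is exactly what the
low/high union splits out (little-endian layout assumed). A rough C
model of the instruction's semantics, names hypothetical:

	union u64_halves {
		u64 whole;
		struct { u32 low, high; };	/* low -> EAX/EBX, high -> EDX/ECX */
	};

	/* Sketch only: CMPXCHG8B semantics; ZF becomes the return value. */
	static bool cmpxchg8b_model(u64 *mem, union u64_halves *old,
				    union u64_halves newval)
	{
		if (*mem == old->whole) {
			*mem = newval.whole;
			return true;
		}
		old->whole = *mem;	/* failure updates EDX:EAX */
		return false;
	}
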
- */ -#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ +#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval); +#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval); + +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \ ({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ - "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ - X86_FEATURE_CX16, \ - ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ - "+m" (pcp2), "+d" (__o2)), \ - "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ - __ret; \ + union { \ + u128 var; \ + struct { \ + u64 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + \ + old__.var; \ }) -#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double -#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval) +#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval) -#endif +#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \ +({ \ + bool success; \ + u128 *_oval = (u128 *)(_ovalp); \ + union { \ + u128 var; \ + struct { \ + u64 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = *_oval; \ + new__.var = _nval; \ + \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : ALT_OUTPUT_SP("=@ccz" (success), \ + [var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ + if (unlikely(!success)) \ + *_oval = old__.var; \ + \ + likely(success); \ +}) -static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, - const unsigned long __percpu *addr) -{ - unsigned long __percpu *a = - (unsigned long __percpu *)addr + nr / BITS_PER_LONG; +#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval) +#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval) + +#endif /* CONFIG_X86_64 */ + +#define raw_cpu_read_1(pcp) __raw_cpu_read(1, , pcp) +#define raw_cpu_read_2(pcp) __raw_cpu_read(2, , pcp) +#define raw_cpu_read_4(pcp) __raw_cpu_read(4, , pcp) +#define raw_cpu_write_1(pcp, val) __raw_cpu_write(1, , pcp, val) +#define raw_cpu_write_2(pcp, val) __raw_cpu_write(2, , pcp, val) +#define raw_cpu_write_4(pcp, val) __raw_cpu_write(4, , pcp, val) + +#define this_cpu_read_1(pcp) __raw_cpu_read(1, volatile, pcp) +#define this_cpu_read_2(pcp) __raw_cpu_read(2, volatile, pcp) +#define this_cpu_read_4(pcp) __raw_cpu_read(4, volatile, pcp) +#define this_cpu_write_1(pcp, val) __raw_cpu_write(1, volatile, pcp, val) +#define this_cpu_write_2(pcp, val) __raw_cpu_write(2, volatile, pcp, val) +#define this_cpu_write_4(pcp, val) __raw_cpu_write(4, volatile, pcp, val) + +#define this_cpu_read_stable_1(pcp) __raw_cpu_read_stable(1, pcp) +#define this_cpu_read_stable_2(pcp) __raw_cpu_read_stable(2, pcp) +#define 
this_cpu_read_stable_4(pcp) __raw_cpu_read_stable(4, pcp) + +#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_binary_op(1, , "and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_binary_op(2, , "and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_binary_op(4, , "and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_binary_op(1, , "or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_binary_op(2, , "or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_binary_op(4, , "or", (pcp), val) +#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) + +#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_binary_op(1, volatile, "and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_binary_op(2, volatile, "and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_binary_op(4, volatile, "and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_binary_op(1, volatile, "or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_binary_op(2, volatile, "or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_binary_op(4, volatile, "or", (pcp), val) +#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval) + +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val) +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval) +#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval) +#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval) +#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval) + +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val) +#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval) +#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval) +#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval) +#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval) +/* + * Per-CPU atomic 64-bit operations are only available under 64-bit kernels. + * 32-bit kernels must fall back to generic operations. 
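
Every _1/_2/_4 accessor above gets picked by sizeof() in the generic
layer; roughly what include/linux/percpu-defs.h does with
__pcpu_size_call(), as a simplified sketch (the real macro also issues
preemption sanity checks; the sketch's name is made up):

	#define pcpu_size_call_sketch(stem, variable, ...)		\
	do {								\
		switch (sizeof(variable)) {				\
		case 1: stem##1(variable, __VA_ARGS__); break;		\
		case 2: stem##2(variable, __VA_ARGS__); break;		\
		case 4: stem##4(variable, __VA_ARGS__); break;		\
		case 8: stem##8(variable, __VA_ARGS__); break;		\
		default: __bad_size_call_parameter();			\
		}							\
	} while (0)
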
+ */ #ifdef CONFIG_X86_64 - return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; -#else - return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; -#endif -} -static inline bool x86_this_cpu_variable_test_bit(int nr, - const unsigned long __percpu *addr) -{ - bool oldbit; +#define raw_cpu_read_8(pcp) __raw_cpu_read(8, , pcp) +#define raw_cpu_write_8(pcp, val) __raw_cpu_write(8, , pcp, val) + +#define this_cpu_read_8(pcp) __raw_cpu_read(8, volatile, pcp) +#define this_cpu_write_8(pcp, val) __raw_cpu_write(8, volatile, pcp, val) + +#define this_cpu_read_stable_8(pcp) __raw_cpu_read_stable(8, pcp) + +#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_binary_op(8, , "and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_binary_op(8, , "or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val) +#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval) +#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval) + +#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_binary_op(8, volatile, "and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_binary_op(8, volatile, "or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) +#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) +#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval) + +#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp) + +#else /* !CONFIG_X86_64: */ - asm volatile("btl "__percpu_arg(2)",%1" - CC_SET(c) - : CC_OUT(c) (oldbit) - : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); +/* There is no generic 64-bit read stable operation for 32-bit targets. */ +#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; }) - return oldbit; -} +#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp) -#define x86_this_cpu_test_bit(nr, addr) \ - (__builtin_constant_p((nr)) \ - ? x86_this_cpu_constant_test_bit((nr), (addr)) \ - : x86_this_cpu_variable_test_bit((nr), (addr))) +#endif /* CONFIG_X86_64 */ + +#define this_cpu_read_const(pcp) __raw_cpu_read_const(pcp) + +/* + * this_cpu_read() makes the compiler load the per-CPU variable every time + * it is accessed while this_cpu_read_stable() allows the value to be cached. + * this_cpu_read_stable() is more efficient and can be used if its value + * is guaranteed to be valid across CPUs. The current users include + * current_task and cpu_current_top_of_stack, both of which are + * actually per-thread variables implemented as per-CPU variables and + * thus stable for the duration of the respective task. + */ +#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp) + +#define x86_this_cpu_constant_test_bit(_nr, _var) \ +({ \ + unsigned long __percpu *addr__ = \ + (unsigned long __percpu *)&(_var) + BIT_WORD(_nr); \ + \ + !!(BIT_MASK(_nr) & raw_cpu_read(*addr__)); \ +}) + +#define x86_this_cpu_variable_test_bit(_nr, _var) \ +({ \ + bool oldbit; \ + \ + asm volatile("btl %[nr], " __percpu_arg([var]) \ + : "=@ccc" (oldbit) \ + : [var] "m" (__my_cpu_var(_var)), \ + [nr] "rI" (_nr)); \ + oldbit; \ +}) + +#define x86_this_cpu_test_bit(_nr, _var) \ + (__builtin_constant_p(_nr) \ + ? 
x86_this_cpu_constant_test_bit(_nr, _var) \ + : x86_this_cpu_variable_test_bit(_nr, _var)) #include <asm-generic/percpu.h> /* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); +DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_SMP @@ -568,46 +622,47 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); { [0 ... NR_CPUS-1] = _initvalue }; \ __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map -#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) -#define DECLARE_EARLY_PER_CPU(_type, _name) \ - DECLARE_PER_CPU(_type, _name); \ - extern __typeof__(_type) *_name##_early_ptr; \ +#define DECLARE_EARLY_PER_CPU(_type, _name) \ + DECLARE_PER_CPU(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ extern __typeof__(_type) _name##_early_map[] -#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ - DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ - extern __typeof__(_type) *_name##_early_ptr; \ +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ + DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ + extern __typeof__(_type) *_name##_early_ptr; \ extern __typeof__(_type) _name##_early_map[] -#define early_per_cpu_ptr(_name) (_name##_early_ptr) -#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) -#define early_per_cpu(_name, _cpu) \ - *(early_per_cpu_ptr(_name) ? \ - &early_per_cpu_ptr(_name)[_cpu] : \ +#define early_per_cpu_ptr(_name) (_name##_early_ptr) +#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) + +#define early_per_cpu(_name, _cpu) \ + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ &per_cpu(_name, _cpu)) -#else /* !CONFIG_SMP */ -#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ +#else /* !CONFIG_SMP: */ +#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ DEFINE_PER_CPU(_type, _name) = _initvalue #define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue -#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ +#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) -#define DECLARE_EARLY_PER_CPU(_type, _name) \ +#define DECLARE_EARLY_PER_CPU(_type, _name) \ DECLARE_PER_CPU(_type, _name) -#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ +#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ DECLARE_PER_CPU_READ_MOSTLY(_type, _name) -#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) -#define early_per_cpu_ptr(_name) NULL +#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) +#define early_per_cpu_ptr(_name) NULL /* no early_per_cpu_map() */ -#endif /* !CONFIG_SMP */ +#endif /* !CONFIG_SMP */ #endif /* _ASM_X86_PERCPU_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8bdf74902293..7276ba70c88a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -2,12 +2,14 @@ #ifndef _ASM_X86_PERF_EVENT_H #define _ASM_X86_PERF_EVENT_H +#include <linux/static_call.h> + /* * Performance event hw details: */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 3 +#define INTEL_PMC_MAX_FIXED 16 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 @@ -29,9 +31,29 @@ #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define 
ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35) +#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36) +#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40) + +#define INTEL_FIXED_BITS_STRIDE 4 +#define INTEL_FIXED_0_KERNEL (1ULL << 0) +#define INTEL_FIXED_0_USER (1ULL << 1) +#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2) +#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3) +#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2) #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) + +#define INTEL_FIXED_BITS_MASK \ + (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | \ + INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI | \ + ICL_FIXED_0_ADAPTIVE) + +#define intel_fixed_bits_by_idx(_idx, _bits) \ + ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE)) #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) @@ -48,11 +70,22 @@ #define AMD64_L3_SLICE_SHIFT 48 #define AMD64_L3_SLICE_MASK \ - ((0xFULL) << AMD64_L3_SLICE_SHIFT) + (0xFULL << AMD64_L3_SLICE_SHIFT) +#define AMD64_L3_SLICEID_MASK \ + (0x7ULL << AMD64_L3_SLICE_SHIFT) #define AMD64_L3_THREAD_SHIFT 56 #define AMD64_L3_THREAD_MASK \ - ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + (0xFFULL << AMD64_L3_THREAD_SHIFT) +#define AMD64_L3_F19H_THREAD_MASK \ + (0x3ULL << AMD64_L3_THREAD_SHIFT) + +#define AMD64_L3_EN_ALL_CORES BIT_ULL(47) +#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) + +#define AMD64_L3_COREID_SHIFT 42 +#define AMD64_L3_COREID_MASK \ + (0x7ULL << AMD64_L3_COREID_SHIFT) #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ @@ -74,6 +107,26 @@ #define AMD64_RAW_EVENT_MASK_NB \ (AMD64_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK) + +#define AMD64_PERFMON_V2_EVENTSEL_EVENT_NB \ + (AMD64_EVENTSEL_EVENT | \ + GENMASK_ULL(37, 36)) + +#define AMD64_PERFMON_V2_EVENTSEL_UMASK_NB \ + (ARCH_PERFMON_EVENTSEL_UMASK | \ + GENMASK_ULL(27, 24)) + +#define AMD64_PERFMON_V2_RAW_EVENT_MASK_NB \ + (AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \ + AMD64_PERFMON_V2_EVENTSEL_UMASK_NB) + +#define AMD64_PERFMON_V2_ENABLE_UMC BIT_ULL(31) +#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC GENMASK_ULL(7, 0) +#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC GENMASK_ULL(9, 8) +#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC \ + (AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC | \ + AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC) + #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 #define AMD64_NUM_COUNTERS_NB 4 @@ -87,6 +140,21 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define ARCH_PERFMON_EVENTS_COUNT 7 +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) +#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_XMMS BIT_ULL(2) +#define PEBS_DATACFG_LBRS BIT_ULL(3) +#define PEBS_DATACFG_CNTR BIT_ULL(4) +#define PEBS_DATACFG_METRICS BIT_ULL(5) +#define PEBS_DATACFG_LBR_SHIFT 24 +#define PEBS_DATACFG_CNTR_SHIFT 32 +#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0) +#define PEBS_DATACFG_FIX_SHIFT 48 +#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0) + +/* Steal the highest bit of pebs_data_cfg for SW usage */ +#define PEBS_UPDATE_DS_SW BIT_ULL(63) + /* * Intel "Architectural Performance Monitoring" CPUID * detection/enumeration details: @@ -118,11 +186,112 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; +/* + * Intel "Architectural Performance Monitoring extension" CPUID + * 
detection/enumeration details: + */ +#define ARCH_PERFMON_EXT_LEAF 0x00000023 +#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 +#define ARCH_PERFMON_ACR_LEAF 0x2 +#define ARCH_PERFMON_PEBS_CAP_LEAF 0x4 +#define ARCH_PERFMON_PEBS_COUNTER_LEAF 0x5 + +union cpuid35_eax { + struct { + unsigned int leaf0:1; + /* Counters Sub-Leaf */ + unsigned int cntr_subleaf:1; + /* Auto Counter Reload Sub-Leaf */ + unsigned int acr_subleaf:1; + /* Events Sub-Leaf */ + unsigned int events_subleaf:1; + /* arch-PEBS Sub-Leaves */ + unsigned int pebs_caps_subleaf:1; + unsigned int pebs_cnts_subleaf:1; + unsigned int reserved:26; + } split; + unsigned int full; +}; + +union cpuid35_ebx { + struct { + /* UnitMask2 Supported */ + unsigned int umask2:1; + /* EQ-bit Supported */ + unsigned int eq:1; + unsigned int reserved:30; + } split; + unsigned int full; +}; + +/* + * Intel Architectural LBR CPUID detection/enumeration details: + */ +union cpuid28_eax { + struct { + /* Supported LBR depth values */ + unsigned int lbr_depth_mask:8; + unsigned int reserved:22; + /* Deep C-state Reset */ + unsigned int lbr_deep_c_reset:1; + /* IP values contain LIP */ + unsigned int lbr_lip:1; + } split; + unsigned int full; +}; + +union cpuid28_ebx { + struct { + /* CPL Filtering Supported */ + unsigned int lbr_cpl:1; + /* Branch Filtering Supported */ + unsigned int lbr_filter:1; + /* Call-stack Mode Supported */ + unsigned int lbr_call_stack:1; + } split; + unsigned int full; +}; + +union cpuid28_ecx { + struct { + /* Mispredict Bit Supported */ + unsigned int lbr_mispred:1; + /* Timed LBRs Supported */ + unsigned int lbr_timed_lbr:1; + /* Branch Type Field Supported */ + unsigned int lbr_br_type:1; + unsigned int reserved:13; + /* Branch counters (Event Logging) Supported */ + unsigned int lbr_counters:4; + } split; + unsigned int full; +}; + +/* + * AMD "Extended Performance Monitoring and Debug" CPUID + * detection/enumeration details: + */ +union cpuid_0x80000022_ebx { + struct { + /* Number of Core Performance Counters */ + unsigned int num_core_pmc:4; + /* Number of available LBR Stack Entries */ + unsigned int lbr_v2_stack_sz:6; + /* Number of Data Fabric Counters */ + unsigned int num_df_pmc:6; + /* Number of Unified Memory Controller Counters */ + unsigned int num_umc_pmc:6; + } split; + unsigned int full; +}; + struct x86_pmu_capability { int version; int num_counters_gp; @@ -131,19 +300,36 @@ struct x86_pmu_capability { int bit_width_fixed; unsigned int events_mask; int events_mask_len; + unsigned int pebs_ept :1; }; /* * Fixed-purpose performance events: */ +/* RDPMC offset for Fixed PMCs */ +#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30) +#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29) + /* - * All 3 fixed-mode PMCs are configured via this single MSR: + * All the fixed-mode PMCs are configured via this single MSR: */ #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d /* - * The counts are available in three separate MSRs: + * There is no event-code assigned to the fixed-mode PMCs. + * + * For a fixed-mode PMC, which has an equivalent event on a general-purpose + * PMC, the event-code of the equivalent event is used for the fixed-mode PMC, + * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core. + * + * For a fixed-mode PMC, which doesn't have an equivalent event, a + * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS. + * The pseudo event-code for a fixed-mode PMC must be 0x00. + * The pseudo umask-code is 0xX. 
The X equals the index of the fixed + * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300. + * + * The counts are available in separate MSRs: */ /* Instr_Retired.Any: */ @@ -154,27 +340,280 @@ struct x86_pmu_capability { #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) -/* CPU_CLK_Unhalted.Ref: */ +/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) +/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */ +#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c +#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) +#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) + +/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */ +/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */ +/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */ + +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + /* * We model BTS tracing as another fixed-mode PMC. * - * We choose a value in the middle of the fixed event range, since lower + * We choose the value 47 for the fixed index of BTS, since lower * values are used by actual fixed events and higher values are used * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. */ -#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15) -#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) -#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62) -#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) -#define GLOBAL_STATUS_ASIF BIT_ULL(60) -#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) -#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) +/* + * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for + * each TopDown metric event. + * + * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS). + */ +#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16) +#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0) +#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1) +#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2) +#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3) +#define INTEL_PMC_IDX_TD_HEAVY_OPS (INTEL_PMC_IDX_METRIC_BASE + 4) +#define INTEL_PMC_IDX_TD_BR_MISPREDICT (INTEL_PMC_IDX_METRIC_BASE + 5) +#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6) +#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7) +#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND +#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \ + INTEL_PMC_MSK_FIXED_SLOTS) + +/* + * There is no event-code assigned to the TopDown events. + * + * For the slots event, use the pseudo code of the fixed counter 3. + * + * For the metric events, the pseudo event-code is 0x00. + * The pseudo umask-code starts from the middle of the pseudo event + * space, 0x80. 
+ */ +#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */ +/* Level 1 metrics */ +#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */ +#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */ +#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */ +#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */ +/* Level 2 metrics */ +#define INTEL_TD_METRIC_HEAVY_OPS 0x8400 /* Heavy Operations metric */ +#define INTEL_TD_METRIC_BR_MISPREDICT 0x8500 /* Branch Mispredict metric */ +#define INTEL_TD_METRIC_FETCH_LAT 0x8600 /* Fetch Latency metric */ +#define INTEL_TD_METRIC_MEM_BOUND 0x8700 /* Memory bound metric */ + +#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND +#define INTEL_TD_METRIC_NUM 8 + +#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0 +#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT) + +static inline bool is_metric_idx(int idx) +{ + return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM; +} + +static inline bool is_topdown_idx(int idx) +{ + return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS; +} + +#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \ + (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN) + +#define GLOBAL_STATUS_COND_CHG BIT_ULL(63) +#define GLOBAL_STATUS_BUFFER_OVF_BIT 62 +#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT) +#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) +#define GLOBAL_STATUS_ASIF BIT_ULL(60) +#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) +#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55 +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT) +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54 +#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT) +#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48 + +#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48) +/* + * We model guest LBR event tracing as another fixed-mode PMC like BTS. + * + * We choose bit 58 because it's used to indicate LBR stack frozen state + * for architectural perfmon v4, also we unconditionally mask that bit in + * the handle_pmi_common(), so it'll never be set in the overflow handling. + * + * With this fake counter assigned, the guest LBR event user (such as KVM), + * can program the LBR registers on its own, and we don't actually do anything + * with then in the host context. + */ +#define INTEL_PMC_IDX_FIXED_VLBR (GLOBAL_STATUS_LBRS_FROZEN_BIT) + +/* + * Pseudo-encoding the guest LBR event as event=0x00,umask=0x1b, + * since it would claim bit 58 which is effectively Fixed26. 
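
is_metric_idx() above uses the unsigned-subtraction idiom to fold both
bounds checks into a single compare; spelled out as a sketch:

	/* (unsigned)(idx - base) < num  <=>  base <= idx && idx < base + num */
	static bool in_metric_range_sketch(int idx)
	{
		return (unsigned int)(idx - INTEL_PMC_IDX_METRIC_BASE) <
		       INTEL_TD_METRIC_NUM;
	}
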
+ */ +#define INTEL_FIXED_VLBR_EVENT 0x1b00 + +/* + * Adaptive PEBS v4 + */ + +struct pebs_basic { + u64 format_group:32, + retire_latency:16, + format_size:16; + u64 ip; + u64 applicable_counters; + u64 tsc; +}; + +struct pebs_meminfo { + u64 address; + u64 aux; + union { + /* pre Alder Lake */ + u64 mem_latency; + /* Alder Lake and later */ + struct { + u64 instr_latency:16; + u64 pad2:16; + u64 cache_latency:16; + u64 pad3:16; + }; + }; + u64 tsx_tuning; +}; + +struct pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15; +}; + +struct pebs_xmm { + u64 xmm[16*2]; /* two entries for each register */ +}; + +struct pebs_cntr_header { + u32 cntr; + u32 fixed; + u32 metrics; + u32 reserved; +}; + +#define INTEL_CNTR_METRICS 0x3 + +/* + * Arch PEBS + */ +union arch_pebs_index { + struct { + u64 rsvd:4, + wr:23, + rsvd2:4, + full:1, + en:1, + rsvd3:3, + thresh:23, + rsvd4:5; + }; + u64 whole; +}; + +struct arch_pebs_header { + union { + u64 format; + struct { + u64 size:16, /* Record size */ + rsvd:14, + mode:1, /* 64BIT_MODE */ + cont:1, + rsvd2:3, + cntr:5, + lbr:2, + rsvd3:7, + xmm:1, + ymmh:1, + rsvd4:2, + opmask:1, + zmmh:1, + h16zmm:1, + rsvd5:5, + gpr:1, + aux:1, + basic:1; + }; + }; + u64 rsvd6; +}; + +struct arch_pebs_basic { + u64 ip; + u64 applicable_counters; + u64 tsc; + u64 retire :16, /* Retire Latency */ + valid :1, + rsvd :47; + u64 rsvd2; + u64 rsvd3; +}; + +struct arch_pebs_aux { + u64 address; + u64 rsvd; + u64 rsvd2; + u64 rsvd3; + u64 rsvd4; + u64 aux; + u64 instr_latency :16, + pad2 :16, + cache_latency :16, + pad3 :16; + u64 tsx_tuning; +}; + +struct arch_pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp; + u64 rsvd; +}; + +struct arch_pebs_xer_header { + u64 xstate; + u64 rsvd; +}; + +#define ARCH_PEBS_LBR_NAN 0x0 +#define ARCH_PEBS_LBR_NUM_8 0x1 +#define ARCH_PEBS_LBR_NUM_16 0x2 +#define ARCH_PEBS_LBR_NUM_VAR 0x3 +#define ARCH_PEBS_BASE_LBR_ENTRIES 8 +struct arch_pebs_lbr_header { + u64 rsvd; + u64 ctl; + u64 depth; + u64 ler_from; + u64 ler_to; + u64 ler_info; +}; + +struct arch_pebs_cntr_header { + u32 cntr; + u32 fixed; + u32 metrics; + u32 reserved; +}; + +/* + * AMD Extended Performance Monitoring and Debug cpuid feature detection + */ +#define EXT_PERFMON_DEBUG_FEATURES 0x80000022 /* * IBS cpuid feature detection @@ -197,6 +636,9 @@ struct x86_pmu_capability { #define IBS_CAPS_OPBRNFUSE (1U<<8) #define IBS_CAPS_FETCHCTLEXTD (1U<<9) #define IBS_CAPS_OPDATA4 (1U<<10) +#define IBS_CAPS_ZEN4 (1U<<11) +#define IBS_CAPS_OPLDLAT (1U<<12) +#define IBS_CAPS_OPDTLBPGSIZE (1U<<19) #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | IBS_CAPS_FETCHSAM \ @@ -209,27 +651,39 @@ struct x86_pmu_capability { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* ibs fetch bits/masks */ +/* IBS fetch bits/masks */ +#define IBS_FETCH_L3MISSONLY (1ULL<<59) #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* ibs op bits/masks */ -/* lower 4 bits of the current count are ignored: */ -#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) +/* + * IBS op bits/masks + * The lower 7 bits of the current count are random bits + * preloaded by hardware and ignored in software + */ +#define IBS_OP_LDLAT_EN (1ULL<<63) +#define IBS_OP_LDLAT_THRSH (0xFULL<<59) +#define IBS_OP_CUR_CNT (0xFFF80ULL<<32) +#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32) 
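
The two masks directly above split the current-count field at bit 32:
IBS_OP_CUR_CNT keeps bits 51:39, IBS_OP_CUR_CNT_RAND covers the seven
hardware-randomized low bits. A sketch of reading the deterministic
part (helper name made up):

	static u64 ibs_op_cur_cnt_sketch(u64 op_ctl)
	{
		/* Drop the 7 preloaded random bits, keep the real count. */
		return (op_ctl & IBS_OP_CUR_CNT) >> 32;
	}
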
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_L3MISSONLY (1ULL<<16) #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */ #define IBS_RIP_INVALID (1ULL<<38) #ifdef CONFIG_X86_LOCAL_APIC extern u32 get_ibs_caps(void); +extern int forward_event_to_ibs(struct perf_event *event); #else static inline u32 get_ibs_caps(void) { return 0; } +static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; } #endif #ifdef CONFIG_PERF_EVENTS @@ -248,26 +702,28 @@ extern void perf_events_lapic_init(void); #define PERF_EFLAGS_VM (1UL << 5) struct pt_regs; -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); -extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + +extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_arch_misc_flags(struct pt_regs *regs); +extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs); +#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs) +#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #include <asm/stacktrace.h> /* - * We abuse bit 3 from flags to pass exact information, see perf_misc_flags - * and the comment with PERF_EFLAGS_EXACT. + * We abuse bit 3 from flags to pass exact information, see + * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT. */ #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ip = (__ip); \ - (regs)->bp = caller_frame_pointer(); \ + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ (regs)->cs = __KERNEL_CS; \ regs->flags = 0; \ - asm volatile( \ - _ASM_MOV "%%"_ASM_SP ", %0\n" \ - : "=m" ((regs)->sp) \ - :: "memory" \ - ); \ } struct perf_guest_switch_msr { @@ -275,33 +731,78 @@ struct perf_guest_switch_msr { u64 host, guest; }; -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +struct x86_pmu_lbr { + unsigned int nr; + unsigned int from; + unsigned int to; + unsigned int info; + bool has_callstack; +}; + extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); +extern u64 perf_get_hw_event_config(int hw_event); extern void perf_check_microcode(void); +extern void perf_clear_dirty_counters(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else -static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { - *nr = 0; - return NULL; + memset(cap, 0, sizeof(*cap)); } -static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +static inline u64 perf_get_hw_event_config(int hw_event) { - memset(cap, 0, sizeof(*cap)); + return 0; } static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); +extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr); +#else +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); +static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +{ + memset(lbr, 0, sizeof(*lbr)); +} +#endif + #ifdef CONFIG_CPU_SUP_INTEL extern void 
intel_pt_handle_vmx(int on); +#else +static inline void intel_pt_handle_vmx(int on) +{ + +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) extern void amd_pmu_enable_virt(void); extern void amd_pmu_disable_virt(void); + +#if defined(CONFIG_PERF_EVENTS_AMD_BRS) + +#define PERF_NEEDS_LOPWR_CB 1 + +/* + * architectural low power callback impacts + * drivers/acpi/processor_idle.c + * drivers/acpi/acpi_pad.c + */ +extern void perf_amd_brs_lopwr_cb(bool lopwr_in); + +DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); + +static __always_inline void perf_lopwr_cb(bool lopwr_in) +{ + static_call_mod(perf_lopwr_cb)(lopwr_in); +} + +#endif /* PERF_NEEDS_LOPWR_CB */ + #else static inline void amd_pmu_enable_virt(void) { } static inline void amd_pmu_disable_virt(void) { } diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 94de1a05aeba..d65e338b6a5f 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) static inline int p4_ht_active(void) { #ifdef CONFIG_SMP - return smp_num_siblings > 1; + return __max_threads_per_core > 1; #endif return 0; } @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (__max_threads_per_core == 2) return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a281e61ec60c..c88691b15f3c 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,12 @@ #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> +#include <asm/cpufeature.h> + +#define __HAVE_ARCH_PTE_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE +#include <asm-generic/pgalloc.h> + static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } #ifdef CONFIG_PARAVIRT_XXL @@ -26,20 +32,16 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} #endif /* - * Flags to use when allocating a user page table page. - */ -extern gfp_t __userpte_alloc_gfp; - -#ifdef CONFIG_PAGE_TABLE_ISOLATION -/* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. + * In case of Page Table Isolation active, we acquire two PGDs instead of one. + * Being order-1, it is both 8k in size and 8k-aligned. That lets us just + * flip bit 12 in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +static inline unsigned int pgd_allocation_order(void) +{ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + return 1; + return 0; +} /* * Allocate and free page tables. @@ -47,24 +49,8 @@ extern gfp_t __userpte_alloc_gfp; extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pte_t *pte_alloc_one_kernel(struct mm_struct *); extern pgtable_t pte_alloc_one(struct mm_struct *); -/* Should really implement gc for free page table pages. This could be - done with a reference count in struct page. 
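
With the PGD_ALLOCATION_ORDER constant gone, callers ask
pgd_allocation_order() at allocation time instead. A sketch of the call
site shape, assuming the GFP_PGTABLE_USER flags that
asm-generic/pgalloc.h provides:

	/* Sketch: allocate a PGD honouring the PTI-aware order. */
	static pgd_t *pgd_alloc_sketch(void)
	{
		return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
						 pgd_allocation_order());
	}
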
*/ - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, @@ -96,33 +82,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } -#define pmd_pgtable(pmd) pmd_page(pmd) - #if CONFIG_PGTABLE_LEVELS > 2 -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - struct page *page; - gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO; - - if (mm == &init_mm) - gfp &= ~__GFP_ACCOUNT; - page = alloc_pages(gfp, 0); - if (!page) - return NULL; - if (!pgtable_pmd_page_ctor(page)) { - __free_pages(page, 0); - return NULL; - } - return (pmd_t *)page_address(page); -} - -static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) -{ - BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); - pgtable_pmd_page_dtor(virt_to_page(pmd)); - free_page((unsigned long)pmd); -} - extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, @@ -160,21 +120,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pu set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_KERNEL_ACCOUNT; - - if (mm == &init_mm) - gfp &= ~__GFP_ACCOUNT; - return (pud_t *)get_zeroed_page(gfp); -} - -static inline void pud_free(struct mm_struct *mm, pud_t *pud) -{ - BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - free_page((unsigned long)pud); -} - extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, @@ -200,24 +145,6 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4 set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } -static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_KERNEL_ACCOUNT; - - if (mm == &init_mm) - gfp &= ~__GFP_ACCOUNT; - return (p4d_t *)get_zeroed_page(gfp); -} - -static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) -{ - if (!pgtable_l5_enabled()) - return; - - BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); - free_page((unsigned long)p4d); -} - extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 60d0f9015317..e9482a11ac52 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -80,21 +80,37 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi return ((value >> rightshift) & mask) << leftshift; } -/* Encode and de-code a swap entry */ +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * <----------------- offset ------------------> 0 E <- type --> 0 + * + * E is the exclusive marker that is not stored in swap entries. 
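
Given that bit layout, the __swp_*() macros that follow must round-trip
any (type, offset) pair that fits. A quick property sketch (helper name
made up):

	static void swp_roundtrip_check(unsigned int type, unsigned long offset)
	{
		swp_entry_t e = __swp_entry(type, offset);

		WARN_ON(__swp_type(e) != type || __swp_offset(e) != offset);
	}
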
+ */ #define SWP_TYPE_BITS 5 +#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1) +#define _SWP_TYPE_SHIFT (_PAGE_BIT_PRESENT + 1) #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) -#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ - & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_type(x) (((x).val >> _SWP_TYPE_SHIFT) \ + & _SWP_TYPE_MASK) #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ + (((type) & _SWP_TYPE_MASK) << _SWP_TYPE_SHIFT) \ | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE + /* No inverted PFNs on 2 level page tables */ static inline u64 protnone_mask(u64 val) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 6deb6cd236e3..54690bd4ddbe 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H #define _ASM_X86_PGTABLE_2LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> typedef unsigned long pteval_t; @@ -16,22 +16,22 @@ typedef union { pteval_t pte; pteval_t pte_low; } pte_t; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#define SHARED_KERNEL_PMD 0 +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* - * traditional i386 two-level paging structure: + * Traditional i386 two-level paging structure: */ #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 - /* - * the i386 is two-level, so we don't really have any - * PMD directory physically. + * The i386 is two-level, so we don't really have any + * PMD directory physically: */ +#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index f8b1ad2c3828..dabafba957ea 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_H #define _ASM_X86_PGTABLE_3LEVEL_H -#include <asm/atomic64_32.h> - /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. @@ -21,7 +19,15 @@ pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -/* Rules for using set_pte: the pte being assigned *must* be +#define pxx_xchg64(_pxx, _ptr, _val) ({ \ + _pxx##val_t *_p = (_pxx##val_t *)_ptr; \ + _pxx##val_t _o = *_p; \ + do { } while (!try_cmpxchg64(_p, &_o, (_val))); \ + native_make_##_pxx(_o); \ +}) + +/* + * Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is * not possible, use pte_get_and_clear to obtain the old pte @@ -29,81 +35,27 @@ */ static inline void native_set_pte(pte_t *ptep, pte_t pte) { - ptep->pte_high = pte.pte_high; + WRITE_ONCE(ptep->pte_high, pte.pte_high); smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -#define pmd_read_atomic pmd_read_atomic -/* - * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by gcc. 
Problem is, in certain places - * where pte_offset_map_lock is called, concurrent page faults are - * allowed, if the mmap_sem is hold for reading. An example is mincore - * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate rightfully does a set_64bit, but if we're reading the - * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen - * because gcc will not read the 64bit of the pmd atomically. To fix - * this all places running pmd_offset_map_lock() while holding the - * mmap_sem in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null nor not, and in turn to know if - * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd - * operations. - * - * Without THP if the mmap_sem is hold for reading, the pmd can only - * transition from null to not null while pmd_read_atomic runs. So - * we can always return atomic pmd values with this function. - * - * With THP if the mmap_sem is hold for reading, the pmd can become - * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic. We could read it really - * atomically here with a atomic64_read for the THP enabled case (and - * it would be a whole lot simpler), but to avoid using cmpxchg8b we - * only return an atomic pmdval if the low part of the pmdval is later - * found stable (i.e. pointing to a pte). And we're returning a none - * pmdval if the low part of the pmd is none. In some cases the high - * and low part of the pmdval returned may not be consistent if THP is - * enabled (the low part may point to previously mapped hugepage, - * while the high part may point to a more recently mapped hugepage), - * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part - * of the pmd to be read atomically to decide if the pmd is unstable - * or not, with the only exception of when the low part of the pmd is - * zero in which case we return a none pmd. - */ -static inline pmd_t pmd_read_atomic(pmd_t *pmdp) -{ - pmdval_t ret; - u32 *tmp = (u32 *)pmdp; - - ret = (pmdval_t) (*tmp); - if (ret) { - /* - * If the low part is null, we must not read the high part - * or we can end up with a partial pmd. 
- */ - smp_rmb(); - ret |= ((pmdval_t)*(tmp + 1)) << 32; - } - - return (pmd_t) { ret }; + WRITE_ONCE(ptep->pte_low, pte.pte_low); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { - set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); + pxx_xchg64(pte, ptep, native_pte_val(pte)); } static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); + pxx_xchg64(pmd, pmdp, native_pmd_val(pmd)); } static inline void native_set_pud(pud_t *pudp, pud_t pud) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); #endif - set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); + pxx_xchg64(pud, pudp, native_pud_val(pud)); } /* @@ -114,17 +66,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud) static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep->pte_low = 0; + WRITE_ONCE(ptep->pte_low, 0); smp_wmb(); - ptep->pte_high = 0; + WRITE_ONCE(ptep->pte_high, 0); } -static inline void native_pmd_clear(pmd_t *pmd) +static inline void native_pmd_clear(pmd_t *pmdp) { - u32 *tmp = (u32 *)pmd; - *tmp = 0; + WRITE_ONCE(pmdp->pmd_low, 0); smp_wmb(); - *(tmp + 1) = 0; + WRITE_ONCE(pmdp->pmd_high, 0); } static inline void native_pud_clear(pud_t *pudp) @@ -147,41 +98,26 @@ static inline void pud_clear(pud_t *pudp) */ } + #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { - pte_t res; - - res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); - - return res; + return pxx_xchg64(pte, ptep, 0ULL); } -#else -#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) -#endif - -union split_pmd { - struct { - u32 pmd_low; - u32 pmd_high; - }; - pmd_t pmd; -}; -#ifdef CONFIG_SMP static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) { - union split_pmd res, *orig = (union split_pmd *)pmdp; - - /* xchg acts as a barrier before setting of the high bits */ - res.pmd_low = xchg(&orig->pmd_low, 0); - res.pmd_high = orig->pmd_high; - orig->pmd_high = 0; + return pxx_xchg64(pmd, pmdp, 0ULL); +} - return res.pmd; +static inline pud_t native_pudp_get_and_clear(pud_t *pudp) +{ + return pxx_xchg64(pud, pudp, 0ULL); } #else +#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) #endif #ifndef pmdp_establish @@ -197,67 +133,47 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * anybody. 
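
The pxx_xchg64() helper these setters now share is a try_cmpxchg64()
loop at heart; its shape in plain C (sketch, name made up):

	static u64 xchg64_sketch(u64 *p, u64 newval)
	{
		u64 old = *p;

		do {
			/* try_cmpxchg64() refreshes 'old' on failure */
		} while (!try_cmpxchg64(p, &old, newval));

		return old;
	}
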
*/ if (!(pmd_val(pmd) & _PAGE_PRESENT)) { - union split_pmd old, new, *ptr; - - ptr = (union split_pmd *)pmdp; - - new.pmd = pmd; - /* xchg acts as a barrier before setting of the high bits */ - old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low); - old.pmd_high = ptr->pmd_high; - ptr->pmd_high = new.pmd_high; - return old.pmd; - } - - do { - old = *pmdp; - } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low); + old.pmd_high = READ_ONCE(pmdp->pmd_high); + WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high); - return old; -} -#endif - -#ifdef CONFIG_SMP -union split_pud { - struct { - u32 pud_low; - u32 pud_high; - }; - pud_t pud; -}; - -static inline pud_t native_pudp_get_and_clear(pud_t *pudp) -{ - union split_pud res, *orig = (union split_pud *)pudp; - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0)); -#endif - - /* xchg acts as a barrier before setting of the high bits */ - res.pud_low = xchg(&orig->pud_low, 0); - res.pud_high = orig->pud_high; - orig->pud_high = 0; + return old; + } - return res.pud; + return pxx_xchg64(pmd, pmdp, pmd.pmd); } -#else -#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) #endif -/* Encode and de-code a swap entry */ +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 + * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + * < type -> <---------------------- offset ---------------------- + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * --------------------------------------------> 0 E 0 0 0 0 0 0 0 + * + * E is the exclusive marker that is not stored in swap entries. + */ #define SWP_TYPE_BITS 5 +#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1) #define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) /* We always extract/encode the offset by shifting it all the way up, and then down again */ #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) -#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) -#define __swp_type(x) (((x).val) & 0x1f) -#define __swp_offset(x) ((x).val >> 5) -#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define __swp_type(x) (((x).val) & _SWP_TYPE_MASK) +#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS) +#define __swp_entry(type, offset) ((swp_entry_t){((type) & _SWP_TYPE_MASK) \ + | (offset) << SWP_TYPE_BITS}) /* * Normally, __swp_entry() converts from arch-independent swp_entry_t to @@ -285,52 +201,8 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ __pteval_swp_offset(pte))) -#define gup_get_pte gup_get_pte -/* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atomically, - * but that is not possible (without expensive cmpxchg8b) on PAE. 
What - * we do have is the guarantee that a PTE will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees 'l' iff pte_high - * sees 'h'. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have gotten rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. Because get_user_pages_fast() only operates on present ptes - * we're safe. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - pte_t pte; - - do { - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - } while (unlikely(pte.pte_low != ptep->pte_low)); - - return pte; -} +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE #include <asm/pgtable-invert.h> diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 33845d36897c..580b09bf6a45 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H #define _ASM_X86_PGTABLE_3LEVEL_DEFS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> typedef u64 pteval_t; @@ -18,14 +18,16 @@ typedef union { }; pteval_t pte; } pte_t; -#endif /* !__ASSEMBLY__ */ - -#ifdef CONFIG_PARAVIRT_XXL -#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \ - (pv_info.shared_kernel_pmd))) -#else -#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) -#endif + +typedef union { + struct { + unsigned long pmd_low, pmd_high; + }; + pmdval_t pmd; +} pmd_t; +#endif /* !__ASSEMBLER__ */ + +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* * PGDIR_SHIFT determines what a top-level page table entry can map diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h index a0c1525f1b6f..e12e52ae8083 100644 --- a/arch/x86/include/asm/pgtable-invert.h +++ b/arch/x86/include/asm/pgtable-invert.h @@ -2,7 +2,7 @@ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * A clear pte value is special, and doesn't get inverted. 
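
The pmd_t union added to pgtable-3level_types.h above is what lets the PAE helpers address one 64-bit entry as two 32-bit halves, replacing the old file-local union split_pmd. Because _PAGE_PRESENT sits in the low word, stores make an entry non-present first (clear low, then high) and present last (set high, then low), exactly the ordering the removed gup_get_pte() comment relied on. A standalone model of the layout, assuming little-endian x86:

#include <assert.h>
#include <stdint.h>

typedef union {
	struct {
		uint32_t pmd_low, pmd_high;
	};
	uint64_t pmd;
} pmd_model;			/* models the new pmd_t */

int main(void)
{
	pmd_model m = { .pmd = 0x8000000012345067ULL };

	/* On little-endian x86 the first member aliases the low word. */
	assert(m.pmd_low == 0x12345067u);
	assert(m.pmd_high == 0x80000000u);
	return 0;
}
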
@@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) return val; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 40616e805292..e33df3da6980 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,28 +15,35 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -/* - * Macros to add or remove encryption attribute - */ -#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) -#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/spinlock.h> #include <asm/x86_init.h> +#include <asm/pkru.h> +#include <asm/fpu/api.h> +#include <asm/coco.h> +#include <asm-generic/pgtable_uffd.h> +#include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; -int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); - -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); -void ptdump_walk_pgd_level_checkwx(void); +bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); + +struct seq_file; +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user); +bool ptdump_walk_pgd_level_checkwx(void); +#define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + #ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else -#define debug_checkwx() do { } while (0) #define debug_checkwx_user() do { } while (0) #endif @@ -46,7 +53,7 @@ void ptdump_walk_user_pgd_level_checkwx(void); */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; @@ -59,7 +66,6 @@ extern pmdval_t early_pmd_flags; #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) -#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -114,47 +120,81 @@ extern pmdval_t early_pmd_flags; #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v | set); +} + +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v & ~clear); +} + +static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v | set); +} + +static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v & ~clear); +} + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. 
*/ -static inline int pte_dirty(pte_t pte) +static inline bool pte_dirty(pte_t pte) { - return pte_flags(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY_BITS; } +static inline bool pte_shstk(pte_t pte) +{ + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; +} -static inline u32 read_pkru(void) +static inline int pte_young(pte_t pte) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - return __read_pkru(); - return 0; + return pte_flags(pte) & _PAGE_ACCESSED; } -static inline void write_pkru(u32 pkru) +static inline bool pte_decrypted(pte_t pte) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - __write_pkru(pkru); + return cc_mkdec(pte_val(pte)) == pte_val(pte); } -static inline int pte_young(pte_t pte) +#define pmd_dirty pmd_dirty +static inline bool pmd_dirty(pmd_t pmd) { - return pte_flags(pte) & _PAGE_ACCESSED; + return pmd_flags(pmd) & _PAGE_DIRTY_BITS; } -static inline int pmd_dirty(pmd_t pmd) +static inline bool pmd_shstk(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_DIRTY; + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == + (_PAGE_DIRTY | _PAGE_PSE); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; } -static inline int pud_dirty(pud_t pud) +static inline bool pud_dirty(pud_t pud) { - return pud_flags(pud) & _PAGE_DIRTY; + return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) @@ -162,9 +202,36 @@ static inline int pud_young(pud_t pud) return pud_flags(pud) & _PAGE_ACCESSED; } +static inline bool pud_shstk(pud_t pud) +{ + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == + (_PAGE_DIRTY | _PAGE_PSE); +} + static inline int pte_write(pte_t pte) { - return pte_flags(pte) & _PAGE_RW; + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. + */ + return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); +} + +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. 
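
pte_shstk() and friends above key off the one flag combination the hardware reserves for shadow stacks: Write=0 together with Dirty=1. A userspace model of the predicate (the real helpers additionally require X86_FEATURE_SHSTK, and the PMD/PUD variants also check _PAGE_PSE):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define _PAGE_RW	(1ULL << 1)
#define _PAGE_DIRTY	(1ULL << 6)

/* Model of pte_shstk(): shadow stack is the Write=0,Dirty=1 encoding. */
static bool is_shstk(uint64_t flags)
{
	return (flags & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
}

int main(void)
{
	assert(is_shstk(_PAGE_DIRTY));		   /* Write=0,Dirty=1 -> shstk */
	assert(!is_shstk(_PAGE_RW | _PAGE_DIRTY)); /* ordinary writable page   */
	assert(!is_shstk(0));			   /* read-only, clean         */
	return 0;
}

This is also why pte_write() above reports shadow-stack pages as writable even though _PAGE_RW is clear: they are logically writable, just not through ordinary stores.
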
+ */ + return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); +} + +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; } static inline int pte_huge(pte_t pte) @@ -191,6 +258,8 @@ static inline int pte_special(pte_t pte) static inline u64 protnone_mask(u64 val); +#define PFN_PTE_SHIFT PAGE_SHIFT + static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); @@ -205,6 +274,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } +#define pud_pfn pud_pfn static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); @@ -222,15 +292,10 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } -static inline int p4d_large(p4d_t p4d) -{ - /* No 512 GiB pages yet */ - return 0; -} - #define pte_page(pte) pfn_to_page(pte_pfn(pte)) -static inline int pmd_large(pmd_t pte) +#define pmd_leaf pmd_leaf +static inline bool pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } @@ -238,13 +303,13 @@ static inline int pmd_large(pmd_t pte) #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_trans_huge(pmd_t pmd) { - return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; + return (pmd_val(pmd) & _PAGE_PSE) == _PAGE_PSE; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_trans_huge(pud_t pud) { - return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; + return (pud_val(pud) & _PAGE_PSE) == _PAGE_PSE; } #endif @@ -254,29 +319,29 @@ static inline int has_transparent_hugepage(void) return boot_cpu_has(X86_FEATURE_PSE); } -#ifdef __HAVE_ARCH_PTE_DEVMAP -static inline int pmd_devmap(pmd_t pmd) +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_DEVMAP); + return pmd_flags(pmd) & _PAGE_SPECIAL; } -#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static inline int pud_devmap(pud_t pud) +static inline pmd_t pmd_mkspecial(pmd_t pmd) { - return !!(pud_val(pud) & _PAGE_DEVMAP); + return pmd_set_flags(pmd, _PAGE_SPECIAL); } -#else -static inline int pud_devmap(pud_t pud) +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) { - return 0; + return pud_flags(pud) & _PAGE_SPECIAL; } -#endif -static inline int pgd_devmap(pgd_t pgd) +static inline pud_t pud_mkspecial(pud_t pud) { - return 0; + return pud_set_flags(pud, _PAGE_SPECIAL); } -#endif +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) @@ -293,19 +358,90 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } -static inline pte_t pte_mkclean(pte_t pte) +/* + * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the + * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So + * when creating dirty, write-protected memory, a software bit is used: + * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the + * Dirty bit to SavedDirty, and vice-vesra. + * + * This shifting is only done if needed. In the case of shifting + * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of + * shifting SavedDirty->Dirty, the condition is Write=1. 
+ */ +static inline pgprotval_t mksaveddirty_shift(pgprotval_t v) +{ + pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1; + + v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY; + v &= ~(cond << _PAGE_BIT_DIRTY); + + return v; +} + +static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v) { - return pte_clear_flags(pte, _PAGE_DIRTY); + pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1; + + v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY; + v &= ~(cond << _PAGE_BIT_SAVED_DIRTY); + + return v; } -static inline pte_t pte_mkold(pte_t pte) +static inline pte_t pte_mksaveddirty(pte_t pte) { - return pte_clear_flags(pte, _PAGE_ACCESSED); + pteval_t v = native_pte_val(pte); + + v = mksaveddirty_shift(v); + return native_make_pte(v); +} + +static inline pte_t pte_clear_saveddirty(pte_t pte) +{ + pteval_t v = native_pte_val(pte); + + v = clear_saveddirty_shift(v); + return native_make_pte(v); } static inline pte_t pte_wrprotect(pte_t pte) { - return pte_clear_flags(pte, _PAGE_RW); + pte = pte_clear_flags(pte, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PTE (Write=0,Dirty=1). Move the hardware + * dirty value to the software bit, if present. + */ + return pte_mksaveddirty(pte); +} + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline pte_t pte_mkclean(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_DIRTY_BITS); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkexec(pte_t pte) @@ -315,7 +451,16 @@ static inline pte_t pte_mkexec(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pte_mksaveddirty(pte); +} + +static inline pte_t pte_mkwrite_shstk(pte_t pte) +{ + pte = pte_clear_flags(pte, _PAGE_RW); + + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) @@ -323,11 +468,15 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte_set_flags(pte, _PAGE_ACCESSED); } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pte_t pte_mkwrite_novma(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } +struct vm_area_struct; +pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); +#define pte_mkwrite pte_mkwrite + static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); @@ -353,48 +502,75 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL); } -static inline pte_t pte_mkdevmap(pte_t pte) +/* See comments above mksaveddirty_shift() */ +static inline pmd_t pmd_mksaveddirty(pmd_t pmd) { - return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); + pmdval_t v = native_pmd_val(pmd); + + v = mksaveddirty_shift(v); + return native_make_pmd(v); } -static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +/* See comments above mksaveddirty_shift() */ +static inline pmd_t pmd_clear_saveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); - return native_make_pmd(v | set); + v = clear_saveddirty_shift(v); + return native_make_pmd(v); } -static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) 
+static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmdval_t v = native_pmd_val(pmd); + pmd = pmd_clear_flags(pmd, _PAGE_RW); - return native_make_pmd(v & ~clear); + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PMD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. + */ + return pmd_mksaveddirty(pmd); } -static inline pmd_t pmd_mkold(pmd_t pmd) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_ACCESSED); + return pmd_flags(pmd) & _PAGE_UFFD_WP; } -static inline pmd_t pmd_mkclean(pmd_t pmd) +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_DIRTY); + return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } -static inline pmd_t pmd_wrprotect(pmd_t pmd) +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline pmd_t pmd_mkold(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_RW); + return pmd_clear_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pmd_mksaveddirty(pmd); } -static inline pmd_t pmd_mkdevmap(pmd_t pmd) +static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DEVMAP); + pmd = pmd_clear_flags(pmd, _PAGE_RW); + + return pmd_set_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -407,23 +583,30 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_ACCESSED); } -static inline pmd_t pmd_mkwrite(pmd_t pmd) +static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } -static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); +#define pmd_mkwrite pmd_mkwrite + +/* See comments above mksaveddirty_shift() */ +static inline pud_t pud_mksaveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); - return native_make_pud(v | set); + v = mksaveddirty_shift(v); + return native_make_pud(v); } -static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +/* See comments above mksaveddirty_shift() */ +static inline pud_t pud_clear_saveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); - return native_make_pud(v & ~clear); + v = clear_saveddirty_shift(v); + return native_make_pud(v); } static inline pud_t pud_mkold(pud_t pud) @@ -433,22 +616,26 @@ static inline pud_t pud_mkold(pud_t pud) static inline pud_t pud_mkclean(pud_t pud) { - return pud_clear_flags(pud, _PAGE_DIRTY); + return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { - return pud_clear_flags(pud, _PAGE_RW); + pud = pud_clear_flags(pud, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PUD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. 
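
mksaveddirty_shift() above is branchless: cond is an all-or-nothing 1-bit mask derived from the Write bit, so the Dirty bit is relocated only when Write=0. A runnable model with the real bit numbers (RW=1, Dirty=6, SavedDirty=software bit 58 on 64-bit), checking both halves of the contract:

#include <assert.h>
#include <stdint.h>

#define RW	1	/* _PAGE_BIT_RW */
#define DIRTY	6	/* _PAGE_BIT_DIRTY */
#define SAVED	58	/* _PAGE_BIT_SAVED_DIRTY (SOFTW5 on 64-bit) */

/* Model of mksaveddirty_shift(): if Write=0, move Dirty -> SavedDirty. */
static uint64_t mksaveddirty(uint64_t v)
{
	uint64_t cond = (~v >> RW) & 1;	/* 1 iff Write=0 */

	v |= ((v >> DIRTY) & cond) << SAVED;
	v &= ~(cond << DIRTY);
	return v;
}

int main(void)
{
	/* Write=0,Dirty=1: the hardware Dirty bit is parked in SavedDirty. */
	assert(mksaveddirty(1ULL << DIRTY) == 1ULL << SAVED);

	/* Write=1 PTEs are left alone: Dirty=1 stays Dirty=1. */
	uint64_t w = (1ULL << RW) | (1ULL << DIRTY);
	assert(mksaveddirty(w) == w);
	return 0;
}
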
+ */ + return pud_mksaveddirty(pud); } static inline pud_t pud_mkdirty(pud_t pud) { - return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); -} + pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); -static inline pud_t pud_mkdevmap(pud_t pud) -{ - return pud_set_flags(pud, _PAGE_DEVMAP); + return pud_mksaveddirty(pud); } static inline pud_t pud_mkhuge(pud_t pud) @@ -463,7 +650,9 @@ static inline pud_t pud_mkyoung(pud_t pud) static inline pud_t pud_mkwrite(pud_t pud) { - return pud_set_flags(pud, _PAGE_RW); + pud = pud_set_flags(pud, _PAGE_RW); + + return pud_clear_saveddirty(pud); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY @@ -548,6 +737,9 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; + /* This bit combination is used to mark shadow stacks */ + WARN_ON_ONCE((pgprot_val(pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == + _PAGE_DIRTY); pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot)); @@ -569,16 +761,16 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) return __pud(pfn | check_pgprot(pgprot)); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } -static inline pud_t pud_mknotpresent(pud_t pud) +static inline pud_t pud_mkinvalid(pud_t pud) { return pfn_pud(pud_pfn(pud), - __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); + __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); @@ -586,6 +778,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; + pte_t pte_result; /* * Chop off the NX bit (if present), and add the NX portion of @@ -594,25 +787,82 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) val &= _PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); - return __pte(val); + + pte_result = __pte(val); + + /* + * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: + * 1. Marking Write=0 PTEs Dirty=1 + * 2. Marking Dirty=1 PTEs Write=0 + * + * The first case cannot happen because the _PAGE_CHG_MASK will filter + * out any Dirty bit passed in newprot. Handle the second case by + * going through the mksaveddirty exercise. Only do this if the old + * value was Write=1 to avoid doing this on Shadow Stack PTEs. + */ + if (oldval & _PAGE_RW) + pte_result = pte_mksaveddirty(pte_result); + else + pte_result = pte_clear_saveddirty(pte_result); + + return pte_result; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; + pmd_t pmd_result; - val &= _HPAGE_CHG_MASK; + val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY); val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); - return __pmd(val); + + pmd_result = __pmd(val); + + /* + * Avoid creating shadow stack PMD by accident. See comment in + * pte_modify(). 
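
pud_mkwrite() above shows the make-writable pattern inline; for PTEs and PMDs the same operation moved out of line (the pte_mkwrite()/pmd_mkwrite() declarations earlier take a VMA) so it can pick the shadow-stack encoding per mapping. Upstream the PTE version lives in arch/x86/mm/pgtable.c and reads roughly as below; quoted from memory for context, not part of this diff:

pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_SHADOW_STACK)
		return pte_mkwrite_shstk(pte);

	pte = pte_mkwrite_novma(pte);

	return pte_clear_saveddirty(pte);
}
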
+ */ + if (oldval & _PAGE_RW) + pmd_result = pmd_mksaveddirty(pmd_result); + else + pmd_result = pmd_clear_saveddirty(pmd_result); + + return pmd_result; +} + +static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) +{ + pudval_t val = pud_val(pud), oldval = val; + pud_t pud_result; + + val &= _HPAGE_CHG_MASK; + val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; + val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK); + + pud_result = __pud(val); + + /* + * Avoid creating shadow stack PUD by accident. See comment in + * pte_modify(). + */ + if (oldval & _PAGE_RW) + pud_result = pud_mksaveddirty(pud_result); + else + pud_result = pud_clear_saveddirty(pud_result); + + return pud_result; } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } @@ -623,11 +873,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - return canon_pgprot(prot); -} - static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) @@ -663,7 +908,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* @@ -677,14 +922,14 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } -#else /* CONFIG_PAGE_TABLE_ISOLATION */ +#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_X86_32 @@ -693,7 +938,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) # include <asm/pgtable_64.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> @@ -710,17 +955,18 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } -static inline int pte_present(pte_t a) +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { - return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); + if (__pte_needs_invert(pte_val(pte))) + return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } +#define pte_advance_pfn pte_advance_pfn -#ifdef __HAVE_ARCH_PTE_DEVMAP -static inline int pte_devmap(pte_t a) +static inline int pte_present(pte_t a) { - return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } -#endif #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) @@ -729,7 +975,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && - 
mm_tlb_flush_pending(mm)) + atomic_read(&mm->tlb_flush_pending)) return true; return false; @@ -749,7 +995,7 @@ static inline int pmd_present(pmd_t pmd) #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h + * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -783,45 +1029,10 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) */ #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) -/* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -static inline unsigned long pmd_index(unsigned long address) -{ - return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - * - * (Currently stuck as a macro because of indirect forward reference - * to linux/mm.h:page_to_nid()) - */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) - -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this function returns the index of the entry in the pte page which would - * control the given virtual address - */ -static inline unsigned long pte_index(unsigned long address) -{ - return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -} - -static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) -{ - return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); -} - static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -840,9 +1051,9 @@ static inline int pud_present(pud_t pud) return pud_flags(pud) & _PAGE_PRESENT; } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); + return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* @@ -851,34 +1062,18 @@ static inline unsigned long pud_page_vaddr(pud_t pud) */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -/* Find an entry in the second-level page table.. 
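
The relaxed pmd_bad() above masks _PAGE_ACCESSED out of both sides of the comparison, so a page-table entry whose Accessed bit has been set by hardware (or cleared again by aging code) is not misreported as corrupt. A simplified numeric check of that identity (the kernel's _KERNPG_TABLE can carry extra bits, e.g. the SME encryption mask, which cancel out the same way):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PRESENT		(1ULL << 0)
#define RW		(1ULL << 1)
#define USER		(1ULL << 2)
#define ACCESSED	(1ULL << 5)
#define DIRTY		(1ULL << 6)
#define PSE		(1ULL << 7)

#define KERNPG_TABLE	(PRESENT | RW | ACCESSED | DIRTY)

/* Model of the relaxed pmd_bad(): USER and ACCESSED may vary freely. */
static bool pmd_bad(uint64_t flags)
{
	return (flags & ~(USER | ACCESSED)) != (KERNPG_TABLE & ~ACCESSED);
}

int main(void)
{
	assert(!pmd_bad(KERNPG_TABLE));			/* Accessed set   */
	assert(!pmd_bad(KERNPG_TABLE & ~ACCESSED));	/* Accessed clear */
	assert(!pmd_bad(KERNPG_TABLE | USER));		/* user table     */
	assert(pmd_bad(KERNPG_TABLE | PSE));		/* huge-page bit  */
	return 0;
}
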
*/ -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); -} - -static inline int pud_large(pud_t pud) +#define pud_leaf pud_leaf +static inline bool pud_leaf(pud_t pud) { - return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); + return pud_val(pud) & _PAGE_PSE; } static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } -#else -static inline int pud_large(pud_t pud) -{ - return 0; -} #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -static inline unsigned long pud_index(unsigned long address) -{ - return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); -} - #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { @@ -890,9 +1085,9 @@ static inline int p4d_present(p4d_t p4d) return p4d_flags(p4d) & _PAGE_PRESENT; } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); + return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* @@ -901,17 +1096,11 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) -/* Find an entry in the third-level page table.. */ -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) -{ - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); -} - static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; - if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; @@ -957,7 +1146,7 @@ static inline int pgd_bad(pgd_t pgd) if (!pgtable_l5_enabled()) return 0; - if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; @@ -977,57 +1166,22 @@ static inline int pgd_none(pgd_t pgd) } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ -#endif /* __ASSEMBLY__ */ - -/* - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] - * - * this macro returns the index of the entry in the pgd page which would - * control the given virtual address - */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -/* - * pgd_offset() returns a (pgd_t *) - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; - */ -#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) -/* - * a shortcut to get a pgd_t in a given mm - */ -#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) -/* - * a shortcut which implies the use of the kernel's pgd, instead - * of a process's - */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) - +#endif /* __ASSEMBLER__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); -extern void memblock_find_dma_reserve(void); +void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. 
*/ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ @@ -1056,21 +1210,17 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) return res; } -static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep , pte_t pte) -{ - native_set_pte(ptep, pte); -} - static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - native_set_pmd(pmdp, pmd); + page_table_check_pmd_set(mm, pmdp, pmd); + set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { + page_table_check_pud_set(mm, pudp, pud); native_set_pud(pudp, pud); } @@ -1101,6 +1251,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, pte); return pte; } @@ -1116,6 +1267,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); + page_table_check_pte_clear(mm, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } @@ -1126,12 +1278,20 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); -} + /* + * Avoid accidentally creating shadow stack PTEs + * (Write=0,Dirty=1). Use cmpxchg() to prevent races with + * the hardware setting Dirty=1. + */ + pte_t old_pte, new_pte; -#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + old_pte = READ_ONCE(*ptep); + do { + new_pte = pte_wrprotect(old_pte); + } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); +} -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, @@ -1152,37 +1312,43 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define pmd_write pmd_write -static inline int pmd_write(pmd_t pmd) -{ - return pmd_flags(pmd) & _PAGE_RW; -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - return native_pmdp_get_and_clear(pmdp); + pmd_t pmd = native_pmdp_get_and_clear(pmdp); + + page_table_check_pmd_clear(mm, pmd); + + return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - return native_pudp_get_and_clear(pudp); + pud_t pud = native_pudp_get_and_clear(pudp); + + page_table_check_pud_clear(mm, pud); + + return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); -} + /* + * Avoid accidentally creating shadow stack PTEs + * (Write=0,Dirty=1). 
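
The try_cmpxchg() loop in the new ptep_set_wrprotect() closes the window where clearing only _PAGE_RW, while the CPU concurrently sets Dirty=1, would momentarily produce the Write=0,Dirty=1 shadow-stack encoding. A userspace model of the same retry pattern using C11 atomics (bit numbers as before; a sketch of the idiom, not kernel code):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define RW	(1ULL << 1)
#define DIRTY	(1ULL << 6)
#define SAVED	(1ULL << 58)

static uint64_t wrprotect(uint64_t v)	/* model of pte_wrprotect() */
{
	v &= ~RW;
	if (v & DIRTY) {		/* park Dirty in SavedDirty */
		v &= ~DIRTY;
		v |= SAVED;
	}
	return v;
}

/* Model of ptep_set_wrprotect(): never lose a concurrent Dirty=1. */
static void set_wrprotect(_Atomic uint64_t *pte)
{
	uint64_t old = atomic_load(pte);

	while (!atomic_compare_exchange_weak(pte, &old, wrprotect(old)))
		;	/* 'old' was refreshed by the failed CAS; retry */
}

int main(void)
{
	_Atomic uint64_t pte = RW | DIRTY;

	set_wrprotect(&pte);
	assert(atomic_load(&pte) == SAVED);	/* Write=0, Dirty moved */
	return 0;
}
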
Use cmpxchg() to prevent races with + * the hardware setting Dirty=1. + */ + pmd_t old_pmd, new_pmd; -#define pud_write pud_write -static inline int pud_write(pud_t pud) -{ - return pud_flags(pud) & _PAGE_RW; + old_pmd = READ_ONCE(*pmdp); + do { + new_pmd = pmd_wrprotect(old_pmd); + } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); } #ifndef pmdp_establish @@ -1190,6 +1356,7 @@ static inline int pud_write(pud_t pud) static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { @@ -1199,6 +1366,29 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } } #endif + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline pud_t pudp_establish(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(vma->vm_mm, pudp, pud); + if (IS_ENABLED(CONFIG_SMP)) { + return xchg(pudp, pud); + } else { + pud_t old = *pudp; + WRITE_ONCE(*pudp, pud); + return old; + } +} +#endif + +#define __HAVE_ARCH_PMDP_INVALIDATE_AD +extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp); + /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. @@ -1213,11 +1403,9 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } -static inline int pgd_large(pgd_t pgd) { return 0; } - -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses. @@ -1262,12 +1450,12 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * - * dst - pointer to pgd range anwhere on a pgd page + * dst - pointer to pgd range anywhere on a pgd page * src - "" * count - the number of pgds to copy. 
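
The PTI pgd pair described above is a single order-1 (8k) allocation: the kernel half occupies the first 4k and the user half the second, so switching between them really is just flipping bit 12 of the pointer. A tiny model (the sample address is made up; only the 8k alignment matters):

#include <assert.h>
#include <stdint.h>

#define PTI_PGTABLE_SWITCH_BIT	12	/* PAGE_SHIFT */

static uint64_t to_user(uint64_t pgd)
{
	return pgd | (1ULL << PTI_PGTABLE_SWITCH_BIT);
}

static uint64_t to_kernel(uint64_t pgd)
{
	return pgd & ~(1ULL << PTI_PGTABLE_SWITCH_BIT);
}

int main(void)
{
	uint64_t kernel_pgd = 0xffff888000a0c000ULL;	/* sample, 8k-aligned */

	assert(to_user(kernel_pgd) == kernel_pgd + 0x1000);
	assert(to_kernel(to_user(kernel_pgd)) == kernel_pgd);
	return 0;
}
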
* @@ -1277,7 +1465,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ @@ -1308,6 +1496,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ +} static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { @@ -1316,6 +1509,20 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline bool pte_swp_exclusive(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); +} #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) @@ -1351,26 +1558,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 -#define PKRU_BITS_PER_PKEY 2 +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} -static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } -static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - /* - * Access-disable disables writes too so we need to check - * both bits here. - */ - return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -1402,6 +1621,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; + /* + * Write=0,Dirty=1 PTEs are shadow stack, which the kernel + * shouldn't generally allow access to, but since they + * are already Write=0, the below logic covers both cases. 
+ */ if (write) need_pte_bits |= _PAGE_RW; @@ -1437,7 +1661,85 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#include <asm-generic/pgtable.h> -#endif /* __ASSEMBLY__ */ +#define arch_check_zapped_pte arch_check_zapped_pte +void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte); + +#define arch_check_zapped_pmd arch_check_zapped_pmd +void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd); + +#define arch_check_zapped_pud arch_check_zapped_pud +void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud); + +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); +} +#endif + +#ifdef CONFIG_X86_SGX +int arch_memory_failure(unsigned long pfn, int flags); +#define arch_memory_failure arch_memory_failure + +bool arch_is_platform_page(u64 paddr); +#define arch_is_platform_page arch_is_platform_page +#endif + +/* + * Use set_p*_safe(), and elide TLB flushing, when confident that *no* + * TLB flush will be required as a result of the "set". For example, use + * in scenarios where it is known ahead of time that the routine is + * setting non-present entries, or re-setting an existing entry to the + * same value. Otherwise, use the typical "set" helpers and flush the + * TLB. + */ +#define set_pte_safe(ptep, pte) \ +({ \ + WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ + set_pte(ptep, pte); \ +}) + +#define set_pmd_safe(pmdp, pmd) \ +({ \ + WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ + set_pmd(pmdp, pmd); \ +}) + +#define set_pud_safe(pudp, pud) \ +({ \ + WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ + set_pud(pudp, pud); \ +}) + +#define set_p4d_safe(p4dp, p4d) \ +({ \ + WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ + set_p4d(p4dp, p4d); \ +}) + +#define set_pgd_safe(pgdp, pgd) \ +({ \ + WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ + set_pgd(pgdp, pgd); \ +}) +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 4fe9e7fc74d3..b612cc57a4d3 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -13,7 +13,7 @@ * This file contains the functions and defines necessary to modify and use * the i386 page table tree. 
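
A sketch of where the set_p*_safe() variants above are meant to be used: early code populating entries that are known to be non-present, where eliding the TLB flush is provably fine. The helper and its arguments here are hypothetical, shown only to illustrate the calling pattern:

/* Hypothetical early-boot helper: every slot being filled is known to
 * be non-present, so set_pte_safe() may skip TLB flushing and will
 * WARN (once) if that assumption is ever violated. */
static void __init map_one_page(pte_t *ptep, unsigned long paddr)
{
	set_pte_safe(ptep, __pte(paddr | pgprot_val(PAGE_KERNEL)));
}
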
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> #include <linux/threads.h> #include <asm/paravirt.h> @@ -29,68 +29,32 @@ extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; extern pmd_t initial_pg_pmd[]; -static inline void pgtable_cache_init(void) { } -static inline void check_pgt_cache(void) { } void paging_init(void); void sync_initial_page_table(void); -/* - * Define this if things work differently on an i386 and an i486: - * it will (on an i486) warn about kernel memory accesses that are - * done without a 'access_ok( ..)' - */ -#undef TEST_ACCESS_OK - #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ - pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte)) -#else -#define pte_offset_map(dir, address) \ - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) -#define pte_unmap(pte) do { } while (0) -#endif - /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one_kernel((vaddr)); \ + flush_tlb_one_kernel((vaddr)); \ } while (0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* - * kern_addr_valid() is (1) for FLATMEM and (0) for - * SPARSEMEM and DISCONTIGMEM - */ -#ifdef CONFIG_FLATMEM -#define kern_addr_valid(addr) (1) -#else -#define kern_addr_valid(kaddr) (0) -#endif - -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. + * This is used to calculate the .brk reservation for initial pagetables. + * Enough space is reserved to allocate pagetables sufficient to cover all + * of LOWMEM_PAGES, which is an upper bound on the size of the direct map of + * lowmem. * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries + * With PAE paging (PTRS_PER_PMD > 1), we allocate PTRS_PER_PGD == 4 pages for + * the PMD's in addition to the pages required for the last level pagetables. */ #if PTRS_PER_PMD > 1 #define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) @@ -106,6 +70,6 @@ do { \ * with only a host target support using a 32-bit type for internal * representation. */ -#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) +#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..921148b42967 --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include <asm/cpu_entry_area.h> + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. 
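
A worked example of the .brk sizing above for the PAE case: mapping 1 GiB of lowmem takes 262144 PTEs, i.e. 512 page-table pages, plus the PTRS_PER_PGD == 4 PMD pages:

#include <assert.h>

/* PAE: each page-table page maps 512 pages; 4 PMD pages in total. */
#define PTRS_PER_PMD	512
#define PTRS_PER_PGD	4
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)

int main(void)
{
	/* 1 GiB of lowmem = 262144 4k pages -> 512 + 4 reserved pages. */
	assert(PAGE_TABLE_SIZE(262144UL) == 516);
	return 0;
}
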
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLER__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,54 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. 
;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c - * to avoid include recursion hell - */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) - -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 9c85b54bf03c..f06e5d6a2747 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -5,7 +5,7 @@ #include <linux/const.h> #include <asm/pgtable_64_types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This file contains the functions and defines necessary to modify and use @@ -41,11 +41,9 @@ static inline void sync_initial_page_table(void) { } pr_err("%s:%d: bad pud %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pud_val(e)) -#if CONFIG_PGTABLE_LEVELS >= 5 #define p4d_ERROR(e) \ pr_err("%s:%d: bad p4d %p(%016lx)\n", \ __FILE__, __LINE__, &(e), p4d_val(e)) -#endif #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %p(%016lx)\n", \ @@ -53,6 +51,12 @@ static inline void sync_initial_page_table(void) { } struct mm_struct; +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} + void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); @@ -137,7 +141,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; - if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + if (pgtable_l5_enabled() || + !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) { WRITE_ONCE(*p4dp, p4d); return; } @@ -162,34 +167,25 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -extern void sync_global_pgds(unsigned long start, unsigned long end); - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ -/* - * Level 4 access. - */ -#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) - -/* PUD - Level3 access */ +/* PGD - Level 4 access */ -/* PMD - Level 2 access */ +/* PUD - Level 3 access */ -/* PTE - Level 1 access. */ +/* PMD - Level 2 access */ -/* x86-64 always has all page tables mapped. */ -#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) ((void)(pte))/* NOP */ +/* PTE - Level 1 access */ /* * Encode and de-code a swap entry * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... 
|SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -197,9 +193,17 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * erratum where they can be incorrectly set by hardware on * non-present PTEs. * + * SD Bits 1-4 are not used in non-present format and available for + * special use described below: + * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * + * F (2) in swp entry is used to record when a pagetable is + * writeprotected by userfaultfd WP support. + * + * E (3) in swp entry is used to remember PG_anon_exclusive. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * @@ -232,18 +236,14 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) -#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) +#define __swp_entry_to_pte(x) (__pte((x).val)) +#define __swp_entry_to_pmd(x) (__pmd((x).val)) -extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define pgtable_cache_init() do { } while (0) -#define check_pgt_cache() do { } while (0) - #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 @@ -259,15 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted -static inline bool gup_fast_permitted(unsigned long start, int nr_pages, - int write) +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long)nr_pages << PAGE_SHIFT; - end = start + len; - if (end < start) - return false; if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; @@ -275,5 +268,26 @@ static inline bool gup_fast_permitted(unsigned long start, int nr_pages, #include <asm/pgtable-invert.h> -#endif /* !__ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ + +#define l4_index(x) (((x) >> 39) & 511) +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + +L3_START_KERNEL = pud_index(__START_KERNEL_map) + +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) + +/* Automate the creation of 1 to 1 mapping pmd entries */ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ + .endr + +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 88bca456da99..7eb61ef6a185 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -4,7 +4,7 @@ #include <asm/sparsemem.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/kaslr.h> @@ -19,8 +19,8 @@ typedef unsigned long pgdval_t; typedef unsigned long pgprotval_t; 
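
A runnable model of the 64-bit layout documented above: TYPE occupies the top five bits (59-63) and OFFSET is stored inverted in bits 9-58, so a swap PTE can never alias a valid cached physical address (the L1TF mitigation). Illustrative only; the kernel's real macros also fold in the swp_entry_t conversions:

#include <assert.h>
#include <stdint.h>

#define SWP_TYPE_BITS		5
#define SWP_OFFSET_BITS		50	/* bits 9..58 */
#define SWP_OFFSET_SHIFT	9
#define SWP_OFFSET_MASK		((1ULL << SWP_OFFSET_BITS) - 1)

static uint64_t mk_swp_pte(uint64_t type, uint64_t offset)
{
	return (type << (64 - SWP_TYPE_BITS)) |
	       ((~offset & SWP_OFFSET_MASK) << SWP_OFFSET_SHIFT);
}

int main(void)
{
	uint64_t pte = mk_swp_pte(2, 0x1234);
	uint64_t off = (pte >> SWP_OFFSET_SHIFT) & SWP_OFFSET_MASK;

	assert(pte >> (64 - SWP_TYPE_BITS) == 2);	/* type decodes   */
	assert((~off & SWP_OFFSET_MASK) == 0x1234);	/* offset decodes */
	assert((pte & 1) == 0);				/* not present    */
	return 0;
}
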
typedef struct { pteval_t pte; } pte_t; +typedef struct { pmdval_t pmd; } pmd_t; -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled; #ifdef USE_EARLY_PGTABLE_L5 @@ -36,18 +36,13 @@ static inline bool pgtable_l5_enabled(void) #define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) #endif /* USE_EARLY_PGTABLE_L5 */ -#else -#define pgtable_l5_enabled() 0 -#endif /* CONFIG_X86_5LEVEL */ +#define ARCH_PAGE_TABLE_SYNC_MASK \ + (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; -#endif /* !__ASSEMBLY__ */ - -#define SHARED_KERNEL_PMD 0 - -#ifdef CONFIG_X86_5LEVEL +#endif /* !__ASSEMBLER__ */ /* * PGDIR_SHIFT determines what a top-level page table entry can map @@ -66,17 +61,6 @@ extern unsigned int ptrs_per_p4d; #define MAX_POSSIBLE_PHYSMEM_BITS 52 -#else /* CONFIG_X86_5LEVEL */ - -/* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 -#define MAX_PTRS_PER_P4D 1 - -#endif /* CONFIG_X86_5LEVEL */ - /* * 3rd level page */ @@ -103,7 +87,7 @@ extern unsigned int ptrs_per_p4d; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* - * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. * * Be very careful vs. KASLR when changing anything here. The KASLR address * range must not overlap with anything except the KASAN shadow area, which @@ -129,21 +113,68 @@ extern unsigned int ptrs_per_p4d; #define __VMEMMAP_BASE_L4 0xffffea0000000000UL #define __VMEMMAP_BASE_L5 0xffd4000000000000UL -#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base # define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base + +#ifdef CONFIG_RANDOMIZE_MEMORY +# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end +#endif + +/* + * End of the region for which vmalloc page tables are pre-allocated. + * For non-KMSAN builds, this is the same as VMALLOC_END. + * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than + * VMALLOC_START..VMALLOC_END (see below). + */ +#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) + +#ifndef CONFIG_KMSAN +#define VMALLOC_END VMEMORY_END #else -# define VMALLOC_START __VMALLOC_BASE_L4 -# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 -# define VMEMMAP_START __VMEMMAP_BASE_L4 -#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ +/* + * In KMSAN builds vmalloc area is four times smaller, and the remaining 3/4 + * are used to keep the metadata for virtual pages. The memory formerly + * belonging to vmalloc area is now laid out as follows: + * + * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area + * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to + * VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow + * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to + * VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins + * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START + * - shadow for modules, + * KMSAN_MODULES_ORIGIN_START to + * KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules. + */ +#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2) +#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1) -#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) +/* + * vmalloc metadata addresses are calculated by adding shadow/origin offsets + * to vmalloc address. 
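
The KMSAN quartering above in numbers, for the 4-level layout (VMALLOC_SIZE_TB = 32; the base value is the usual __VMALLOC_BASE_L4, quoted here from memory purely for illustration):

#include <assert.h>
#include <stdint.h>

#define TB		(1ULL << 40)
#define VMALLOC_START	0xffffc90000000000ULL	/* __VMALLOC_BASE_L4 */
#define VMALLOC_SIZE	(32 * TB)
#define QUARTER		(VMALLOC_SIZE >> 2)	/* 8 TB per quarter */

int main(void)
{
	uint64_t vmalloc_end  = VMALLOC_START + QUARTER - 1;
	uint64_t shadow_start = VMALLOC_START + QUARTER;	/* 2nd quarter */
	uint64_t origin_start = VMALLOC_START + 2 * QUARTER;	/* 3rd quarter */

	assert(shadow_start == vmalloc_end + 1);
	assert(origin_start - shadow_start == QUARTER);
	return 0;
}
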
+ */ +#define KMSAN_VMALLOC_SHADOW_OFFSET VMALLOC_QUARTER_SIZE +#define KMSAN_VMALLOC_ORIGIN_OFFSET (VMALLOC_QUARTER_SIZE << 1) + +#define KMSAN_VMALLOC_SHADOW_START (VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET) +#define KMSAN_VMALLOC_ORIGIN_START (VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET) + +/* + * The shadow and origin areas for modules are placed one after another in + * the last 1/4 of vmalloc space. + */ +#define KMSAN_MODULES_SHADOW_START (VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1) +#define KMSAN_MODULES_ORIGIN_START (KMSAN_MODULES_SHADOW_START + MODULES_LEN) +#endif /* CONFIG_KMSAN */ #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module section ends at the start of the fixmap */ -#define MODULES_END _AC(0xffffffffff000000, UL) +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define MODULES_END _AC(0xffffffffff000000, UL) +#else +# define MODULES_END _AC(0xfffffffffe000000, UL) +#endif #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) @@ -159,4 +190,9 @@ extern unsigned int ptrs_per_p4d; #define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) +/* + * We borrow bit 3 to remember PG_anon_exclusive. + */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PWT + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..4f056fb88174 --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,22 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include <asm/pgtable_32_areas.h> +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#ifdef CONFIG_X86_32 +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ + CPU_ENTRY_AREA_BASE) +#else +#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE +#endif + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d6ff0bbdb394..2ec250ba467e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -7,8 +7,6 @@ #include <asm/page_types.h> -#define FIRST_USER_ADDRESS 0UL - #define _PAGE_BIT_PRESENT 0 /* is present */ #define _PAGE_BIT_RW 1 /* writeable */ #define _PAGE_BIT_USER 2 /* userspace addressable */ @@ -23,7 +21,8 @@ #define _PAGE_BIT_SOFTW2 10 /* " */ #define _PAGE_BIT_SOFTW3 11 /* " */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ -#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_SOFTW4 57 /* available for programmer */ +#define _PAGE_BIT_SOFTW5 58 /* available for programmer */ #define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */ #define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */ #define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */ @@ -32,8 +31,18 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ -#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 +#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */ + +#ifdef CONFIG_X86_64 +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */ +#define
_PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */ +#else +/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */ +#endif /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ @@ -55,6 +64,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) @@ -100,21 +110,37 @@ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) -#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) -#define __HAVE_ARCH_PTE_DEVMAP +#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) #else #define _PAGE_NX (_AT(pteval_t, 0)) -#define _PAGE_DEVMAP (_AT(pteval_t, 0)) +#define _PAGE_SOFTW4 (_AT(pteval_t, 0)) #endif +/* + * The hardware requires shadow stack to be Write=0,Dirty=1. However, + * there are valid cases where the kernel might create read-only PTEs that + * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In + * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit, + * to avoid creating wrong "shadow stack" PTEs. Such PTEs have + * (Write=0,SavedDirty=1,Dirty=0) set. + */ +#define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY) + +#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY) + #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW) /* * Set of bits not changed in pte_modify. The pte's @@ -122,10 +148,12 @@ * instance, and is *not* included in this mask since * pte_modify() does modify it. */ -#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) -#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) +#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | \ + _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ + _PAGE_CC | _PAGE_UFFD_WP) +#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) +#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) /* * The cache modes defined here are used to translate between pure SW usage @@ -135,103 +163,102 @@ * to have the WB mode at index 0 (all bits clear). This is the default * right now and likely would break too much if changed.
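 * For instance, assuming the PAT layout the kernel normally programs at
 * boot, _PAGE_CACHE_MODE_WC translates to PAT index 1 (only _PAGE_PWT
 * set), while _PAGE_CACHE_MODE_WB must stay all bits clear as required
 * above.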
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) +#define _PAGE_CC (_AT(pteval_t, cc_get_mask())) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) + +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) +#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) + #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) - -#ifndef __ASSEMBLY__ - -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) - -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) - -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) - -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) - -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) - -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) - -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) - 
-#endif /* __ASSEMBLY__ */ - -/* xwr */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +/* + * Page tables need to have Write=1 in order for any lower PTEs to be + * writable. This includes shadow stack memory (Write=0, Dirty=1). + */ +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) + +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G) +#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G) +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE + + +#ifndef __ASSEMBLER__ + +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) + +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) + +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) + +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) + +#endif /* __ASSEMBLER__ */ /* * early identity mapping pte attrib macros. @@ -250,7 +277,7 @@ enum page_cache_mode { # include <asm/pgtable_64_types.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -267,6 +294,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | _PAGE_NX); +} +#define pgprot_nx pgprot_nx + #ifdef CONFIG_X86_PAE /* @@ -358,11 +391,9 @@ static inline pudval_t native_pud_val(pud_t pud) #endif #if CONFIG_PGTABLE_LEVELS > 2 -typedef struct { pmdval_t pmd; } pmd_t; - static inline pmd_t native_make_pmd(pmdval_t val) { - return (pmd_t) { val }; + return (pmd_t) { .pmd = val }; } static inline pmdval_t native_pmd_val(pmd_t pmd) @@ -450,12 +481,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; -extern uint8_t __pte2cachemode_tbl[8]; - #define __pte2cm_idx(cb) \ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ @@ -465,43 +490,26 @@ extern uint8_t __pte2cachemode_tbl[8]; (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ (((i) & 1) << _PAGE_BIT_PWT)) -static inline unsigned long cachemode2protval(enum page_cache_mode pcm) +unsigned long cachemode2protval(enum page_cache_mode pcm); + +static inline pgprotval_t protval_4k_2_large(pgprotval_t val) { - if (likely(pcm == 0)) - return 0; - return __cachemode2pte_tbl[pcm]; + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } -static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) +static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { - return __pgprot(cachemode2protval(pcm)); + return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); } -static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) +static inline pgprotval_t protval_large_2_4k(pgprotval_t val) { - unsigned long masked; - - masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK; - if (likely(masked == 0)) - return 0; - return __pte2cachemode_tbl[__pte2cm_idx(masked)]; -} -static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) -{ - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT_LARGE) >> + (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT_LARGE) >> - (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return __pgprot(protval_large_2_4k(pgprot_val(pgprot))); } @@ -509,8 +517,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern pteval_t __default_kernel_pte_mask; -extern void set_nx(void); -extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); @@ -535,15 +541,13 @@ extern void native_pagetable_init(void); #define native_pagetable_init paging_init #endif -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - enum pg_level { 
PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, + PG_LEVEL_256T, PG_LEVEL_NUM }; @@ -562,6 +566,8 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); +pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address, + unsigned int *level, bool *nx, bool *rw); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, @@ -570,6 +576,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long page_flags); extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, unsigned long numpages); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 19b137f1b3be..2e6c04d8a45b 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,16 +2,19 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H -#define ARCH_DEFAULT_PKEY 0 - -#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +/* + * If more than 16 keys are ever supported, a thorough audit + * will be necessary to ensure that the types that store key + * numbers and masks have sufficient capacity. + */ +#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); static inline bool arch_pkeys_enabled(void) { - return boot_cpu_has(X86_FEATURE_OSPKE); + return cpu_feature_enabled(X86_FEATURE_OSPKE); } /* @@ -21,7 +24,7 @@ static inline bool arch_pkeys_enabled(void) extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); @@ -32,15 +35,12 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return 0; return __arch_override_mprotect_pkey(vma, prot, pkey); } -extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val); - #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3) #define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) @@ -115,12 +115,6 @@ int mm_pkey_free(struct mm_struct *mm, int pkey) return 0; } -extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val); -extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, - unsigned long init_val); -extern void copy_init_pkru_to_fpregs(void); - static inline int vma_pkey(struct vm_area_struct *vma) { unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h new file mode 100644 index 000000000000..74f0a2d34ffd --- /dev/null +++ b/arch/x86/include/asm/pkru.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKRU_H +#define _ASM_X86_PKRU_H + +#include <asm/cpufeature.h> + +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u +#define 
PKRU_BITS_PER_PKEY 2 + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#define pkru_get_init_value() READ_ONCE(init_pkru_value) +#else +#define init_pkru_value 0 +#define pkru_get_init_value() 0 +#endif + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. + */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u32 read_pkru(void) +{ + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + return rdpkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru != rdpkru()) + wrpkru(pkru); +} + +static inline void pkru_write_default(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + wrpkru(pkru_get_init_value()); +} + +#endif diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 059823bb8af7..40f92270515b 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * platform_sst_audio.h: sst audio platform data header file * @@ -5,17 +6,10 @@ * Author: Jeeja KP <jeeja.kp@intel.com> * Omair Mohammed Abdullah <omair.m.abdullah@intel.com> * Vinod Koul <vinod.koul@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #ifndef _PLATFORM_SST_AUDIO_H_ #define _PLATFORM_SST_AUDIO_H_ -#include <linux/sfi.h> - #define MAX_NUM_STREAMS_MRFLD 25 #define MAX_NUM_STREAMS MAX_NUM_STREAMS_MRFLD diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h new file mode 100644 index 000000000000..a5f761fbf45b --- /dev/null +++ b/arch/x86/include/asm/posted_intr.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_POSTED_INTR_H +#define _X86_POSTED_INTR_H + +#include <asm/cmpxchg.h> +#include <asm/rwonce.h> +#include <asm/irq_vectors.h> + +#include <linux/bitmap.h> + +#define POSTED_INTR_ON 0 +#define POSTED_INTR_SN 1 + +#define PID_TABLE_ENTRY_VALID 1 + +#define NR_PIR_VECTORS 256 +#define NR_PIR_WORDS (NR_PIR_VECTORS / BITS_PER_LONG) + +/* Posted-Interrupt Descriptor */ +struct pi_desc { + unsigned long pir[NR_PIR_WORDS]; /* Posted interrupt requested */ + union { + struct { + u16 notifications; /* Suppress and outstanding bits */ + u8 nv; + u8 rsvd_2; + u32 ndst; + }; + u64 control; + }; + u32 rsvd[6]; +} __aligned(64); + +/* + * De-multiplexing posted interrupts is on the performance path; the code + * below is written to optimize the cache performance based on the following + * considerations: + * 1. Posted interrupt descriptor (PID) fits in a cache line that is frequently + * accessed by both CPU and IOMMU. + * 2. During software processing of posted interrupts, the CPU needs to do + * natural width read and xchg for checking and clearing posted interrupt + * request (PIR), a 256-bit field within the PID.
+ * 3. On the other side, the IOMMU does atomic swaps of the entire PID cache + * line when posting interrupts and setting control bits. + * 4. The CPU can access the cache line an order of magnitude faster than the + * IOMMU. + * 5. Each time the IOMMU posts an interrupt to the PIR, it will evict the PID + * cache line. The cache line states after each operation are as follows, + * assuming a 64-bit kernel:
+ *     CPU          IOMMU             PID Cache line state
+ *     ---------------------------------------------------------------
+ *     read64                         exclusive
+ *     lock xchg64                    modified
+ *                  post/atomic swap  invalid
+ *     ---------------------------------------------------------------
+ * + * To reduce L1 data cache misses, it is important to avoid contention with + * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used + * when processing posted interrupts in software, e.g. to dispatch interrupt + * handlers for posted MSIs, or to move interrupts from the PIR to the vIRR + * in KVM. + * + * In addition, the code is trying to keep the cache line state consistent + * as much as possible, e.g. when making a copy and clearing the PIR + * (assuming non-zero PIR bits are present in the entire PIR), it does: + * read, read, read, read, xchg, xchg, xchg, xchg + * instead of: + * read, xchg, read, xchg, read, xchg, read, xchg + */ +static __always_inline bool pi_harvest_pir(unsigned long *pir, + unsigned long *pir_vals) +{ + unsigned long pending = 0; + int i; + + for (i = 0; i < NR_PIR_WORDS; i++) { + pir_vals[i] = READ_ONCE(pir[i]); + pending |= pir_vals[i]; + } + + if (!pending) + return false; + + for (i = 0; i < NR_PIR_WORDS; i++) { + if (!pir_vals[i]) + continue; + + pir_vals[i] = arch_xchg(&pir[i], 0); + } + + return true; +} + +static inline bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, pi_desc->pir); +} + +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty(pi_desc->pir, NR_VECTORS); +} + +static inline void pi_set_sn(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline void pi_set_on(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_on(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_on(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_sn(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_pir(int vector, struct pi_desc *pi_desc) +{ + return test_bit(vector, (unsigned long *)pi_desc->pir); +} + +/* Non-atomic helpers */ +static inline void __pi_set_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications |= BIT(POSTED_INTR_SN); +} + +static inline void __pi_clear_sn(struct pi_desc
*pi_desc) +{ + pi_desc->notifications &= ~BIT(POSTED_INTR_SN); +} + +#ifdef CONFIG_X86_POSTED_MSI +/* + * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's + * own interrupts. Here we do not distinguish them since those vector bits in + * PIR will always be zero. + */ +static inline bool pi_pending_this_cpu(unsigned int vector) +{ + struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); + + if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR)) + return false; + + return test_bit(vector, pid->pir); +} + +extern void intel_posted_msi_init(void); +#else +static inline bool pi_pending_this_cpu(unsigned int vector) { return false; } + +static inline void intel_posted_msi_init(void) { } +#endif /* CONFIG_X86_POSTED_MSI */ + +#endif /* _X86_POSTED_INTR_H */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 99a7fa9ab0a3..578441db09f0 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -4,9 +4,10 @@ #include <asm/rmwcc.h> #include <asm/percpu.h> -#include <linux/thread_info.h> -DECLARE_PER_CPU(int, __preempt_count); +#include <linux/static_call_types.h> + +DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count); /* We use the MSB mostly because it's available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -30,11 +31,11 @@ static __always_inline void preempt_count_set(int pc) { int old, new; + old = raw_cpu_read_4(__preempt_count); do { - old = raw_cpu_read_4(__preempt_count); new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old); + } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new)); } /* @@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* @@ -91,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); + return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e, + __percpu_arg([var])); } /* @@ -102,17 +104,48 @@ static __always_inline bool should_resched(int preempt_offset) return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } -#ifdef CONFIG_PREEMPT - extern asmlinkage void ___preempt_schedule(void); -# define __preempt_schedule() \ - asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) +#ifdef CONFIG_PREEMPTION + +extern asmlinkage void preempt_schedule(void); +extern asmlinkage void preempt_schedule_thunk(void); + +#define preempt_schedule_dynamic_enabled preempt_schedule_thunk +#define preempt_schedule_dynamic_disabled NULL + +extern asmlinkage void preempt_schedule_notrace(void); +extern asmlinkage void preempt_schedule_notrace_thunk(void); + +#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk +#define preempt_schedule_notrace_dynamic_disabled NULL + +#ifdef CONFIG_PREEMPT_DYNAMIC + +DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); + +#define __preempt_schedule() \ +do { \ + __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ +} while (0) + +DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); + +#define
__preempt_schedule_notrace() \ +do { \ + __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \ +} while (0) + +#else /* PREEMPT_DYNAMIC */ + +#define __preempt_schedule() \ + asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT); + +#define __preempt_schedule_notrace() \ + asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT); - extern asmlinkage void preempt_schedule(void); - extern asmlinkage void ___preempt_schedule_notrace(void); -# define __preempt_schedule_notrace() \ - asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT) +#endif /* PREEMPT_DYNAMIC */ - extern asmlinkage void preempt_schedule_notrace(void); -#endif +#endif /* PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h index aaedd73ea2c6..efe3e46e454b 100644 --- a/arch/x86/include/asm/processor-cyrix.h +++ b/arch/x86/include/asm/processor-cyrix.h @@ -3,37 +3,16 @@ * NSC/Cyrix CPU indexed register access. Must be inlined instead of * macros to ensure correct access ordering * Access order is always 0x22 (=offset), 0x23 (=value) - * - * When using the old macros a line like - * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); - * gets expanded to: - * do { - * outb((CX86_CCR2), 0x22); - * outb((({ - * outb((CX86_CCR2), 0x22); - * inb(0x23); - * }) | 0x88), 0x23); - * } while (0); - * - * which in fact violates the access order (= 0x22, 0x22, 0x23, 0x23). */ +#include <asm/pc-conf-reg.h> + static inline u8 getCx86(u8 reg) { - outb(reg, 0x22); - return inb(0x23); + return pc_conf_get(reg); } static inline void setCx86(u8 reg, u8 data) { - outb(reg, 0x22); - outb(data, 0x23); + pc_conf_set(reg, data); } - -#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86_old(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 02c2cbda4a74..e5f204b9b33d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -28,6 +28,8 @@ * On systems with SME, one bit (in a variable position!) is stolen to indicate * that the top-level paging structure is encrypted. * + * On systems with LAM, bits 61 and 62 are used to indicate LAM mode. + * * All of the remaining bits indicate the physical address of the top-level * paging structure. * @@ -35,7 +37,7 @@ #ifdef CONFIG_X86_64 /* Mask off the address space ID and SME encryption bits.
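 * For example, the kernel's read_cr3_pa() helper recovers the physical
 * address of the top-level table as __read_cr3() & CR3_ADDR_MASK.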
*/ -#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) +#define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) @@ -49,7 +51,7 @@ #define CR3_NOFLUSH 0 #endif -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION # define X86_CR3_PTI_PCID_USER_BIT 11 #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 33051436c864..a24c7805acdb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct io_bitmap; struct vm86; #include <asm/math_emu.h> @@ -15,15 +16,18 @@ struct vm86; #include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeatures.h> +#include <asm/cpuid/api.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> -#include <asm/msr.h> #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> +#include <asm/vmxfeatures.h> +#include <asm/vdso/processor.h> +#include <asm/shstk.h> #include <linux/personality.h> #include <linux/cache.h> @@ -56,86 +60,135 @@ struct vm86; # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -enum tlb_infos { - ENTRIES, - NR_INFO -}; - -extern u16 __read_mostly tlb_lli_4k[NR_INFO]; -extern u16 __read_mostly tlb_lli_2m[NR_INFO]; -extern u16 __read_mostly tlb_lli_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4k[NR_INFO]; -extern u16 __read_mostly tlb_lld_2m[NR_INFO]; -extern u16 __read_mostly tlb_lld_4m[NR_INFO]; -extern u16 __read_mostly tlb_lld_1g[NR_INFO]; +extern u16 __read_mostly tlb_lli_4k; +extern u16 __read_mostly tlb_lli_2m; +extern u16 __read_mostly tlb_lli_4m; +extern u16 __read_mostly tlb_lld_4k; +extern u16 __read_mostly tlb_lld_2m; +extern u16 __read_mostly tlb_lld_4m; +extern u16 __read_mostly tlb_lld_1g; /* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head_32.S, so think twice - * before touching them. [mj] + * CPU type and hardware bug flags. Kept separately for each CPU. */ +struct cpuinfo_topology { + // Real APIC ID read from the local APIC + u32 apicid; + // The initial APIC ID provided by CPUID + u32 initial_apicid; + + // Physical package ID + u32 pkg_id; + + // Physical die ID on AMD, Relative on Intel + u32 die_id; + + // Compute unit ID - AMD specific + u32 cu_id; + + // Core ID relative to the package + u32 core_id; + + // Logical ID mappings + u32 logical_pkg_id; + u32 logical_die_id; + u32 logical_core_id; + + // AMD Node ID and Nodes per Package info + u32 amd_node_id; + + // Cache level topology IDs + u32 llc_id; + u32 l2c_id; + + // Hardware defined CPU-type + union { + u32 cpu_type; + struct { + // CPUID.1A.EAX[23-0] + u32 intel_native_model_id :24; + // CPUID.1A.EAX[31-24] + u32 intel_type :8; + }; + struct { + // CPUID 0x80000026.EBX + u32 amd_num_processors :16, + amd_power_eff_ranking :8, + amd_native_model_id :4, + amd_type :4; + }; + }; +}; + struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; + union { + /* + * The particular ordering (low-to-high) of (vendor, + * family, model) is chosen so that a range of models, + * as is usually needed on AMD, can be compared numerically.
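+ * With the combined x86_vfm view, such a range check collapses
+ * to a single integer comparison, roughly (a sketch; the
+ * vendor/family/model packing helper is assumed to follow the
+ * byte order above):
+ *   c->x86_vfm >= VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x30)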
+ */ + struct { + __u8 x86_model; + /* CPU family */ + __u8 x86; + /* CPU vendor */ + __u8 x86_vendor; + __u8 x86_reserved; + }; + /* combined vendor, family, model */ + __u32 x86_vfm; + }; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined (in pages): */ int x86_tlbsize; #endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; - /* CPUID returned core id bits: */ - __u8 x86_coreid_bits; - __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; - __u32 x86_capability[NCAPINTS + NBUGINTS]; + /* + * Align to size of unsigned long because the x86_capability array + * is passed to bitops which require the alignment. Use unnamed + * union to enforce the array is aligned to size of unsigned long. + */ + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; char x86_vendor_id[16]; char x86_model_id[64]; + struct cpuinfo_topology topo; /* in KB - valid for CPUs which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ + /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; - /* cpuid returned max cores value: */ - u16 x86_max_cores; - u16 apicid; - u16 initial_apicid; + /* protected processor identification number */ + u64 ppin; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; - /* Physical processor id: */ - u16 phys_proc_id; - /* Logical processor id: */ - u16 logical_proc_id; - /* Core id: */ - u16 cpu_core_id; /* Index into per_cpu list: */ u16 cpu_index; + /* Is SMT active on this core?
*/ + bool smt_active; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; -struct cpuid_regs { - u32 eax, ebx, ecx, edx; -}; - -enum cpuid_regs_idx { - CPUID_EAX = 0, - CPUID_EBX, - CPUID_ECX, - CPUID_EDX, -}; - #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 @@ -144,7 +197,9 @@ enum cpuid_regs_idx { #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 -#define X86_VENDOR_NUM 10 +#define X86_VENDOR_ZHAOXIN 10 +#define X86_VENDOR_VORTEX 11 +#define X86_VENDOR_NUM 12 #define X86_VENDOR_UNKNOWN 0xff @@ -154,17 +209,11 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct x86_hw_tss doublefault_tss; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; -#ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#else -#define cpu_info boot_cpu_data -#define cpu_data(cpu) boot_cpu_data -#endif extern const struct seq_operations cpuinfo_op; @@ -177,51 +226,13 @@ static inline unsigned long long l1tf_pfn_limit(void) return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } +void init_cpu_devs(void); +void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); -extern void identify_boot_cpu(void); -extern void identify_secondary_cpu(struct cpuinfo_x86 *); +extern void identify_secondary_cpu(unsigned int cpu); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); -#ifdef CONFIG_X86_32 -extern int have_cpuid_p(void); -#else -static inline int have_cpuid_p(void) -{ - return 1; -} -#endif -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx) - : "memory"); -} - -#define native_cpuid_reg(reg) \ -static inline unsigned int native_cpuid_##reg(unsigned int op) \ -{ \ - unsigned int eax = op, ebx, ecx = 0, edx; \ - \ - native_cpuid(&eax, &ebx, &ecx, &edx); \ - \ - return reg; \ -} - -/* - * Native CPUID functions returning a single datum. - */ -native_cpuid_reg(eax) -native_cpuid_reg(ebx) -native_cpuid_reg(ecx) -native_cpuid_reg(edx) - /* * Friendlier CR3 helpers. */ @@ -297,11 +308,6 @@ struct x86_hw_tss { struct x86_hw_tss { u32 reserved1; u64 sp0; - - /* - * We store cpu_current_top_of_stack in sp1 so it's always accessible. - * Linux does not use ring 1, so sp1 is not otherwise needed. 
- */ u64 sp1; /* @@ -325,26 +331,56 @@ struct x86_hw_tss { * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET 0x8000 +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) + +#define IO_BITMAP_OFFSET_VALID_MAP \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + +#ifdef CONFIG_X86_IOPL_IOPERM +/* + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. + */ +# define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif + +/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { - unsigned long words[64]; + char stack[PAGE_SIZE]; }; struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); -struct tss_struct { +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + /* - * The fixed hardware portion. This must not cross a page boundary - * at risk of violating the SDM's advice and potentially triggering - * errata. + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. */ - struct x86_hw_tss x86_tss; + unsigned int prev_max; /* * The extra 1 is there because the CPU will access an @@ -352,101 +388,63 @@ struct tss_struct { * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1?
seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) +/* Per CPU interrupt stacks */ +struct irq_stack { + char stack[IRQ_STACK_SIZE]; +} __aligned(IRQ_STACK_SIZE); -#ifdef CONFIG_X86_32 -DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); #else -/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ -#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 +DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); #endif -/* - * Save the original ist values for checking stack pointers during debugging - */ -struct orig_ist { - unsigned long ist[7]; -}; +DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack); +/* const-qualified alias provided by the linker. */ +DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override, + const_cpu_current_top_of_stack); #ifdef CONFIG_X86_64 -DECLARE_PER_CPU(struct orig_ist, orig_ist); - -union irq_stack_union { - char irq_stack[IRQ_STACK_SIZE]; - /* - * GCC hardcodes the stack canary as %gs:40. Since the - * irq_stack is the object at %gs:0, we reserve the bottom - * 48 bytes of the irq stack for the canary. - */ - struct { - char gs_base[40]; - unsigned long stack_canary; - }; -}; - -DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; -DECLARE_INIT_PER_CPU(irq_stack_union); - static inline unsigned long cpu_kernelmode_gs_base(int cpu) { - return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); +#ifdef CONFIG_SMP + return per_cpu_offset(cpu); +#else + return 0; +#endif } -DECLARE_PER_CPU(char *, irq_stack_ptr); -DECLARE_PER_CPU(unsigned int, irq_count); -extern asmlinkage void ignore_sysret(void); +extern asmlinkage void entry_SYSCALL32_ignore(void); -#if IS_ENABLED(CONFIG_KVM) /* Save actual FS/GS selectors and bases to current->thread */ -void save_fsgs_for_kvm(void); -#endif -#else /* X86_64 */ -#ifdef CONFIG_STACKPROTECTOR -/* - * Make sure stack canary segment base is cached-aligned: - * "For Intel Atom processors, avoid non zero segment base address - * that is not aligned to cache line boundary at all cost." - * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) - */ -struct stack_canary { - char __pad[20]; /* canary at %gs:20 */ - unsigned long canary; -}; -DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); -#endif -/* - * per-CPU IRQ handling stacks - */ -struct irq_stack { - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __aligned(THREAD_SIZE); - -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); -DECLARE_PER_CPU(struct irq_stack *, softirq_stack); +void current_save_fsgs(void); #endif /* X86_64 */ -extern unsigned int fpu_kernel_xstate_size; -extern unsigned int fpu_user_xstate_size; - struct perf_event; -typedef struct { - unsigned long seg; -} mm_segment_t; - struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -478,7 +476,7 @@ struct thread_struct { /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... 
*/ - unsigned long debugreg6; + unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ @@ -490,58 +488,46 @@ struct thread_struct { struct vm86 *vm86; #endif /* IO permissions: */ - unsigned long *io_bitmap_ptr; - unsigned long iopl; - /* Max allowed port in the bitmap, in bytes: */ - unsigned io_bitmap_max; + struct io_bitmap *io_bitmap; - mm_segment_t addr_limit; + /* + * IOPL. Privilege level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. + */ + unsigned long iopl_emul; - unsigned int sig_on_uaccess_err:1; - unsigned int uaccess_err:1; /* uaccess failed */ + unsigned int iopl_warn:1; - /* Floating point and extended processor state */ - struct fpu fpu; /* - * WARNING: 'fpu' is dynamically-sized. It *MUST* be at - * the end. + * Protection Keys Register for Userspace. Loaded immediately on + * context switch. Store it in thread_struct to avoid a lookup in + * the task's FPU xstate buffer. This value is only valid when a + * task is scheduled out. For 'current' the authoritative source of + * PKRU is the hardware itself. */ + u32 pkru; + +#ifdef CONFIG_X86_USER_SHADOW_STACK + unsigned long features; + unsigned long features_locked; + + struct thread_shstk shstk; +#endif }; -/* Whitelist the FPU state from the task_struct for hardened usercopy. */ -static inline void arch_thread_struct_whitelist(unsigned long *offset, - unsigned long *size) -{ - *offset = offsetof(struct thread_struct, fpu.state); - *size = fpu_kernel_xstate_size; -} +#ifdef CONFIG_X86_DEBUG_FPU +extern struct fpu *x86_task_fpu(struct task_struct *task); +#else +# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task)))) +#endif -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) { -#ifdef CONFIG_X86_32 - unsigned int reg; - - asm volatile ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif + fpu_thread_struct_whitelist(offset, size); } static inline void @@ -550,24 +536,27 @@ native_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } -static inline void native_swapgs(void) +static __always_inline void native_swapgs(void) { #ifdef CONFIG_X86_64 asm volatile("swapgs" ::: "memory"); #endif } -static inline unsigned long current_top_of_stack(void) +static __always_inline unsigned long current_top_of_stack(void) { /* * We can't read directly from tss.sp0: sp0 on x86_32 is special in * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline.
*/ + if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) + return this_cpu_read_const(const_cpu_current_top_of_stack); + return this_cpu_read_stable(cpu_current_top_of_stack); } -static inline bool on_thread_stack(void) +static __always_inline bool on_thread_stack(void) { return (unsigned long)(current_top_of_stack() - current_stack_pointer) < THREAD_SIZE; @@ -576,162 +565,17 @@ static inline bool on_thread_stack(void) #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else -#define __cpuid native_cpuid static inline void load_sp0(unsigned long sp0) { native_load_sp0(sp0); } -#define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT_XXL */ -/* Free all resources held by a thread. */ -extern void release_thread(struct task_struct *); - -unsigned long get_wchan(struct task_struct *p); - -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. - */ -static inline void cpuid(unsigned int op, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(unsigned int op, int count, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return eax; -} - -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return ebx; -} - -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return ecx; -} - -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - - return edx; -} - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static __always_inline void rep_nop(void) -{ - asm volatile("rep; nop" ::: "memory"); -} - -static __always_inline void cpu_relax(void) -{ - rep_nop(); -} - -/* - * This function forces the icache and prefetched instruction stream to - * catch up with reality in two very specific cases: - * - * a) Text was modified using one virtual address and is about to be executed - * from the same physical page at a different virtual address. - * - * b) Text was modified on a different CPU, may subsequently be - * executed on this CPU, and you want to make sure the new version - * gets executed. This generally means you're calling this in a IPI. - * - * If you're calling this for a different reason, you're probably doing - * it wrong. - */ -static inline void sync_core(void) -{ - /* - * There are quite a few ways to do this. IRET-to-self is nice - * because it works on every CPU, at any CPL (so it's compatible - * with paravirtualization), and it never exits to a hypervisor. - * The only down sides are that it's a bit slow (it seems to be - * a bit more than 2x slower than the fastest options) and that - * it unmasks NMIs. The "push %cs" is needed because, in - * paravirtual environments, __KERNEL_CS may not be a valid CS - * value when we do IRET directly. 
- * - * In case NMI unmasking or performance ever becomes a problem, - * the next best option appears to be MOV-to-CR2 and an - * unconditional jump. That sequence also works on all CPUs, - * but it will fault at CPL3 (i.e. Xen PV). - * - * CPUID is the conventional way, but it's nasty: it doesn't - * exist on some 486-like CPUs, and it usually exits to a - * hypervisor. - * - * Like all of Linux's memory ordering operations, this is a - * compiler barrier as well. - */ -#ifdef CONFIG_X86_32 - asm volatile ( - "pushfl\n\t" - "pushl %%cs\n\t" - "pushl $1f\n\t" - "iret\n\t" - "1:" - : ASM_CALL_CONSTRAINT : : "memory"); -#else - unsigned int tmp; - - asm volatile ( - UNWIND_HINT_SAVE - "mov %%ss, %0\n\t" - "pushq %q0\n\t" - "pushq %%rsp\n\t" - "addq $8, (%%rsp)\n\t" - "pushfq\n\t" - "mov %%cs, %0\n\t" - "pushq %q0\n\t" - "pushq $1f\n\t" - "iretq\n\t" - UNWIND_HINT_RESTORE - "1:" - : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); -#endif -} +unsigned long __get_wchan(struct task_struct *p); -extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; @@ -740,40 +584,18 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); -extern int sysenter_setup(void); -void early_trap_pf_init(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void switch_to_new_gdt(int); +extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); -extern void load_percpu_segment(int); extern void cpu_init(void); - -static inline unsigned long get_debugctlmsr(void) -{ - unsigned long debugctlmsr = 0; - -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return 0; -#endif - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); - - return debugctlmsr; -} - -static inline void update_debugctlmsr(unsigned long debugctlmsr) -{ -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return; -#endif - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); -} +extern void cpu_init_exception_handling(bool boot_cpu); +extern void cpu_init_replace_early_idt(void); +extern void cr4_init(void); extern void set_task_blockstep(struct task_struct *task, bool on); @@ -785,13 +607,12 @@ extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else -# define BASE_PREFETCH "prefetcht0 %P1" +# define BASE_PREFETCH "prefetcht0 %1" #endif /* @@ -802,7 +623,7 @@ extern char ignore_fpu_irq; */ static inline void prefetch(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchnta %P1", + alternative_input(BASE_PREFETCH, "prefetchnta %1", X86_FEATURE_XMM, "m" (*(const char *)x)); } @@ -812,18 +633,13 @@ static inline void prefetch(const void *x) * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ -static inline void prefetchw(const void *x) +static __always_inline void prefetchw(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchw %P1", + alternative_input(BASE_PREFETCH, "prefetchw %1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } -static inline void spin_lock_prefetch(const void *x) -{ - prefetchw(x); -} - #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) @@ -837,71 +653,18 @@ static inline void 
spin_lock_prefetch(const void *x) }) #ifdef CONFIG_X86_32 -/* - * User space process size: 3GB (default). - */ -#define IA32_PAGE_OFFSET PAGE_OFFSET -#define TASK_SIZE PAGE_OFFSET -#define TASK_SIZE_LOW TASK_SIZE -#define TASK_SIZE_MAX TASK_SIZE -#define DEFAULT_MAP_WINDOW TASK_SIZE -#define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP - #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ - .addr_limit = KERNEL_DS, \ } -#define KSTK_ESP(task) (task_pt_regs(task)->sp) - #else -/* - * User space process size. This is the first address outside the user range. - * There are a few constraints that determine this: - * - * On Intel CPUs, if a SYSCALL instruction is at the highest canonical - * address, then that syscall will enter the kernel with a - * non-canonical return address, and SYSRET will explode dangerously. - * We avoid this particular problem by preventing anything executable - * from being mapped at the maximum canonical address. - * - * On AMD CPUs in the Ryzen family, there's a nasty bug in which the - * CPUs malfunction if they execute code from the highest canonical page. - * They'll speculate right off the end of the canonical space, and - * bad things happen. This is worked around in the same way as the - * Intel problem. - * - * With page table isolation enabled, we map the LDT in ... [stay tuned] - */ -#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) - -#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ - 0xc0000000 : 0xFFFFe000) - -#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ - IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) -#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE_MAX) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? 
\ - IA32_PAGE_OFFSET : TASK_SIZE_MAX) - -#define STACK_TOP TASK_SIZE_LOW -#define STACK_TOP_MAX TASK_SIZE_MAX +extern unsigned long __top_init_kernel_stack[]; -#define INIT_THREAD { \ - .addr_limit = KERNEL_DS, \ +#define INIT_THREAD { \ + .sp = (unsigned long)&__top_init_kernel_stack, \ } -extern unsigned long KSTK_ESP(struct task_struct *task); - #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, @@ -915,6 +678,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) +#define KSTK_ESP(task) (task_pt_regs(task)->sp) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) @@ -925,50 +689,36 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -/* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() -#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() - -#ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(void); -extern int mpx_disable_management(void); -#else -static inline int mpx_enable_management(void) +static inline u32 per_cpu_llc_id(unsigned int cpu) { - return -EINVAL; + return per_cpu(cpu_info.topo.llc_id, cpu); } -static inline int mpx_disable_management(void) + +static inline u32 per_cpu_l2c_id(unsigned int cpu) { - return -EINVAL; + return per_cpu(cpu_info.topo.l2c_id, cpu); } -#endif /* CONFIG_X86_INTEL_MPX */ #ifdef CONFIG_CPU_SUP_AMD -extern u16 amd_get_nb_id(int cpu); -extern u32 amd_get_nodes_per_socket(void); -#else -static inline u16 amd_get_nb_id(int cpu) { return 0; } -static inline u32 amd_get_nodes_per_socket(void) { return 0; } -#endif - -static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) +/* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. 
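+ *
+ * Note (from the code below, not the changelog): the ALTERNATIVE patches
+ * the DIV in only when X86_BUG_DIV0 is set, so on unaffected CPUs this
+ * compiles down to a NOP.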
+ */ +static __always_inline void amd_clear_divider(void) { - uint32_t base, eax, signature[3]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); - - if (!memcmp(sig, signature, 12) && - (leaves == 0 || ((eax - base) >= leaves))) - return base; - } - - return 0; + asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) + :: "a" (0), "d" (0), "r" (1)); } +extern void amd_check_microcode(void); +#else +static inline void amd_clear_divider(void) { } +static inline void amd_check_microcode(void) { } +#endif + extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); -extern void free_kernel_image_pages(void *begin, void *end); +extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN @@ -977,12 +727,15 @@ bool xen_set_default_idle(void); #define xen_set_default_idle 0 #endif -void stop_this_cpu(void *dummy); -void df_debug(struct pt_regs *regs, long error_code); -void microcode_check(void); +void __noreturn stop_this_cpu(void *dummy); +void microcode_check(struct cpuinfo_x86 *prev_info); +void store_cpu_caps(struct cpuinfo_x86 *info); + +DECLARE_PER_CPU(bool, cache_state_incoherent); enum l1tf_mitigations { L1TF_MITIGATION_OFF, + L1TF_MITIGATION_AUTO, L1TF_MITIGATION_FLUSH_NOWARN, L1TF_MITIGATION_FLUSH, L1TF_MITIGATION_FLUSH_NOSMT, @@ -992,4 +745,31 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_AUTO, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + +extern bool gds_ucode_mitigated(void); + +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE)); +} + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 1d081ac1cd69..5d0dbab85264 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -1,18 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for Device tree / OpenFirmware handling on X86 * * based on arch/powerpc/include/asm/prom.h which is * Copyright (C) 1996-2005 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
*/ #ifndef _ASM_X86_PROM_H #define _ASM_X86_PROM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/of.h> #include <linux/types.h> @@ -27,15 +23,15 @@ extern int of_ioapic; extern u64 initial_dtb; extern void add_dtb(u64 data); void x86_of_pci_init(void); -void x86_dtb_init(void); +void x86_flattree_get_config(void); #else static inline void add_dtb(u64 data) { } static inline void x86_of_pci_init(void) { } -static inline void x86_dtb_init(void) { } +static inline void x86_flattree_get_config(void) { } #define of_ioapic 0 #endif extern char cmd_line[COMMAND_LINE_SIZE]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6e81788a30c1..05224a695872 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -4,13 +4,17 @@ #include <asm/ldt.h> +struct task_struct; + /* misc architecture specific prototypes */ void syscall_init(void); #ifdef CONFIG_X86_64 void entry_SYSCALL_64(void); -long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); +void entry_SYSCALL_64_safe_stack(void); +void entry_SYSRETQ_unsafe_stack(void); +void entry_SYSRETQ_end(void); #endif #ifdef CONFIG_X86_32 @@ -24,18 +28,18 @@ void __end_SYSENTER_singlestep_region(void); void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); -void entry_INT80_compat(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -void xen_entry_INT80_compat(void); -#endif +void entry_SYSCALL_compat_safe_stack(void); +void entry_SYSRETL_compat_unsafe_stack(void); +void entry_SYSRETL_compat_end(void); +#else /* !CONFIG_IA32_EMULATION */ +#define entry_SYSCALL_compat NULL +#define entry_SYSENTER_compat NULL #endif void x86_configure_nx(void); -void x86_report_nx(void); extern int reboot_force; -long do_arch_prctl_common(struct task_struct *task, int option, - unsigned long cpuid_enabled); +long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h index 5df09a0b80b8..88d0a1ab1f77 100644 --- a/arch/x86/include/asm/pti.h +++ b/arch/x86/include/asm/pti.h @@ -1,9 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTI_H #define _ASM_X86_PTI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION extern void pti_init(void); extern void pti_check_boottime_disable(void); extern void pti_finalize(void); @@ -11,5 +11,5 @@ extern void pti_finalize(void); static inline void pti_check_boottime_disable(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 8a7fc0cca2d1..35d062a2e304 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,7 @@ #include <asm/page_types.h> #include <uapi/asm/ptrace.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ struct pt_regs { @@ -37,7 +37,10 @@ struct pt_regs { unsigned short __esh; unsigned short fs; unsigned short __fsh; - /* On interrupt, gs and __gsh store the vector number. */ + /* + * On interrupt, gs and __gsh store the vector number. They never + * store gs any more. + */ unsigned short gs; unsigned short __gsh; /* On interrupt, this is the error code. 
*/ @@ -53,18 +56,64 @@ struct pt_regs { #else /* __i386__ */ +struct fred_cs { + /* CS selector */ + u64 cs : 16, + /* Stack level at event time */ + sl : 2, + /* IBT in WAIT_FOR_ENDBRANCH state */ + wfe : 1, + : 45; +}; + +struct fred_ss { + /* SS selector */ + u64 ss : 16, + /* STI state */ + sti : 1, + /* Set if syscall, sysenter or INT n */ + swevent : 1, + /* Event is NMI type */ + nmi : 1, + : 13, + /* Event vector */ + vector : 8, + : 8, + /* Event type */ + type : 4, + : 4, + /* Event was incident to enclave execution */ + enclave : 1, + /* CPU was in 64-bit mode */ + l : 1, + /* + * Nested exception during FRED delivery, not set + * for #DF. + */ + nested : 1, + : 1, + /* + * The length of the instruction causing the event. + * Only set for INTO, INT1, INT3, INT n, SYSCALL + * and SYSENTER. 0 otherwise. + */ + insnlen : 4; +}; + struct pt_regs { -/* - * C ABI says these regs are callee-preserved. They aren't saved on kernel entry - * unless syscall needs a complete, fully filled "struct pt_regs". - */ + /* + * C ABI says these regs are callee-preserved. They aren't saved on + * kernel entry unless syscall needs a complete, fully filled + * "struct pt_regs". + */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; -/* These regs are callee-clobbered. Always saved on kernel entry. */ + + /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; @@ -74,18 +123,50 @@ struct pt_regs { unsigned long dx; unsigned long si; unsigned long di; -/* - * On syscall entry, this is syscall#. On CPU exception, this is error code. - * On hw interrupt, it's IRQ number: - */ + + /* + * orig_ax is used on entry for: + * - the syscall number (syscall, sysenter, int80) + * - error_code stored by the CPU on traps and exceptions + * - the interrupt number for device interrupts + * + * A FRED stack frame starts here: + * 1) It _always_ includes an error code; + * + * 2) The return frame for ERET[US] starts here, but + * the content of orig_ax is ignored. + */ unsigned long orig_ax; -/* Return frame for iretq */ + + /* The IRETQ return frame starts here */ unsigned long ip; - unsigned long cs; + + union { + /* CS selector */ + u16 cs; + /* The extended 64-bit data slot containing CS */ + u64 csx; + /* The FRED CS extension */ + struct fred_cs fred_cs; + }; + unsigned long flags; unsigned long sp; - unsigned long ss; -/* top of stack page */ + + union { + /* SS selector */ + u16 ss; + /* The extended 64-bit data slot containing SS */ + u64 ssx; + /* The FRED SS extension */ + struct fred_ss fred_ss; + }; + + /* + * Top of stack on IDT systems, while FRED systems have extra fields + * defined above for storing exception related information, e.g. CR2 or + * DR6. 
+ */ }; #endif /* !__i386__ */ @@ -94,24 +175,24 @@ struct pt_regs { #include <asm/paravirt_types.h> #endif +#include <asm/proto.h> + struct cpuinfo_x86; struct task_struct; extern unsigned long profile_pc(struct pt_regs *regs); -#define profile_pc profile_pc extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code); +extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); -static inline unsigned long regs_return_value(struct pt_regs *regs) +static __always_inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } @@ -125,7 +206,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. */ -static inline int user_mode(struct pt_regs *regs) +static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; @@ -134,7 +215,7 @@ static inline int user_mode(struct pt_regs *regs) #endif } -static inline int v8086_mode(struct pt_regs *regs) +static __always_inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); @@ -161,25 +242,77 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp + +static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) +{ + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETQ_end); +#ifdef CONFIG_IA32_EMULATION + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETL_compat_end); #endif -#ifdef CONFIG_X86_32 -extern unsigned long kernel_stack_pointer(struct pt_regs *regs); -#else -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) + return ret; +} +#endif + +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } -#endif -#define GET_IP(regs) ((regs)->ip) -#define GET_FP(regs) ((regs)->bp) -#define GET_USP(regs) ((regs)->sp) +static __always_inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->ip; +} -#include <asm-generic/ptrace.h> +static __always_inline +void instruction_pointer_set(struct pt_regs *regs, unsigned long val) +{ + regs->ip = val; +} + +static __always_inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->bp; +} + +static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + 
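+/*
+ * Illustrative sketch, not part of this interface: with the unions above,
+ * FRED-aware code can pull the event metadata straight out of pt_regs.
+ * The example_* handlers here are hypothetical:
+ *
+ *	void example_fred_dispatch(struct pt_regs *regs)
+ *	{
+ *		struct fred_ss ss = regs->fred_ss;
+ *
+ *		if (ss.nmi)
+ *			example_handle_nmi(regs);
+ *		else if (ss.swevent)
+ *			example_handle_syscall(regs, ss.vector);
+ *		else
+ *			example_handle_event(regs, ss.type, ss.vector);
+ *	}
+ */
+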
+static __always_inline +void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) +{ + regs->sp = val; +} + +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) +{ + return !(regs->flags & X86_EFLAGS_IF); +} /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); @@ -201,14 +334,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 - /* - * Traps from the kernel do not save sp and ss. - * Use the helper function to retrieve sp. - */ - if (offset == offsetof(struct pt_regs, sp) && - regs->cs == __KERNEL_CS) - return kernel_stack_pointer(regs); - /* The selector fields are 16-bit. */ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || @@ -234,8 +359,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { - return ((addr & ~(THREAD_SIZE - 1)) == - (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); + return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** @@ -249,7 +373,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) @@ -259,7 +383,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -279,7 +403,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } @@ -303,8 +427,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), @@ -333,27 +457,17 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_REPORT -/* - * When hitting ptrace_stop(), we cannot return using SYSRET because - * that does not restore the full CPU state, only a minimal set. The - * ptracer can change arbitrary register values, which is usually okay - * because the usual ptrace stops run off the signal delivery path which - * forces IRET; however, ptrace_event() stops happen in arbitrary places - * in the kernel and don't force IRET path. - * - * So force IRET path after a ptrace stop. 
- */ -#define arch_ptrace_stop_needed(code, info) \ -({ \ - force_iret(); \ - false; \ -}) - struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#endif /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_64 +# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) +#else +# define do_set_thread_area_64(p, s, t) (0) +#endif + +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h index 92c34e517da1..2fee5e9f1ccc 100644 --- a/arch/x86/include/asm/purgatory.h +++ b/arch/x86/include/asm/purgatory.h @@ -2,20 +2,10 @@ #ifndef _ASM_X86_PURGATORY_H #define _ASM_X86_PURGATORY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/purgatory.h> extern void purgatory(void); -/* - * These forward declarations serve two purposes: - * - * 1) Make sparse happy when checking arch/purgatory - * 2) Document that these are required to be global so the symbol - * lookup in kexec works - */ -extern unsigned long purgatory_backup_dest; -extern unsigned long purgatory_backup_src; -extern unsigned long purgatory_backup_sz; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 1436226efe3e..b9fece5fc96d 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PVCLOCK_ABI_H #define _ASM_X86_PVCLOCK_ABI_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * These structs MUST NOT be changed. @@ -44,5 +44,5 @@ struct pvclock_wall_clock { #define PVCLOCK_GUEST_STOPPED (1 << 1) /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..6e4f8fae3ce9 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,11 +2,13 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include <linux/clocksource.h> +#include <asm/clocksource.h> #include <asm/pvclock-abi.h> +struct timespec64; /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); @@ -39,7 +41,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. 
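 *
 * In other words, result = ((delta << shift) * mul_frac) >> 32, where a
 * negative shift pre-shifts delta right instead; mul_frac acts as a 0.32
 * fixed-point multiplier.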
 */
-static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 {
 	u64 product;
 #ifdef __i386__
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..68da67df304d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -32,6 +32,7 @@ extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __pv_init_lock_hash(void);
 extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+extern bool nopvspin;
 
 #define	queued_spin_unlock queued_spin_unlock
 /**
@@ -52,6 +53,7 @@ static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
+	kcsan_release();
 	pv_queued_spin_unlock(lock);
 }
 
@@ -63,13 +65,30 @@ static inline bool vcpu_is_preempted(long cpu)
 #endif
 
 #ifdef CONFIG_PARAVIRT
-DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
-
-void native_pv_lock_init(void) __init;
+/*
+ * virt_spin_lock_key - disables the virt_spin_lock() hijack by default.
+ *
+ * Native (and PV wanting native due to vCPU pinning) should keep this key
+ * disabled. Native does not touch the key.
+ *
+ * When in a guest then native_pv_lock_init() enables the key first and
+ * KVM/XEN might conditionally disable it later in the boot process again.
+ */
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+/*
+ * Shortcut for the queued_spin_lock_slowpath() function that allows
+ * virt to hijack it.
+ *
+ * Returns:
+ *   true - lock has been negotiated, all done;
+ *   false - queued_spin_lock_slowpath() will do its thing.
+ */
 #define virt_spin_lock virt_spin_lock
 static inline bool virt_spin_lock(struct qspinlock *lock)
 {
+	int val;
+
 	if (!static_branch_likely(&virt_spin_lock_key))
 		return false;
 
@@ -79,17 +98,17 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
 	 * horrible lock 'holder' preemption issues.
 	 */
 
-	do {
-		while (atomic_read(&lock->val) != 0)
-			cpu_relax();
-	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+ __retry:
+	val = atomic_read(&lock->val);
+
+	if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+		cpu_relax();
+		goto __retry;
+	}
 
 	return true;
 }
-#else
-static inline void native_pv_lock_init(void)
-{
-}
+
 #endif /* CONFIG_PARAVIRT */
 
 #include <asm-generic/qspinlock.h>
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 159622ee0674..0a985784be9b 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -2,6 +2,10 @@
 #ifndef __ASM_QSPINLOCK_PARAVIRT_H
 #define __ASM_QSPINLOCK_PARAVIRT_H
 
+#include <asm/ibt.h>
+
+void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked);
+
 /*
  * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
  * registers. For i386, however, only 1 32-bit register needs to be saved
@@ -10,21 +14,20 @@
  */
 #ifdef CONFIG_64BIT
 
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
 #define __pv_queued_spin_unlock	__pv_queued_spin_unlock
-#define PV_UNLOCK		"__raw_callee_save___pv_queued_spin_unlock"
-#define PV_UNLOCK_SLOWPATH	"__raw_callee_save___pv_queued_spin_unlock_slowpath"
 
 /*
  * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
  * which combines the register saving thunk and the body of the following
- * C code:
+ * C code. Note that it puts the code in the .spinlock.text section which
+ * is equivalent to adding __lockfunc in the C code:
  *
- * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
  * {
- *	u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+ *	u8 lockval = _Q_LOCKED_VAL;
  *
- *	if (likely(lockval == _Q_LOCKED_VAL))
+ *	if (try_cmpxchg(&lock->locked, &lockval, 0))
  *		return;
  *	pv_queued_spin_unlock_slowpath(lock, lockval);
  * }
@@ -34,36 +37,31 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
  *  rsi = lockval           (second argument)
  *  rdx = internal variable (set to 0)
  */
-asm    (".pushsection .text;"
-	".globl " PV_UNLOCK ";"
-	".type " PV_UNLOCK ", @function;"
-	".align 4,0x90;"
-	PV_UNLOCK ": "
-	FRAME_BEGIN
-	"push  %rdx;"
-	"mov   $0x1,%eax;"
-	"xor   %edx,%edx;"
-	LOCK_PREFIX "cmpxchg %dl,(%rdi);"
-	"cmp   $0x1,%al;"
-	"jne   .slowpath;"
-	"pop   %rdx;"
+#define PV_UNLOCK_ASM							\
+	FRAME_BEGIN							\
+	"push  %rdx\n\t"						\
+	"mov   $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t"		\
+	"xor   %edx,%edx\n\t"						\
+	LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t"				\
+	"jne   .slowpath\n\t"						\
+	"pop   %rdx\n\t"						\
+	FRAME_END							\
+	ASM_RET								\
+	".slowpath:\n\t"						\
+	"push   %rsi\n\t"						\
+	"movzbl %al,%esi\n\t"						\
+	"call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t"	\
+	"pop    %rsi\n\t"						\
+	"pop    %rdx\n\t"						\
 	FRAME_END
-	"ret;"
-	".slowpath: "
-	"push   %rsi;"
-	"movzbl %al,%esi;"
-	"call " PV_UNLOCK_SLOWPATH ";"
-	"pop    %rsi;"
-	"pop    %rdx;"
-	FRAME_END
-	"ret;"
-	".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
-	".popsection");
+
+DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock,
+		PV_UNLOCK_ASM, .spinlock.text);
 
 #else /* CONFIG_64BIT */
 
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
 
 #endif /* CONFIG_64BIT */
 #endif
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 63b3393bd98e..e406a1e92c63 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -9,20 +9,23 @@
 #define TH_FLAGS_SME_ACTIVE_BIT	0
 #define TH_FLAGS_SME_ACTIVE	BIT(TH_FLAGS_SME_ACTIVE_BIT)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/types.h>
 #include <asm/io.h>
 
-/* This must match data at realmode.S */
+/* This must match data at realmode/rm/header.S */
 struct real_mode_header {
 	u32	text_start;
 	u32	ro_end;
 	/* SMP trampoline */
 	u32	trampoline_start;
-	u32	trampoline_status;
 	u32	trampoline_header;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	u32	sev_es_trampoline_start;
+#endif
 #ifdef CONFIG_X86_64
+	u32	trampoline_start64;
 	u32	trampoline_pgd;
 #endif
 	/* ACPI S3 wakeup */
@@ -37,7 +40,7 @@ struct real_mode_header {
 #endif
 };
 
-/* This must match data at trampoline_32/64.S */
+/* This must match data at
realmode/rm/trampoline_{32,64}.S */ struct trampoline_header { #ifdef CONFIG_X86_32 u32 start; @@ -49,6 +52,7 @@ struct trampoline_header { u64 efer; u32 cr4; u32 flags; + u32 lock; #endif }; @@ -56,8 +60,12 @@ extern struct real_mode_header *real_mode_header; extern unsigned char real_mode_blob_end[]; extern unsigned long initial_code; -extern unsigned long initial_gs; extern unsigned long initial_stack; +#ifdef CONFIG_AMD_MEM_ENCRYPT +extern unsigned long initial_vc_handler; +#endif + +extern u32 *trampoline_lock; extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; @@ -67,9 +75,10 @@ extern unsigned char startup_32_smp[]; extern unsigned char boot_gdt[]; #else extern unsigned char secondary_startup_64[]; +extern unsigned char secondary_startup_64_no_verify[]; #endif -static inline size_t real_mode_size_needed(void) +static __always_inline size_t real_mode_size_needed(void) { if (real_mode_header) return 0; /* already allocated. */ @@ -77,9 +86,15 @@ static inline size_t real_mode_size_needed(void) return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE); } -void set_real_mode_mem(phys_addr_t mem, size_t size); +static inline void set_real_mode_mem(phys_addr_t mem) +{ + real_mode_header = (struct real_mode_header *) __va(mem); +} + void reserve_real_mode(void); +void load_trampoline_pgtable(void); +void init_real_mode(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..ecd58ea9a837 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,8 +25,18 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +typedef void (cpu_emergency_virt_cb)(void); +#if IS_ENABLED(CONFIG_KVM_X86) +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); +void cpu_emergency_disable_virtualization(void); +#else +static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {} +static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {} +static inline void cpu_emergency_disable_virtualization(void) {} +#endif /* CONFIG_KVM_X86 */ + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); -void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); void run_crash_ipi_callback(struct pt_regs *regs); diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index dbaed55c1c24..000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. - */ -#include <linux/refcount.h> -#include <asm/bug.h> - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. 
- */ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[var], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ - asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : "ir" (i) - : "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "er", i, "cx"); -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "cx"); -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - int c, result; - - c = atomic_read(&(r->refs)); - do { - if (unlikely(c == 0)) - return false; - - result = c + i; - - /* Did we try to increment from/to an undesirable state? */ - if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR - : : [var] "m" (r->refs.counter) - : "cc", "cx"); - break; - } - - } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - - return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h deleted file mode 100644 index 6847d85400a8..000000000000 --- a/arch/x86/include/asm/required-features.h +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef _ASM_X86_REQUIRED_FEATURES_H -#define _ASM_X86_REQUIRED_FEATURES_H - -/* Define minimum CPUID feature set for kernel These bits are checked - really early to actually display a visible error message before the - kernel dies. Make sure to assign features to the proper mask! - - Some requirements that are not in CPUID yet are also in the - CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too. 
- - The real information is in arch/x86/Kconfig.cpu, this just converts - the CONFIGs into a bitmask */ - -#ifndef CONFIG_MATH_EMULATION -# define NEED_FPU (1<<(X86_FEATURE_FPU & 31)) -#else -# define NEED_FPU 0 -#endif - -#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) -# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -#else -# define NEED_PAE 0 -#endif - -#ifdef CONFIG_X86_CMPXCHG64 -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) -#else -# define NEED_CX8 0 -#endif - -#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64) -# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31)) -#else -# define NEED_CMOV 0 -#endif - -#ifdef CONFIG_X86_USE_3DNOW -# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) -#else -# define NEED_3DNOW 0 -#endif - -#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) -# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) -#else -# define NEED_NOPL 0 -#endif - -#ifdef CONFIG_MATOM -# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) -#else -# define NEED_MOVBE 0 -#endif - -#ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT -/* Paravirtualized systems may not have PSE or PGE available */ -#define NEED_PSE 0 -#define NEED_PGE 0 -#else -#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) -#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) -#endif -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) -#define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) -#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -#define NEED_LM (1<<(X86_FEATURE_LM & 31)) -#else -#define NEED_PSE 0 -#define NEED_MSR 0 -#define NEED_PGE 0 -#define NEED_FXSR 0 -#define NEED_XMM 0 -#define NEED_XMM2 0 -#define NEED_LM 0 -#endif - -#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\ - NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\ - NEED_XMM|NEED_XMM2) -#define SSE_MASK (NEED_XMM|NEED_XMM2) - -#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) - -#define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 (NEED_NOPL) -#define REQUIRED_MASK4 (NEED_MOVBE) -#define REQUIRED_MASK5 0 -#define REQUIRED_MASK6 0 -#define REQUIRED_MASK7 0 -#define REQUIRED_MASK8 0 -#define REQUIRED_MASK9 0 -#define REQUIRED_MASK10 0 -#define REQUIRED_MASK11 0 -#define REQUIRED_MASK12 0 -#define REQUIRED_MASK13 0 -#define REQUIRED_MASK14 0 -#define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 -#define REQUIRED_MASK17 0 -#define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) - -#endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h new file mode 100644 index 000000000000..575f8408a9e7 --- /dev/null +++ b/arch/x86/include/asm/resctrl.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RESCTRL_H +#define _ASM_X86_RESCTRL_H + +#ifdef CONFIG_X86_CPU_RESCTRL + +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/resctrl_types.h> +#include <linux/sched.h> + +#include <asm/msr.h> + +/* + * This value can never be a valid CLOSID, and is used when mapping a + * (closid, rmid) pair to an index and back. On x86 only the RMID is + * needed. The index is a software defined value. + */ +#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0) + +/** + * struct resctrl_pqr_state - State cache for the PQR MSR + * @cur_rmid: The cached Resource Monitoring ID + * @cur_closid: The cached Class Of Service ID + * @default_rmid: The user assigned Resource Monitoring ID + * @default_closid: The user assigned cached Class Of Service ID + * + * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the + * lower 10 bits rmid. 
The update to MSR_IA32_PQR_ASSOC always + * contains both parts, so we need to cache them. This also + * stores the user configured per cpu CLOSID and RMID. + * + * The cache also helps to avoid pointless updates if the value does + * not change. + */ +struct resctrl_pqr_state { + u32 cur_rmid; + u32 cur_closid; + u32 default_rmid; + u32 default_closid; +}; + +DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); + +extern bool rdt_alloc_capable; +extern bool rdt_mon_capable; + +DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); + +static inline bool resctrl_arch_alloc_capable(void) +{ + return rdt_alloc_capable; +} + +static inline void resctrl_arch_enable_alloc(void) +{ + static_branch_enable_cpuslocked(&rdt_alloc_enable_key); + static_branch_inc_cpuslocked(&rdt_enable_key); +} + +static inline void resctrl_arch_disable_alloc(void) +{ + static_branch_disable_cpuslocked(&rdt_alloc_enable_key); + static_branch_dec_cpuslocked(&rdt_enable_key); +} + +static inline bool resctrl_arch_mon_capable(void) +{ + return rdt_mon_capable; +} + +static inline void resctrl_arch_enable_mon(void) +{ + static_branch_enable_cpuslocked(&rdt_mon_enable_key); + static_branch_inc_cpuslocked(&rdt_enable_key); +} + +static inline void resctrl_arch_disable_mon(void) +{ + static_branch_disable_cpuslocked(&rdt_mon_enable_key); + static_branch_dec_cpuslocked(&rdt_enable_key); +} + +/* + * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR + * + * Following considerations are made so that this has minimal impact + * on scheduler hot path: + * - This will stay as no-op unless we are running on an Intel SKU + * which supports resource control or monitoring and we enable by + * mounting the resctrl file system. + * - Caches the per cpu CLOSid/RMID values and does the MSR write only + * when a task with a different CLOSid/RMID is scheduled in. + * - We allocate RMIDs/CLOSids globally in order to keep this as + * simple as possible. + * Must be called with preemption disabled. + */ +static inline void __resctrl_sched_in(struct task_struct *tsk) +{ + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 closid = READ_ONCE(state->default_closid); + u32 rmid = READ_ONCE(state->default_rmid); + u32 tmp; + + /* + * If this task has a closid/rmid assigned, use it. + * Else use the closid/rmid assigned to this cpu. 
+ */ + if (static_branch_likely(&rdt_alloc_enable_key)) { + tmp = READ_ONCE(tsk->closid); + if (tmp) + closid = tmp; + } + + if (static_branch_likely(&rdt_mon_enable_key)) { + tmp = READ_ONCE(tsk->rmid); + if (tmp) + rmid = tmp; + } + + if (closid != state->cur_closid || rmid != state->cur_rmid) { + state->cur_closid = closid; + state->cur_rmid = rmid; + wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid); + } +} + +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + unsigned int scale = boot_cpu_data.x86_cache_occ_scale; + + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ + val /= scale; + return val * scale; +} + +static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, + u32 rmid) +{ + WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid); + WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid); +} + +static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk, + u32 closid, u32 rmid) +{ + WRITE_ONCE(tsk->closid, closid); + WRITE_ONCE(tsk->rmid, rmid); +} + +static inline bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) +{ + return READ_ONCE(tsk->closid) == closid; +} + +static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored, + u32 rmid) +{ + return READ_ONCE(tsk->rmid) == rmid; +} + +static inline void resctrl_arch_sched_in(struct task_struct *tsk) +{ + if (static_branch_likely(&rdt_enable_key)) + __resctrl_sched_in(tsk); +} + +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) +{ + *rmid = idx; + *closid = X86_RESCTRL_EMPTY_CLOSID; +} + +static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid) +{ + return rmid; +} + +/* x86 can always read an rmid, nothing needs allocating */ +struct rdt_resource; +static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, + enum resctrl_event_id evtid) +{ + might_sleep(); + return NULL; +} + +static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, + enum resctrl_event_id evtid, + void *ctx) { } + +void resctrl_cpu_detect(struct cpuinfo_x86 *c); + +#else + +static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} +static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} + +#endif /* CONFIG_X86_CPU_RESCTRL */ + +#endif /* _ASM_X86_RESCTRL_H */ diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h deleted file mode 100644 index 40ebddde6ac2..000000000000 --- a/arch/x86/include/asm/resctrl_sched.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_RESCTRL_SCHED_H -#define _ASM_X86_RESCTRL_SCHED_H - -#ifdef CONFIG_X86_RESCTRL - -#include <linux/sched.h> -#include <linux/jump_label.h> - -#define IA32_PQR_ASSOC 0x0c8f - -/** - * struct resctrl_pqr_state - State cache for the PQR MSR - * @cur_rmid: The cached Resource Monitoring ID - * @cur_closid: The cached Class Of Service ID - * @default_rmid: The user assigned Resource Monitoring ID - * @default_closid: The user assigned cached Class Of Service ID - * - * The upper 32 bits of IA32_PQR_ASSOC contain closid and the - * lower 10 bits rmid. The update to IA32_PQR_ASSOC always - * contains both parts, so we need to cache them. This also - * stores the user configured per cpu CLOSID and RMID. - * - * The cache also helps to avoid pointless updates if the value does - * not change. 
- */ -struct resctrl_pqr_state { - u32 cur_rmid; - u32 cur_closid; - u32 default_rmid; - u32 default_closid; -}; - -DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); - -DECLARE_STATIC_KEY_FALSE(rdt_enable_key); -DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); -DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); - -/* - * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR - * - * Following considerations are made so that this has minimal impact - * on scheduler hot path: - * - This will stay as no-op unless we are running on an Intel SKU - * which supports resource control or monitoring and we enable by - * mounting the resctrl file system. - * - Caches the per cpu CLOSid/RMID values and does the MSR write only - * when a task with a different CLOSid/RMID is scheduled in. - * - We allocate RMIDs/CLOSids globally in order to keep this as - * simple as possible. - * Must be called with preemption disabled. - */ -static void __resctrl_sched_in(void) -{ - struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); - u32 closid = state->default_closid; - u32 rmid = state->default_rmid; - - /* - * If this task has a closid/rmid assigned, use it. - * Else use the closid/rmid assigned to this cpu. - */ - if (static_branch_likely(&rdt_alloc_enable_key)) { - if (current->closid) - closid = current->closid; - } - - if (static_branch_likely(&rdt_mon_enable_key)) { - if (current->rmid) - rmid = current->rmid; - } - - if (closid != state->cur_closid || rmid != state->cur_rmid) { - state->cur_closid = closid; - state->cur_rmid = rmid; - wrmsr(IA32_PQR_ASSOC, rmid, closid); - } -} - -static inline void resctrl_sched_in(void) -{ - if (static_branch_likely(&rdt_enable_key)) - __resctrl_sched_in(); -} - -#else - -static inline void resctrl_sched_in(void) {} - -#endif /* CONFIG_X86_RESCTRL */ - -#endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h deleted file mode 100644 index 0a21986d2238..000000000000 --- a/arch/x86/include/asm/rio.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Derived from include/asm-x86/mach-summit/mach_mpparse.h - * and include/asm-x86/mach-default/bios_ebda.h - * - * Author: Laurent Vivier <Laurent.Vivier@bull.net> - */ - -#ifndef _ASM_X86_RIO_H -#define _ASM_X86_RIO_H - -#define RIO_TABLE_VERSION 3 - -struct rio_table_hdr { - u8 version; /* Version number of this data structure */ - u8 num_scal_dev; /* # of Scalability devices */ - u8 num_rio_dev; /* # of RIO I/O devices */ -} __attribute__((packed)); - -struct scal_detail { - u8 node_id; /* Scalability Node ID */ - u32 CBAR; /* Address of 1MB register space */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port connected to: 0xFF = None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port2node; /* Node ID port connected to: 0xFF = None */ - u8 port2port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - u8 node_id; /* RIO Node ID */ - u32 BBAR; /* Address of 1MB register space */ - u8 type; /* Type of device */ - u8 owner_id; /* Node ID of Hurricane that owns this */ - /* node */ - u8 port0node; /* Node ID port connected to: 0xFF=None */ - u8 port0port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 port1node; /* Node ID port 
connected to: 0xFF=None */ - u8 port1port; /* Port num port connected to: 0,1,2, or */ - /* 0xFF=None */ - u8 first_slot; /* Lowest slot number below this Calgary */ - u8 status; /* Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie: */ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - u8 WP_index; /* instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - u8 chassis_num; /* 1 based Chassis number */ -} __attribute__((packed)); - -enum { - HURR_SCALABILTY = 0, /* Hurricane Scalability info */ - HURR_RIOIB = 2, /* Hurricane RIOIB info */ - COMPAT_CALGARY = 4, /* Compatibility Calgary */ - ALT_CALGARY = 5, /* Second Planar Calgary */ -}; - -#endif /* _ASM_X86_RIO_H */ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8a9eba191516..54c8fc430684 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,53 +2,26 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -/* This counts to 12. Any more, it will return 13th argument. */ -#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __RMWcc_CONCAT(a, b) a ## b -#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b) +#include <linux/args.h> #define __CLOBBERS_MEM(clb...) "memory", ## clb -#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO) - -/* Use asm goto */ - -#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ -({ \ - bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ - : : [var] "m" (_var), ## __VA_ARGS__ \ - : clobbers : cc_label); \ - if (0) { \ -cc_label: c = true; \ - } \ - c; \ -}) - -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ - -/* Use flags output or a set instruction */ - #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c; \ - asm volatile (fullop CC_SET(cc) \ - : [var] "+m" (_var), CC_OUT(cc) (c) \ + asm_inline volatile (fullop \ + : [var] "+m" (_var), "=@cc" #cc (c) \ : __VA_ARGS__ : clobbers); \ c; \ }) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ - #define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) #define GEN_UNARY_RMWcc_3(op, var, cc) \ GEN_UNARY_RMWcc_4(op, var, cc, "%[var]") -#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_UNARY_RMWcc(X...) CONCATENATE(GEN_UNARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \ __GEN_RMWcc(op " %[val], " arg0, var, cc, \ @@ -57,7 +30,7 @@ cc_label: c = true; \ #define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \ GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]") -#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_BINARY_RMWcc(X...) CONCATENATE(GEN_BINARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) 
\ __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \ diff --git a/arch/x86/include/asm/rqspinlock.h b/arch/x86/include/asm/rqspinlock.h new file mode 100644 index 000000000000..24a885449ee6 --- /dev/null +++ b/arch/x86/include/asm/rqspinlock.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_RQSPINLOCK_H +#define _ASM_X86_RQSPINLOCK_H + +#include <asm/paravirt.h> + +#ifdef CONFIG_PARAVIRT +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); + +#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled +static __always_inline bool resilient_virt_spin_lock_enabled(void) +{ + return static_branch_likely(&virt_spin_lock_key); +} + +#ifdef CONFIG_QUEUED_SPINLOCKS +typedef struct qspinlock rqspinlock_t; +#else +typedef struct rqspinlock rqspinlock_t; +#endif +extern int resilient_tas_spin_lock(rqspinlock_t *lock); + +#define resilient_virt_spin_lock resilient_virt_spin_lock +static inline int resilient_virt_spin_lock(rqspinlock_t *lock) +{ + return resilient_tas_spin_lock(lock); +} + +#endif /* CONFIG_PARAVIRT */ + +#include <asm-generic/rqspinlock.h> + +#endif /* _ASM_X86_RQSPINLOCK_H */ diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 000000000000..e5a13dc8816e --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + +#ifdef __ASSEMBLY__ + +.macro RUNTIME_CONST_PTR sym reg + movq $0x0123456789abcdef, %\reg + 1: + .pushsection runtime_ptr_\sym, "a" + .long 1b - 8 - . + .popsection +.endm + +#else /* __ASSEMBLY__ */ + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - %c2 - .\n" \ + ".popsection" \ + :"=r" (__ret) \ + :"i" ((unsigned long)0x0123456789abcdefull), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - 1 - .\n" \ + ".popsection" \ + :"+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. 
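+ *
+ * That is also why the plain stores below suffice: nothing else can
+ * observe or execute the text yet, so no text_poke()-style
+ * synchronization is needed.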
+ */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h deleted file mode 100644 index 4c25cf6caefa..000000000000 --- a/arch/x86/include/asm/rwsem.h +++ /dev/null @@ -1,237 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+ - * - * Written by David Howells (dhowells@redhat.com). - * - * Derived from asm-x86/semaphore.h - * - * - * The MSW of the count is the negated number of active writers and waiting - * lockers, and the LSW is the total number of active locks - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an - * uncontended lock. This can be determined because XADD returns the old value. - * Readers increment by 1 and see a positive value when uncontended, negative - * if there are writers (and maybe) readers waiting (in which case it goes to - * sleep). - * - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can - * be extended to 65534 by manually checking the whole MSW rather than relying - * on the S flag. - * - * The value of ACTIVE_BIAS supports up to 65535 active processes. - * - * This should be totally fair - if anything is waiting, a process that wants a - * lock will go to the back of the queue. When the currently active lock is - * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consecutive readers at the - * front, then they'll all be woken up, but no other readers will be. - */ - -#ifndef _ASM_X86_RWSEM_H -#define _ASM_X86_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ -#include <asm/asm.h> - -/* - * The bias values and the counter type limits the number of - * potential readers/writers to 32767 for 32 bits and 2147483647 - * for 64 bits. 
- */ - -#ifdef CONFIG_X86_64 -# define RWSEM_ACTIVE_MASK 0xffffffffL -#else -# define RWSEM_ACTIVE_MASK 0x0000ffffL -#endif - -#define RWSEM_UNLOCKED_VALUE 0x00000000L -#define RWSEM_ACTIVE_BIAS 0x00000001L -#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -#define ____down_read(sem, slow_path) \ -({ \ - struct rw_semaphore* ret; \ - asm volatile("# beginning down_read\n\t" \ - LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \ - /* adds 0x00000001 */ \ - " jns 1f\n" \ - " call " slow_path "\n" \ - "1:\n\t" \ - "# ending down_read\n\t" \ - : "+m" (sem->count), "=a" (ret), \ - ASM_CALL_CONSTRAINT \ - : [sem] "a" (sem) \ - : "memory", "cc"); \ - ret; \ -}) - -static inline void __down_read(struct rw_semaphore *sem) -{ - ____down_read(sem, "call_rwsem_down_read_failed"); -} - -static inline int __down_read_killable(struct rw_semaphore *sem) -{ - if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable"))) - return -EINTR; - return 0; -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline bool __down_read_trylock(struct rw_semaphore *sem) -{ - long result, tmp; - asm volatile("# beginning __down_read_trylock\n\t" - " mov %[count],%[result]\n\t" - "1:\n\t" - " mov %[result],%[tmp]\n\t" - " add %[inc],%[tmp]\n\t" - " jle 2f\n\t" - LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" - : [count] "+m" (sem->count), [result] "=&a" (result), - [tmp] "=&r" (tmp) - : [inc] "i" (RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); - return result >= 0; -} - -/* - * lock for writing - */ -#define ____down_write(sem, slow_path) \ -({ \ - long tmp; \ - struct rw_semaphore* ret; \ - \ - asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \ - /* adds 0xffff0001, returns the old value */ \ - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ - /* was the active mask 0 before? */\ - " jz 1f\n" \ - " call " slow_path "\n" \ - "1:\n" \ - "# ending down_write" \ - : "+m" (sem->count), [tmp] "=d" (tmp), \ - "=a" (ret), ASM_CALL_CONSTRAINT \ - : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \ - : "memory", "cc"); \ - ret; \ -}) - -static inline void __down_write(struct rw_semaphore *sem) -{ - ____down_write(sem, "call_rwsem_down_write_failed"); -} - -static inline int __down_write_killable(struct rw_semaphore *sem) -{ - if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable"))) - return -EINTR; - - return 0; -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline bool __down_write_trylock(struct rw_semaphore *sem) -{ - bool result; - long tmp0, tmp1; - asm volatile("# beginning __down_write_trylock\n\t" - " mov %[count],%[tmp0]\n\t" - "1:\n\t" - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" - /* was the active mask 0 before? 
*/ - " jnz 2f\n\t" - " mov %[tmp0],%[tmp1]\n\t" - " add %[inc],%[tmp1]\n\t" - LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t" - " jnz 1b\n\t" - "2:\n\t" - CC_SET(e) - "# ending __down_write_trylock\n\t" - : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0), - [tmp1] "=&r" (tmp1), CC_OUT(e) (result) - : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS) - : "memory"); - return result; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - long tmp; - asm volatile("# beginning __up_read\n\t" - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" - /* subtracts 1, returns the old value */ - " jns 1f\n\t" - " call call_rwsem_wake\n" /* expects old value in %edx */ - "1:\n" - "# ending __up_read\n" - : "+m" (sem->count), [tmp] "=d" (tmp) - : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - long tmp; - asm volatile("# beginning __up_write\n\t" - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" - /* subtracts 0xffff0001, returns the old value */ - " jns 1f\n\t" - " call call_rwsem_wake\n" /* expects old value in %edx */ - "1:\n\t" - "# ending __up_write\n" - : "+m" (sem->count), [tmp] "=d" (tmp) - : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - asm volatile("# beginning __downgrade_write\n\t" - LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t" - /* - * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) - * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) - */ - " jns 1f\n\t" - " call call_rwsem_downgrade_wake\n" - "1:\n\t" - "# ending __downgrade_write\n" - : "+m" (sem->count) - : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS) - : "memory", "cc"); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h index 2bd1338de236..42bcd42d70d1 100644 --- a/arch/x86/include/asm/seccomp.h +++ b/arch/x86/include/asm/seccomp.h @@ -9,13 +9,33 @@ #endif #ifdef CONFIG_COMPAT -#include <asm/ia32_unistd.h> +#include <asm/unistd_32_ia32.h> #define __NR_seccomp_read_32 __NR_ia32_read #define __NR_seccomp_write_32 __NR_ia32_write #define __NR_seccomp_exit_32 __NR_ia32_exit #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn #endif +#ifdef CONFIG_X86_64 +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_X86_64 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "x86_64" +# ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_I386 +# define SECCOMP_ARCH_COMPAT_NR IA32_NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "ia32" +# endif +/* + * x32 will have __X32_SYSCALL_BIT set in syscall number. We don't support + * caching them and they are treated as out of range syscalls, which will + * always pass through the BPF filter. 
+ */ +#else /* !CONFIG_X86_64 */ +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_I386 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "ia32" +#endif + #include <asm-generic/seccomp.h> #endif /* _ASM_X86_SECCOMP_H */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 8ea1cfdbeabc..30e8ee7006f9 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -5,12 +5,16 @@ #include <asm-generic/sections.h> #include <asm/extable.h> +extern char __relocate_kernel_start[], __relocate_kernel_end[]; extern char __brk_base[], __brk_limit[]; -extern struct exception_table_entry __stop___ex_table[]; extern char __end_rodata_aligned[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif +extern char __end_of_kernel_reserve[]; + +extern unsigned long _brk_start, _brk_end; + #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..f59ae7186940 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <asm/alternative.h> +#include <asm/ibt.h> /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -31,6 +32,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 @@ -43,7 +56,7 @@ #define GDT_ENTRY_INVALID_SEG 0 -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64) /* * The layout of the per-CPU GDT under Linux: * @@ -83,7 +96,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8 + * 28 - VDSO getcpu * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -106,7 +119,7 @@ #define GDT_ENTRY_ESPFIX_SS 26 #define GDT_ENTRY_PERCPU 27 -#define GDT_ENTRY_STACK_CANARY 28 +#define GDT_ENTRY_CPUNODE 28 #define GDT_ENTRY_DOUBLEFAULT_TSS 31 @@ -123,6 +136,7 @@ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __USER32_CS __USER_CS #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) /* segment for calling fn: */ @@ -146,11 +160,7 @@ # define __KERNEL_PERCPU 0 #endif -#ifdef CONFIG_STACKPROTECTOR -# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) -#else -# define __KERNEL_STACK_CANARY 0 -#endif +#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) #else /* 64-bit: */ @@ -204,33 +214,26 @@ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) -#define __USER32_DS __USER_DS #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) #define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) #endif -#ifndef CONFIG_PARAVIRT_XXL -# define get_kernel_rpl() 0 -#endif - #define IDT_ENTRIES 256 #define NUM_EXCEPTION_VECTORS 32 /* Bitmask of exception vectors which push an error code on the stack: */ -#define EXCEPTION_ERRCODE_MASK 0x00027d00 +#define 
EXCEPTION_ERRCODE_MASK 0x20027d00 #define GDT_SIZE (GDT_ENTRIES*8) #define GDT_ENTRY_TLS_ENTRIES 3 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) -#ifdef CONFIG_X86_64 - /* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */ #define VDSO_CPUNODE_BITS 12 #define VDSO_CPUNODE_MASK 0xfff -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Helper functions to store/load CPU and node numbers */ @@ -241,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) { - unsigned int p; + unsigned long p; /* * Load CPU and node number from the GDT. LSL is faster than RDTSCP @@ -251,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * * If RDPID is available, use it. */ - alternative_io ("lsl %[seg],%[p]", - ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + alternative_io ("lsl %[seg],%k[p]", + "rdpid %[p]", X86_FEATURE_RDPID, - [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + [p] "=r" (p), [seg] "r" (__CPUNODE_SEG)); if (cpu) *cpu = (p & VDSO_CPUNODE_MASK); @@ -262,8 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) *node = (p >> VDSO_CPUNODE_BITS); } -#endif /* !__ASSEMBLY__ */ -#endif /* CONFIG_X86_64 */ +#endif /* !__ASSEMBLER__ */ #ifdef __KERNEL__ @@ -274,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * vector has no error code (two bytes), a 'push $vector_number' (two * bytes), and a jump to the common entry code (up to five bytes). */ -#define EARLY_IDT_HANDLER_SIZE 9 +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) /* * xen_early_idt_handler_array is for Xen pv guests: for each entry in @@ -282,14 +284,14 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to * max 8 bytes. 
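[Editorial note] The value that LSL (or RDPID) returns in vdso_read_cpunode() above is the segment limit of __CPUNODE_SEG, which packs the CPU number into the low 12 bits and the node number above them. A stand-alone check of that packing; encode_cpunode() here mirrors what the kernel's vdso_encode_cpunode() is expected to do:

        #include <assert.h>

        #define VDSO_CPUNODE_BITS       12
        #define VDSO_CPUNODE_MASK       0xfff

        static unsigned long encode_cpunode(unsigned int cpu, unsigned long node)
        {
                return (node << VDSO_CPUNODE_BITS) | cpu;
        }

        int main(void)
        {
                unsigned long p = encode_cpunode(5, 3);

                assert((p & VDSO_CPUNODE_MASK) == 5);   /* cpu */
                assert((p >> VDSO_CPUNODE_BITS) == 3);  /* node */
                return 0;
        }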
*/ -#define XEN_EARLY_IDT_HANDLER_SIZE 8 +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; #endif @@ -306,14 +308,7 @@ do { \ \ asm volatile(" \n" \ "1: movl %k0,%%" #seg " \n" \ - \ - ".section .fixup,\"ax\" \n" \ - "2: xorl %k0,%k0 \n" \ - " jmp 1b \n" \ - ".previous \n" \ - \ - _ASM_EXTABLE(1b, 2b) \ - \ + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k0)\ : "+r" (__val) : : "memory"); \ } while (0) @@ -338,7 +333,7 @@ static inline void __loadsegment_fs(unsigned short value) "1: movw %0, %%fs \n" "2: \n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) : : "rm" (value) : "memory"); } @@ -355,26 +350,7 @@ static inline void __loadsegment_fs(unsigned short value) #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") -/* - * x86-32 user GS accessors: - */ -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_32_LAZY_GS -# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) -# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -# define task_user_gs(tsk) ((tsk)->thread.gs) -# define lazy_save_gs(v) savesegment(gs, (v)) -# define lazy_load_gs(v) loadsegment(gs, (v)) -# else /* X86_32_LAZY_GS */ -# define get_user_gs(regs) (u16)((regs)->gs) -# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) -# define task_user_gs(tsk) (task_pt_regs(tsk)->gs) -# define lazy_save_gs(v) do { } while (0) -# define lazy_load_gs(v) do { } while (0) -# endif /* X86_32_LAZY_GS */ -#endif /* X86_32 */ - -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __KERNEL__ */ #endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 07a25753e85c..61f56cdaccb5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -4,12 +4,16 @@ #include <asm/page.h> #include <asm-generic/set_memory.h> +#include <asm/pgtable.h> + +#define set_memory_rox set_memory_rox +int set_memory_rox(unsigned long addr, int numpages); /* * The set_memory_* API can be used to change various attributes of a virtual * address range. The attributes include: - * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack - * Executability : eXeutable, NoteXecutable + * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack + * Executability : eXecutable, NoteXecutable * Read/Write : ReadOnly, ReadWrite * Presence : NotPresent * Encryption : Encrypted, Decrypted @@ -34,28 +38,28 @@ * The caller is required to take care of these. 
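[Editorial note] A hypothetical caller of the savesegment()/loadsegment() accessors reworked above, to show the intended pairing; with the new EX_TYPE_ZERO_REG extable entry, a faulting selector load is fixed up to load a zero selector rather than oopsing. swap_out_fs() is an invented name for this sketch:

        static unsigned short swap_out_fs(void)
        {
                unsigned short old;

                savesegment(fs, old);
                loadsegment(fs, 0);
                return old;
        }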
*/ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); -int set_memory_wt(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); +int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); + +bool set_memory_enc_stop_conversion(void); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); -int set_memory_np_noalias(unsigned long addr, int numpages); -int set_memory_array_uc(unsigned long *addr, int addrinarray); -int set_memory_array_wc(unsigned long *addr, int addrinarray); -int set_memory_array_wt(unsigned long *addr, int addrinarray); -int set_memory_array_wb(unsigned long *addr, int addrinarray); +int set_memory_np_noalias(unsigned long addr, int numpages); +int set_memory_nonglobal(unsigned long addr, int numpages); +int set_memory_global(unsigned long addr, int numpages); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); -int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* @@ -80,55 +84,14 @@ int set_pages_array_wb(struct page **pages, int addrinarray); int set_pages_uc(struct page *page, int numpages); int set_pages_wb(struct page *page, int numpages); -int set_pages_x(struct page *page, int numpages); -int set_pages_nx(struct page *page, int numpages); int set_pages_ro(struct page *page, int numpages); int set_pages_rw(struct page *page, int numpages); -extern int kernel_set_to_readonly; -void set_kernel_text_rw(void); -void set_kernel_text_ro(void); - -#ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) -{ - unsigned long decoy_addr; - int rc; +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); +bool kernel_page_present(struct page *page); - /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. - * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); - * but doing that would radically increase the odds of a - * speculative access to the poison page because we'd have - * the virtual address of the kernel 1:1 mapping sitting - * around in registers. - * Instead we get tricky. We create a non-canonical address - * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() - * results with __PHYSICAL_MASK or PTE_PFN_MASK. - */ - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - - rc = set_memory_uc(decoy_addr, 1); - if (rc) - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - return rc; -} -#define set_mce_nospec set_mce_nospec - -/* Restore full speculative operation to the pfn. 
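[Editorial note] The decoy-address trick in the removed set_mce_nospec() is worth spelling out: the non-canonical alias keeps the same page-offset bits as the 1:1 mapping, so set_memory_uc()'s physical-mask sanitizing still reaches the right PTE, while the canonical poison address never sits in a register. A stand-alone model of the arithmetic; the PAGE_OFFSET value is just an example for 4-level paging:

        #include <stdint.h>

        #define PAGE_SHIFT      12
        #define PAGE_OFFSET     0xffff888000000000UL    /* example value */

        static uint64_t mce_decoy_addr(uint64_t pfn)
        {
                /* Same bits as the kernel's 1:1 address of pfn, but with
                 * bit 63 flipped; __pa()-style masking strips the flip. */
                return (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ (1UL << 63));
        }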
*/ -static inline int clear_mce_nospec(unsigned long pfn) -{ - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); -} -#define clear_mce_nospec clear_mce_nospec -#else -/* - * Few people would run a 32-bit kernel on a machine that supports - * recoverable errors because they have too much memory to boot 32-bit. - */ -#endif +extern int kernel_set_to_readonly; #endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ed8ec011a9fd..914eb32581c7 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/ibt.h> #ifdef __i386__ @@ -26,12 +27,12 @@ #define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/cache.h> + #include <asm/bootparam.h> #include <asm/x86_init.h> -extern u64 relocated_ramdisk; - /* Interrupt control for vSMPowered x86_64 systems */ #ifdef CONFIG_X86_64 void vsmp_init(void); @@ -39,16 +40,22 @@ void vsmp_init(void); static inline void vsmp_init(void) { } #endif +struct pt_regs; + void setup_bios_corruption_check(void); void early_platform_quirks(void); extern unsigned long saved_video_mode; +extern unsigned long acpi_realmode_flags; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); -extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); -extern unsigned long __startup_secondary_64(void); -extern int early_make_pgtable(unsigned long address); +extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp); +extern void startup_64_setup_gdt_idt(void); +extern void startup_64_load_idt(void *vc_handler); +extern void __pi_startup_64_load_idt(void *vc_handler); +extern void early_setup_idt(void); +extern void __init do_early_exception(struct pt_regs *regs, int trapnr); #ifdef CONFIG_X86_INTEL_MID extern void x86_intel_mid_early_setup(void); @@ -62,6 +69,8 @@ extern void x86_ce4100_early_setup(void); static inline void x86_ce4100_early_setup(void) { } #endif +#include <linux/kexec_handover.h> + #ifndef _SETUP #include <asm/espfix.h> @@ -75,7 +84,17 @@ extern char _text[]; static inline bool kaslr_enabled(void) { - return !!(boot_params.hdr.loadflags & KASLR_FLAG); + return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) && + !!(boot_params.hdr.loadflags & KASLR_FLAG); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } static inline unsigned long kaslr_offset(void) @@ -94,50 +113,51 @@ extern unsigned long _brk_end; void *extend_brk(size_t size, size_t align); /* - * Reserve space in the brk section. The name must be unique within - * the file, and somewhat descriptive. The size is in bytes. Must be - * used at file scope. + * Reserve space in the .brk section, which is a block of memory from which the + * caller is allowed to allocate very early (before even memblock is available) + * by calling extend_brk(). All allocated memory will be eventually converted + * to memblock. Any leftover unallocated memory will be freed. * - * (This uses a temp function to wrap the asm so we can pass it the - * size parameter; otherwise we wouldn't be able to. 
We can't use a - * "section" attribute on a normal variable because it always ends up - * being @progbits, which ends up allocating space in the vmlinux - * executable.) + * The size is in bytes. */ -#define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used notrace \ - __brk_reservation_fn_##name##__(void) { \ - asm volatile ( \ - ".pushsection .brk_reservation,\"aw\",@nobits;" \ - ".brk." #name ":" \ - " 1:.skip %c0;" \ - " .size .brk." #name ", . - 1b;" \ - " .popsection" \ - : : "i" (sz)); \ - } - -/* Helper for reserving space for arrays of things */ -#define RESERVE_BRK_ARRAY(type, name, entries) \ - type *name; \ - RESERVE_BRK(name, sizeof(type) * entries) +#define RESERVE_BRK(name, size) \ + __section(".bss..brk") __aligned(1) __used \ + static char __brk_##name[size] extern void probe_roms(void); + +void clear_bss(void); + #ifdef __i386__ -asmlinkage void __init i386_start_kernel(void); +asmlinkage void __init __noreturn i386_start_kernel(void); +void __init mk_early_pgtbl_32(void); #else -asmlinkage void __init x86_64_start_kernel(char *real_mode); -asmlinkage void __init x86_64_start_reservations(char *real_mode_data); +asmlinkage void __init __noreturn x86_64_start_kernel(char *real_mode); +asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data); #endif /* __i386__ */ #endif /* _SETUP */ + +#ifdef CONFIG_CMDLINE_BOOL +extern bool builtin_cmdline_added __ro_after_init; #else -#define RESERVE_BRK(name,sz) \ - .pushsection .brk_reservation,"aw",@nobits; \ -.brk.name: \ -1: .skip sz; \ - .size .brk.name,.-1b; \ +#define builtin_cmdline_added 0 +#endif + +#else /* __ASSEMBLER__ */ + +.macro __RESERVE_BRK name, size + .pushsection .bss..brk, "aw" +SYM_DATA_START(__brk_\name) + .skip \size +SYM_DATA_END(__brk_\name) .popsection -#endif /* __ASSEMBLY__ */ +.endm + +#define RESERVE_BRK(name, size) __RESERVE_BRK name, size + +#endif /* __ASSEMBLER__ */ + #endif /* _ASM_X86_SETUP_H */ diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h new file mode 100644 index 000000000000..7bb16f843c93 --- /dev/null +++ b/arch/x86/include/asm/setup_data.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SETUP_DATA_H +#define _ASM_X86_SETUP_DATA_H + +#include <uapi/asm/setup_data.h> + +#ifndef __ASSEMBLER__ + +struct pci_setup_rom { + struct setup_data data; + uint16_t vendor; + uint16_t devid; + uint64_t pcilen; + unsigned long segment; + unsigned long bus; + unsigned long device; + unsigned long function; + uint8_t romdata[]; +}; + +/* kexec external ABI */ +struct efi_setup_data { + u64 fw_vendor; + u64 __unused; + u64 tables; + u64 smbios; + u64 reserved[8]; +}; + +#endif /* __ASSEMBLER__ */ + +#endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000000..01a6e4dbe423 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. 
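[Editorial note] A hypothetical user of the reworked brk reservation described above, to show the intended flow; the reservation name and sizes here are made up:

        /* Grow the .bss..brk block by four pages at link time ... */
        RESERVE_BRK(early_tables, 4 * PAGE_SIZE);

        /* ... and carve pages out of it before memblock is available. */
        static void *early_table_alloc(void)
        {
                return extend_brk(PAGE_SIZE, PAGE_SIZE);
        }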
+ * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_DATA_LOW 12 +#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1) + +#define GHCB_DATA(v) \ + (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW) + +/* SEV Information Request/Response */ +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 + +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + /* GHCBData[63:48] */ \ + ((((_max) & 0xffff) << 48) | \ + /* GHCBData[47:32] */ \ + (((_min) & 0xffff) << 32) | \ + /* GHCBData[31:24] */ \ + (((_cbit) & 0xff) << 24) | \ + GHCB_MSR_SEV_INFO_RESP) + +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> 48) & 0xffff) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> 32) & 0xffff) + +/* CPUID Request/Response */ +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + /* GHCBData[11:0] */ \ + (GHCB_MSR_CPUID_REQ | \ + /* GHCBData[31:12] */ \ + (((unsigned long)(reg) & 0x3) << 30) | \ + /* GHCBData[63:32] */ \ + (((unsigned long)fn) << 32)) + +/* AP Reset Hold */ +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0) + +/* Preferred GHCB GPA Request */ +#define GHCB_MSR_PREF_GPA_REQ 0x010 +#define GHCB_MSR_GPA_VALUE_POS 12 +#define GHCB_MSR_GPA_VALUE_MASK GENMASK_ULL(51, 0) + +#define GHCB_MSR_PREF_GPA_RESP 0x011 +#define GHCB_MSR_PREF_GPA_NONE 0xfffffffffffff + +/* GHCB GPA Register */ +#define GHCB_MSR_REG_GPA_REQ 0x012 +#define GHCB_MSR_REG_GPA_REQ_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_REG_GPA_REQ) + +#define GHCB_MSR_REG_GPA_RESP 0x013 +#define GHCB_MSR_REG_GPA_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +/* + * SNP Page State Change Operation + * + * GHCBData[55:52] - Page operation: + * 0x0001 Page assignment, Private + * 0x0002 Page assignment, Shared + */ +enum psc_op { + SNP_PAGE_STATE_PRIVATE = 1, + SNP_PAGE_STATE_SHARED, +}; + +#define GHCB_MSR_PSC_REQ 0x014 +#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \ + /* GHCBData[55:52] */ \ + (((u64)((op) & 0xf) << 52) | \ + /* GHCBData[51:12] */ \ + ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_PSC_REQ) + +#define GHCB_MSR_PSC_REQ_TO_GFN(msr) (((msr) & GENMASK_ULL(51, 12)) >> 12) +#define GHCB_MSR_PSC_REQ_TO_OP(msr) (((msr) & GENMASK_ULL(55, 52)) >> 52) + +#define GHCB_MSR_PSC_RESP 0x015 +#define GHCB_MSR_PSC_RESP_VAL(val) \ + /* GHCBData[63:32] */ \ + (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) + +/* Set highest bit as a generic error response */ +#define GHCB_MSR_PSC_RESP_ERROR (BIT_ULL(63) | GHCB_MSR_PSC_RESP) + +/* GHCB Run at VMPL Request/Response */ +#define GHCB_MSR_VMPL_REQ 0x016 +#define GHCB_MSR_VMPL_REQ_LEVEL(v) \ + /* GHCBData[39:32] */ \ + ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ + /* GHCBDdata[11:0] */ \ + GHCB_MSR_VMPL_REQ) + +#define GHCB_MSR_VMPL_RESP 0x017 +#define 
GHCB_MSR_VMPL_RESP_VAL(v) \ + /* GHCBData[63:32] */ \ + (((u64)(v) & GENMASK_ULL(63, 32)) >> 32) + +/* GHCB Hypervisor Feature Request/Response */ +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_POS 12 +#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0) +#define GHCB_MSR_HV_FT_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +#define GHCB_HV_FT_SNP BIT_ULL(0) +#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) +#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) + +/* + * SNP Page State Change NAE event + * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which + * is a local stack variable in set_pages_state(). Do not increase this value + * without evaluating the impact to stack usage. + * + * Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value + * is needed, such as when processing GHCB requests on the hypervisor side. + */ +#define VMGEXIT_PSC_MAX_ENTRY 64 +#define VMGEXIT_PSC_MAX_COUNT 253 + +#define VMGEXIT_PSC_ERROR_GENERIC (0x100UL << 32) +#define VMGEXIT_PSC_ERROR_INVALID_HDR ((1UL << 32) | 1) +#define VMGEXIT_PSC_ERROR_INVALID_ENTRY ((1UL << 32) | 2) + +#define VMGEXIT_PSC_OP_PRIVATE 1 +#define VMGEXIT_PSC_OP_SHARED 2 + +struct psc_hdr { + u16 cur_entry; + u16 end_entry; + u32 reserved; +} __packed; + +struct psc_entry { + u64 cur_page : 12, + gfn : 40, + operation : 4, + pagesize : 1, + reserved : 7; +} __packed; + +struct snp_psc_desc { + struct psc_hdr hdr; + struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; +} __packed; + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff + +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + /* GHCBData[15:12] */ \ + (((((u64)reason_set) & 0xf) << 12) | \ + /* GHCBData[23:16] */ \ + ((((u64)reason_val) & 0xff) << 16)) + +/* Error codes from reason set 0 */ +#define SEV_TERM_SET_GEN 0 +#define GHCB_SEV_ES_GEN_REQ 0 +#define GHCB_SEV_ES_PROT_UNSUPPORTED 1 +#define GHCB_SNP_UNSUPPORTED 2 + +/* Linux-specific reason codes (used with reason set 1) */ +#define SEV_TERM_SET_LINUX 1 +#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */ +#define GHCB_TERM_PSC 1 /* Page State Change failure */ +#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */ +#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ +#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ +#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ +#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */ +#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */ +#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */ +#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ +#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */ +#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */ +#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */ + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +/* + * GHCB-defined return codes that are communicated back to the guest via + * SW_EXITINFO1. + */ +#define GHCB_HV_RESP_NO_ACTION 0 +#define GHCB_HV_RESP_ISSUE_EXCEPTION 1 +#define GHCB_HV_RESP_MALFORMED_INPUT 2 + +/* + * GHCB-defined sub-error codes for malformed input (see above) that are + * communicated back to the guest via SW_EXITINFO2[31:0]. 
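[Editorial note] Putting the PSC MSR-protocol macros above together, asking the hypervisor to convert one 4K frame to private boils down to a single encoded value. A sketch that assumes a 12-bit page shift and elides the response check; psc_make_private() is an invented name:

        static u64 psc_make_private(u64 paddr)
        {
                /* GHCBData[55:52] = op, [51:12] = gfn, [11:0] = 0x014 */
                return GHCB_MSR_PSC_REQ_GFN(paddr >> 12, SNP_PAGE_STATE_PRIVATE);
        }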
+ */ +#define GHCB_ERR_NOT_REGISTERED 1 +#define GHCB_ERR_INVALID_USAGE 2 +#define GHCB_ERR_INVALID_SCRATCH_AREA 3 +#define GHCB_ERR_MISSING_INPUT 4 +#define GHCB_ERR_INVALID_INPUT 5 +#define GHCB_ERR_INVALID_EVENT 6 + +struct sev_config { + __u64 debug : 1, + + /* + * Indicates when the per-CPU GHCB has been created and registered + * and thus can be used by the BSP instead of the early boot GHCB. + * + * For APs, the per-CPU GHCB is created before they are started + * and registered upon startup, so this flag can be used globally + * for the BSP and APs. + */ + ghcbs_initialized : 1, + + /* + * Indicates when the per-CPU SVSM CA is to be used instead of the + * boot SVSM CA. + * + * For APs, the per-CPU SVSM CA is created as part of the AP + * bringup, so this flag can be used globally for the BSP and APs. + */ + use_cas : 1, + + __reserved : 61; +}; + +extern struct sev_config sev_cfg; + +#endif diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h new file mode 100644 index 000000000000..c58c47c68ab6 --- /dev/null +++ b/arch/x86/include/asm/sev-internal.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define DR7_RESET_VALUE 0x400 + +extern u64 sev_hv_features; +extern u64 sev_secrets_pa; + +/* #VC handler runtime per-CPU data */ +struct sev_es_runtime_data { + struct ghcb ghcb_page; + + /* + * Reserve one page per CPU as backup storage for the unencrypted GHCB. + * It is needed when an NMI happens while the #VC handler uses the real + * GHCB, and the NMI handler itself is causing another #VC exception. In + * that case the GHCB content of the first handler needs to be backed up + * and restored. + */ + struct ghcb backup_ghcb; + + /* + * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. + * There is no need for it to be atomic, because nothing is written to + * the GHCB between the read and the write of ghcb_active. So it is safe + * to use it when a nested #VC exception happens before the write. + * + * This is necessary for example in the #VC->NMI->#VC case when the NMI + * happens while the first #VC handler uses the GHCB. When the NMI code + * raises a second #VC handler it might overwrite the contents of the + * GHCB written by the first handler. To avoid this the content of the + * GHCB is saved and restored when the GHCB is detected to be in use + * already. + */ + bool ghcb_active; + bool backup_ghcb_active; + + /* + * Cached DR7 value - write it on DR7 writes and return it on reads. + * That value will never make it to the real hardware DR7 as debugging + * is currently unsupported in SEV-ES guests. 
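[Editorial note] The ghcb_active/backup_ghcb_active pair above drives a simple hand-out decision. A simplified sketch of that logic; the real __sev_get_ghcb() additionally backs up and restores the page contents, and pick_ghcb() is an invented name:

        static struct ghcb *pick_ghcb(struct sev_es_runtime_data *data)
        {
                if (!data->ghcb_active) {
                        data->ghcb_active = true;
                        return &data->ghcb_page;
                }

                /* Nested #VC (e.g. #VC -> NMI -> #VC): use the backup page. */
                data->backup_ghcb_active = true;
                return &data->backup_ghcb;
        }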
+ */ + unsigned long dr7; +}; + +struct ghcb_state { + struct ghcb *ghcb; +}; + +extern struct svsm_ca boot_svsm_ca_page; + +struct ghcb *__sev_get_ghcb(struct ghcb_state *state); +void __sev_put_ghcb(struct ghcb_state *state); + +DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data); +DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa); + +void early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, const struct psc_desc *desc); + +DECLARE_PER_CPU(struct svsm_ca *, svsm_caa); +DECLARE_PER_CPU(u64, svsm_caa_pa); + +extern u64 boot_svsm_caa_pa; + +enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt); +void vc_forward_exception(struct es_em_ctxt *ctxt); + +static inline u64 sev_es_rd_ghcb_msr(void) +{ + return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB); +} + +static __always_inline void sev_es_wr_ghcb_msr(u64 val) +{ + u32 low, high; + + low = (u32)(val); + high = (u32)(val >> 32); + + native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); +} + +enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write); + +u64 get_hv_features(void); + +const struct snp_cpuid_table *snp_cpuid_get_table(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h new file mode 100644 index 000000000000..0e6c0940100f --- /dev/null +++ b/arch/x86/include/asm/sev.h @@ -0,0 +1,682 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel <jroedel@suse.de> + */ + +#ifndef __ASM_ENCRYPTED_STATE_H +#define __ASM_ENCRYPTED_STATE_H + +#include <linux/types.h> +#include <linux/sev-guest.h> + +#include <asm/insn.h> +#include <asm/sev-common.h> +#include <asm/coco.h> +#include <asm/set_memory.h> +#include <asm/svm.h> + +#define GHCB_PROTOCOL_MIN 1ULL +#define GHCB_PROTOCOL_MAX 2ULL +#define GHCB_DEFAULT_USAGE 0ULL + +#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } + +struct boot_params; + +enum es_result { + ES_OK, /* All good */ + ES_UNSUPPORTED, /* Requested operation not supported */ + ES_VMM_ERROR, /* Unexpected state from the VMM */ + ES_DECODE_FAILED, /* Instruction decoding failed */ + ES_EXCEPTION, /* Instruction caused exception */ + ES_RETRY, /* Retry instruction emulation */ +}; + +struct es_fault_info { + unsigned long vector; + unsigned long error_code; + unsigned long cr2; +}; + +struct pt_regs; + +/* ES instruction emulation context */ +struct es_em_ctxt { + struct pt_regs *regs; + struct insn insn; + struct es_fault_info fi; +}; + +/* + * AMD SEV Confidential computing blob structure. The structure is + * defined in OVMF UEFI firmware header: + * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h + */ +#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41 +struct cc_blob_sev_info { + u32 magic; + u16 version; + u16 reserved; + u64 secrets_phys; + u32 secrets_len; + u32 rsvd1; + u64 cpuid_phys; + u32 cpuid_len; + u32 rsvd2; +} __packed; + +void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); + +static inline u64 lower_bits(u64 val, unsigned int bits) +{ + u64 mask = (1ULL << bits) - 1; + + return (val & mask); +} + +struct real_mode_header; +enum stack_type; + +/* Early IDT entry points for #VC handler */ +extern void vc_no_ghcb(void); +extern void vc_boot_ghcb(void); +extern bool handle_vc_boot_ghcb(struct pt_regs *regs); + +/* + * Individual entries of the SNP CPUID table, as defined by the SNP + * Firmware ABI, Revision 0.9, Section 7.1, Table 14. 
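[Editorial note] Early in boot, before a full GHCB page exists, CPUID can be fetched one register at a time over the GHCB MSR protocol defined in sev-common.h above. A hedged sketch of one such read using the declared helpers; real code terminates the guest on a bad response instead of returning 0, and ghcb_msr_cpuid_eax() is an invented name:

        static u32 ghcb_msr_cpuid_eax(u32 fn)
        {
                u64 val;

                sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
                VMGEXIT();
                val = sev_es_rd_ghcb_msr();

                if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
                        return 0;       /* real code terminates here */

                return (u32)(val >> GHCB_MSR_CPUID_VALUE_POS);
        }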
+ */ +struct snp_cpuid_fn { + u32 eax_in; + u32 ecx_in; + u64 xcr0_in; + u64 xss_in; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u64 __reserved; +} __packed; + +/* + * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9, + * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit + * of 64 entries per CPUID table. + */ +#define SNP_CPUID_COUNT_MAX 64 + +struct snp_cpuid_table { + u32 count; + u32 __reserved1; + u64 __reserved2; + struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX]; +} __packed; + +/* PVALIDATE return codes */ +#define PVALIDATE_FAIL_SIZEMISMATCH 6 + +/* Software defined (when rFlags.CF = 1) */ +#define PVALIDATE_FAIL_NOUPDATE 255 + +/* RMUPDATE detected 4K page and 2MB page overlap. */ +#define RMPUPDATE_FAIL_OVERLAP 4 + +/* PSMASH failed due to concurrent access by another CPU */ +#define PSMASH_FAIL_INUSE 3 + +/* RMP page size */ +#define RMP_PG_SIZE_4K 0 +#define RMP_PG_SIZE_2M 1 +#define RMP_TO_PG_LEVEL(level) (((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M) +#define PG_LEVEL_TO_RMP(level) (((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M) + +struct rmp_state { + u64 gpa; + u8 assigned; + u8 pagesize; + u8 immutable; + u8 rsvd; + u32 asid; +} __packed; + +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) + +/* SNP Guest message request */ +struct snp_req_data { + unsigned long req_gpa; + unsigned long resp_gpa; + unsigned long data_gpa; + unsigned int data_npages; +}; + +#define MAX_AUTHTAG_LEN 32 +#define AUTHTAG_LEN 16 +#define AAD_LEN 48 +#define MSG_HDR_VER 1 + +#define SNP_REQ_MAX_RETRY_DURATION (60*HZ) +#define SNP_REQ_RETRY_DELAY (2*HZ) + +/* See SNP spec SNP_GUEST_REQUEST section for the structure */ +enum msg_type { + SNP_MSG_TYPE_INVALID = 0, + SNP_MSG_CPUID_REQ, + SNP_MSG_CPUID_RSP, + SNP_MSG_KEY_REQ, + SNP_MSG_KEY_RSP, + SNP_MSG_REPORT_REQ, + SNP_MSG_REPORT_RSP, + SNP_MSG_EXPORT_REQ, + SNP_MSG_EXPORT_RSP, + SNP_MSG_IMPORT_REQ, + SNP_MSG_IMPORT_RSP, + SNP_MSG_ABSORB_REQ, + SNP_MSG_ABSORB_RSP, + SNP_MSG_VMRK_REQ, + SNP_MSG_VMRK_RSP, + + SNP_MSG_TSC_INFO_REQ = 17, + SNP_MSG_TSC_INFO_RSP, + + SNP_MSG_TYPE_MAX +}; + +enum aead_algo { + SNP_AEAD_INVALID, + SNP_AEAD_AES_256_GCM, +}; + +struct snp_guest_msg_hdr { + u8 authtag[MAX_AUTHTAG_LEN]; + u64 msg_seqno; + u8 rsvd1[8]; + u8 algo; + u8 hdr_version; + u16 hdr_sz; + u8 msg_type; + u8 msg_version; + u16 msg_sz; + u32 rsvd2; + u8 msg_vmpck; + u8 rsvd3[35]; +} __packed; + +struct snp_guest_msg { + struct snp_guest_msg_hdr hdr; + u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)]; +} __packed; + +#define SNP_TSC_INFO_REQ_SZ 128 + +struct snp_tsc_info_req { + u8 rsvd[SNP_TSC_INFO_REQ_SZ]; +} __packed; + +struct snp_tsc_info_resp { + u32 status; + u32 rsvd1; + u64 tsc_scale; + u64 tsc_offset; + u32 tsc_factor; + u8 rsvd2[100]; +} __packed; + +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + +struct snp_guest_req { + void *req_buf; + size_t req_sz; + + void *resp_buf; + size_t resp_sz; + + u64 exit_code; + u64 exitinfo2; + unsigned int vmpck_id; + u8 msg_version; + u8 msg_type; + + struct snp_req_data input; + void *certs_data; +}; + +/* + * The secrets page contains 96-bytes of reserved field that can be used by + * the guest OS. 
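[Editorial note] A quick numeric check of the SNP_SCALE_TSC_FREQ() macro above: a nominal frequency of 2,000,000 kHz with a TSC_FACTOR of 5 (that is, 0.005%) scales down by exactly 100 kHz:

        #include <assert.h>

        #define SNP_SCALE_TSC_FREQ(freq, factor) \
                ((freq) - (freq) * (factor) / 100000)

        int main(void)
        {
                assert(SNP_SCALE_TSC_FREQ(2000000ULL, 5) == 1999900ULL);
                return 0;
        }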
The guest OS uses the area to save the message sequence + * number for each VMPCK. + * + * See the GHCB spec section Secret page layout for the format for this area. + */ +struct secrets_os_area { + u32 msg_seqno_0; + u32 msg_seqno_1; + u32 msg_seqno_2; + u32 msg_seqno_3; + u64 ap_jump_table_pa; + u8 rsvd[40]; + u8 guest_usage[32]; +} __packed; + +#define VMPCK_KEY_LEN 32 + +/* See the SNP spec version 0.9 for secrets page format */ +struct snp_secrets_page { + u32 version; + u32 imien : 1, + rsvd1 : 31; + u32 fms; + u32 rsvd2; + u8 gosvw[16]; + u8 vmpck0[VMPCK_KEY_LEN]; + u8 vmpck1[VMPCK_KEY_LEN]; + u8 vmpck2[VMPCK_KEY_LEN]; + u8 vmpck3[VMPCK_KEY_LEN]; + struct secrets_os_area os_area; + + u8 vmsa_tweak_bitmap[64]; + + /* SVSM fields */ + u64 svsm_base; + u64 svsm_size; + u64 svsm_caa; + u32 svsm_max_version; + u8 svsm_guest_vmpl; + u8 rsvd3[3]; + + /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + + /* Remainder of page */ + u8 rsvd4[3740]; +} __packed; + +struct snp_msg_desc { + /* request and response are in unencrypted memory */ + struct snp_guest_msg *request, *response; + + /* + * Avoid information leakage by double-buffering shared messages + * in fields that are in regular encrypted memory. + */ + struct snp_guest_msg secret_request, secret_response; + + struct snp_secrets_page *secrets; + + struct aesgcm_ctx *ctx; + + u32 *os_area_msg_seqno; + u8 *vmpck; + int vmpck_id; +}; + +/* + * The SVSM Calling Area (CA) related structures. + */ +struct svsm_ca { + u8 call_pending; + u8 mem_available; + u8 rsvd1[6]; + + u8 svsm_buffer[PAGE_SIZE - 8]; +}; + +#define SVSM_SUCCESS 0 +#define SVSM_ERR_INCOMPLETE 0x80000000 +#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001 +#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002 +#define SVSM_ERR_INVALID_ADDRESS 0x80000003 +#define SVSM_ERR_INVALID_FORMAT 0x80000004 +#define SVSM_ERR_INVALID_PARAMETER 0x80000005 +#define SVSM_ERR_INVALID_REQUEST 0x80000006 +#define SVSM_ERR_BUSY 0x80000007 +#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006 + +/* + * The SVSM PVALIDATE related structures + */ +struct svsm_pvalidate_entry { + u64 page_size : 2, + action : 1, + ignore_cf : 1, + rsvd : 8, + pfn : 52; +}; + +struct svsm_pvalidate_call { + u16 num_entries; + u16 cur_index; + + u8 rsvd1[4]; + + struct svsm_pvalidate_entry entry[]; +}; + +#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \ + offsetof(struct svsm_pvalidate_call, entry)) / \ + sizeof(struct svsm_pvalidate_entry)) + +/* + * The SVSM Attestation related structures + */ +struct svsm_loc_entry { + u64 pa; + u32 len; + u8 rsvd[4]; +}; + +struct svsm_attest_call { + struct svsm_loc_entry report_buf; + struct svsm_loc_entry nonce; + struct svsm_loc_entry manifest_buf; + struct svsm_loc_entry certificates_buf; + + /* For attesting a single service */ + u8 service_guid[16]; + u32 service_manifest_ver; + u8 rsvd[4]; +}; + +/* PTE descriptor used for the prepare_pte_enc() operations. 
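[Editorial note] The SVSM_PVALIDATE_MAX_COUNT sizing above is easy to sanity-check for 4K pages: the calling-area buffer is 4096 - 8 bytes, the call header occupies another 8, and each svsm_pvalidate_entry packs into one u64 (2 + 1 + 1 + 8 + 52 = 64 bits). A stand-alone restatement of that arithmetic:

        enum {
                SVSM_BUF   = 4096 - 8,  /* sizeof(svsm_buffer) */
                CALL_HDR   = 8,         /* offsetof(svsm_pvalidate_call, entry) */
                ENTRY_SIZE = 8,         /* packed u64 bitfield */
        };

        _Static_assert((SVSM_BUF - CALL_HDR) / ENTRY_SIZE == 510,
                       "510 PVALIDATE entries fit in one calling area");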
*/ +struct pte_enc_desc { + pte_t *kpte; + int pte_level; + bool encrypt; + /* pfn of the kpte above */ + unsigned long pfn; + /* physical address of @pfn */ + unsigned long pa; + /* virtual address of @pfn */ + void *va; + /* memory covered by the pte */ + unsigned long size; + pgprot_t new_pgprot; +}; + +/* + * SVSM protocol structure + */ +struct svsm_call { + struct svsm_ca *caa; + u64 rax; + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 rax_out; + u64 rcx_out; + u64 rdx_out; + u64 r8_out; + u64 r9_out; +}; + +#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x)) +#define SVSM_CORE_REMAP_CA 0 +#define SVSM_CORE_PVALIDATE 1 +#define SVSM_CORE_CREATE_VCPU 2 +#define SVSM_CORE_DELETE_VCPU 3 + +#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x)) +#define SVSM_ATTEST_SERVICES 0 +#define SVSM_ATTEST_SINGLE_SERVICE 1 + +#define SVSM_VTPM_CALL(x) ((2ULL << 32) | (x)) +#define SVSM_VTPM_QUERY 0 +#define SVSM_VTPM_CMD 1 + +#ifdef CONFIG_AMD_MEM_ENCRYPT + +extern u8 snp_vmpl; + +extern void __sev_es_ist_enter(struct pt_regs *regs); +extern void __sev_es_ist_exit(void); +static __always_inline void sev_es_ist_enter(struct pt_regs *regs) +{ + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + __sev_es_ist_enter(regs); +} +static __always_inline void sev_es_ist_exit(void) +{ + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + __sev_es_ist_exit(); +} +extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); +extern void __sev_es_nmi_complete(void); +static __always_inline void sev_es_nmi_complete(void) +{ + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + __sev_es_nmi_complete(); +} +extern int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); + +/* + * RMPADJUST modifies the RMP permissions of a page of a lesser- + * privileged (numerically higher) VMPL. + * + * If the guest is running at a higher-privilege than the privilege + * level the instruction is targeting, the instruction will succeed, + * otherwise, it will fail. 
+ */ +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) +{ + int rc; + + /* "rmpadjust" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t" + : "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(attrs) + : "memory", "cc"); + + return rc; +} +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) +{ + bool no_rmpupdate; + int rc; + + /* "pvalidate" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t" + : "=@ccc"(no_rmpupdate), "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(validate) + : "memory", "cc"); + + if (no_rmpupdate) + return PVALIDATE_FAIL_NOUPDATE; + + return rc; +} + +void setup_ghcb(void); +void snp_register_ghcb_early(unsigned long paddr); +void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned long npages); +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); +void snp_set_memory_private(unsigned long vaddr, unsigned long npages); +void snp_set_wakeup_secondary_cpu(void); +bool snp_init(struct boot_params *bp); +void snp_dmi_setup(void); +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); +void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); +void sev_show_status(void); +int prepare_pte_enc(struct pte_enc_desc *d); +void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot); +void snp_kexec_finish(void); +void snp_kexec_begin(void); + +int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id); +struct snp_msg_desc *snp_msg_alloc(void); +void snp_msg_free(struct snp_msg_desc *mdesc); +int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req); + +int snp_svsm_vtpm_send_command(u8 *buffer); + +void __init snp_secure_tsc_prepare(void); +void __init snp_secure_tsc_init(void); +enum es_result savic_register_gpa(u64 gpa); +enum es_result savic_unregister_gpa(u64 *gpa); +u64 savic_ghcb_msr_read(u32 reg); +void savic_ghcb_msr_write(u32 reg, u64 value); + +static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) +{ + ghcb->save.sw_exit_code = 0; + __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); +} + +/* I/O parameters for CPUID-related helpers */ +struct cpuid_leaf { + u32 fn; + u32 subfn; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +}; + +int svsm_perform_msr_protocol(struct svsm_call *call); +int __pi_svsm_perform_msr_protocol(struct svsm_call *call); +int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf), + void *ctx, struct cpuid_leaf *leaf); + +void svsm_issue_call(struct svsm_call *call, u8 *pending); +int svsm_process_result_codes(struct svsm_call *call); + +void __noreturn sev_es_terminate(unsigned int set, unsigned int reason); +enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, + struct es_em_ctxt *ctxt, + u64 exit_code, u64 exit_info_1, + u64 exit_info_2); + +bool sev_es_negotiate_protocol(void); +bool sev_es_check_cpu_features(void); + +extern u16 ghcb_version; +extern struct ghcb *boot_ghcb; +extern bool sev_snp_needs_sfw; + +struct psc_desc { + enum psc_op op; + struct svsm_ca *ca; + u64 caa_pa; +}; + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from 
the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +#define snp_vmpl 0 +static inline void sev_es_ist_enter(struct pt_regs *regs) { } +static inline void sev_es_ist_exit(void) { } +static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } +static inline void sev_es_nmi_complete(void) { } +static inline int sev_es_efi_map_ghcbs_cas(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } +static inline void setup_ghcb(void) { } +static inline void __init +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } +static inline void __init +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } +static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_wakeup_secondary_cpu(void) { } +static inline bool snp_init(struct boot_params *bp) { return false; } +static inline void snp_dmi_setup(void) { } +static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) +{ + return -ENOTTY; +} +static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } +static inline void sev_show_status(void) { } +static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; } +static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { } +static inline void snp_kexec_finish(void) { } +static inline void snp_kexec_begin(void) { } +static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; } +static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; } +static inline void snp_msg_free(struct snp_msg_desc *mdesc) { } +static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, + struct snp_guest_req *req) { return -ENODEV; } +static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; } +static inline void __init snp_secure_tsc_prepare(void) { } +static inline void __init snp_secure_tsc_init(void) { } +static inline void sev_evict_cache(void *va, int npages) {} +static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; } +static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; } +static inline void savic_ghcb_msr_write(u32 reg, u64 value) { } +static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; } + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#ifdef CONFIG_KVM_AMD_SEV +bool snp_probe_rmptable_info(void); +int snp_rmptable_init(void); +int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); +void snp_dump_hva_rmpentry(unsigned long address); +int psmash(u64 pfn); +int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool 
immutable); +int rmp_make_shared(u64 pfn, enum pg_level level); +void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp); +void kdump_sev_callback(void); +void snp_fixup_e820_tables(void); +static inline void snp_leak_pages(u64 pfn, unsigned int pages) +{ + __snp_leak_pages(pfn, pages, true); +} +#else +static inline bool snp_probe_rmptable_info(void) { return false; } +static inline int snp_rmptable_init(void) { return -ENOSYS; } +static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } +static inline void snp_dump_hva_rmpentry(unsigned long address) {} +static inline int psmash(u64 pfn) { return -ENODEV; } +static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, + bool immutable) +{ + return -ENODEV; +} +static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } +static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {} +static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } +static inline void snp_fixup_e820_tables(void) {} +#endif + +#endif diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h new file mode 100644 index 000000000000..04958459a7ca --- /dev/null +++ b/arch/x86/include/asm/sgx.h @@ -0,0 +1,430 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(c) 2016-20 Intel Corporation. + * + * Intel Software Guard Extensions (SGX) support. + */ +#ifndef _ASM_X86_SGX_H +#define _ASM_X86_SGX_H + +#include <linux/bits.h> +#include <linux/types.h> + +/* + * This file contains both data structures defined by SGX architecture and Linux + * defined software data structures and functions. The two should not be mixed + * together for better readability. The architectural definitions come first. + */ + +/* The SGX specific CPUID function. */ +#define SGX_CPUID 0x12 +/* EPC enumeration. */ +#define SGX_CPUID_EPC 2 +/* An invalid EPC section, i.e. the end marker. */ +#define SGX_CPUID_EPC_INVALID 0x0 +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +/* The bitmask for the EPC section type. */ +#define SGX_CPUID_EPC_MASK GENMASK(3, 0) + +enum sgx_encls_function { + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, + EAUG = 0x0D, + EMODPR = 0x0E, + EMODT = 0x0F, + EUPDATESVN = 0x18, +}; + +/** + * SGX_ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr + * + * ENCLS has its own (positive value) error codes and also generates + * ENCLS specific #GP and #PF faults. And the ENCLS values get munged + * with system error codes as everything percolates back up the stack. + * Unfortunately (for us), we need to precisely identify each unique + * error code, e.g. the action taken if EWB fails varies based on the + * type of fault and on the exact SGX error code, i.e. we can't simply + * convert all faults to -EFAULT. + * + * To make all three error types coexist, we set bit 30 to identify an + * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate + * between positive (faults and SGX error codes) and negative (system + * error codes) values. + */ +#define SGX_ENCLS_FAULT_FLAG 0x40000000 + +/** + * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV + * @SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function. 
+ * @SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * been completed yet. + * @SGX_CHILD_PRESENT: SECS has child pages present in the EPC. + * @SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's + * public key does not match IA32_SGXLEPUBKEYHASH. + * @SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it + * is in the PENDING or MODIFIED state. + * @SGX_INSUFFICIENT_ENTROPY: Insufficient entropy in RNG. + * @SGX_NO_UPDATE: EUPDATESVN could not update the CPUSVN because the + * current SVN was not newer than CPUSVN. This is the most + * common error code returned by EUPDATESVN. + * @SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received + */ +enum sgx_return_code { + SGX_EPC_PAGE_CONFLICT = 7, + SGX_NOT_TRACKED = 11, + SGX_CHILD_PRESENT = 13, + SGX_INVALID_EINITTOKEN = 16, + SGX_PAGE_NOT_MODIFIABLE = 20, + SGX_INSUFFICIENT_ENTROPY = 29, + SGX_NO_UPDATE = 31, + SGX_UNMASKED_EVENT = 128, +}; + +/* The modulus size for 3072-bit RSA keys. */ +#define SGX_MODULUS_SIZE 384 + +/** + * enum sgx_miscselect - additional information to an SSA frame + * @SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * + * Save State Area (SSA) is a stack inside the enclave used to store processor + * state when an exception or interrupt occurs. This enum defines additional + * information stored to an SSA frame. + */ +enum sgx_miscselect { + SGX_MISC_EXINFO = BIT(0), +}; + +#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1) + +#define SGX_SSA_GPRS_SIZE 184 +#define SGX_SSA_MISC_EXINFO_SIZE 16 + +/** + * enum sgx_attribute - the attributes field in &struct sgx_secs + * @SGX_ATTR_INIT: Enclave can be entered (is initialized). + * @SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * @SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. + * @SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * attestation. + * @SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). + * @SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to + * sign cryptographic tokens that can be passed to + * EINIT as an authorization to run an enclave. + * @SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an + * asynchronous exit has occurred. 
+ */
+enum sgx_attribute {
+	SGX_ATTR_INIT		   = BIT(0),
+	SGX_ATTR_DEBUG		   = BIT(1),
+	SGX_ATTR_MODE64BIT	   = BIT(2),
+				     /* BIT(3) is reserved */
+	SGX_ATTR_PROVISIONKEY	   = BIT(4),
+	SGX_ATTR_EINITTOKENKEY	   = BIT(5),
+				     /* BIT(6) is for CET */
+	SGX_ATTR_KSS		   = BIT(7),
+				     /* BIT(8) is reserved */
+				     /* BIT(9) is reserved */
+	SGX_ATTR_ASYNC_EXIT_NOTIFY = BIT(10),
+};
+
+#define SGX_ATTR_RESERVED_MASK	(BIT_ULL(3) | \
+				 BIT_ULL(6) | \
+				 BIT_ULL(8) | \
+				 BIT_ULL(9) | \
+				 GENMASK_ULL(63, 11))
+
+#define SGX_ATTR_UNPRIV_MASK	(SGX_ATTR_DEBUG | \
+				 SGX_ATTR_MODE64BIT | \
+				 SGX_ATTR_KSS | \
+				 SGX_ATTR_ASYNC_EXIT_NOTIFY)
+
+#define SGX_ATTR_PRIV_MASK	(SGX_ATTR_PROVISIONKEY | \
+				 SGX_ATTR_EINITTOKENKEY)
+
+/**
+ * struct sgx_secs - SGX Enclave Control Structure (SECS)
+ * @size:		size of the address space
+ * @base:		base address of the address space
+ * @ssa_frame_size:	size of an SSA frame
+ * @miscselect:		additional information stored to an SSA frame
+ * @attributes:		attributes for enclave
+ * @xfrm:		XSave-Feature Request Mask (subset of XCR0)
+ * @mrenclave:		SHA256-hash of the enclave contents
+ * @mrsigner:		SHA256-hash of the public key used to sign the SIGSTRUCT
+ * @config_id:		a user-defined value that is used in key derivation
+ * @isv_prod_id:	a user-defined value that is used in key derivation
+ * @isv_svn:		a user-defined value that is used in key derivation
+ * @config_svn:		a user-defined value that is used in key derivation
+ *
+ * SGX Enclave Control Structure (SECS) is a special enclave page that is not
+ * visible in the address space. In fact, this structure defines the address
+ * range and other global attributes for the enclave and it is the first EPC
+ * page created for any enclave. It is moved from a temporary buffer to the
+ * EPC by means of the ENCLS[ECREATE] function.
+ */
+struct sgx_secs {
+	u64 size;
+	u64 base;
+	u32 ssa_frame_size;
+	u32 miscselect;
+	u8  reserved1[24];
+	u64 attributes;
+	u64 xfrm;
+	u32 mrenclave[8];
+	u8  reserved2[32];
+	u32 mrsigner[8];
+	u8  reserved3[32];
+	u32 config_id[16];
+	u16 isv_prod_id;
+	u16 isv_svn;
+	u16 config_svn;
+	u8  reserved4[3834];
+} __packed;
+
+/**
+ * enum sgx_tcs_flags - execution flags for TCS
+ * @SGX_TCS_DBGOPTIN:	If enabled allows single-stepping and breakpoints
+ *			inside an enclave. It is cleared by EADD but can
+ *			be set later with EDBGWR.
+ */
+enum sgx_tcs_flags {
+	SGX_TCS_DBGOPTIN	= 0x01,
+};
+
+#define SGX_TCS_RESERVED_MASK	GENMASK_ULL(63, 1)
+#define SGX_TCS_RESERVED_SIZE	4024
+
+/**
+ * struct sgx_tcs - Thread Control Structure (TCS)
+ * @state:		used to mark an entered TCS
+ * @flags:		execution flags (cleared by EADD)
+ * @ssa_offset:		SSA stack offset relative to the enclave base
+ * @ssa_index:		the current SSA frame index (cleared by EADD)
+ * @nr_ssa_frames:	the number of frames in the SSA stack
+ * @entry_offset:	entry point offset relative to the enclave base
+ * @exit_addr:		address outside the enclave to exit on an exception or
+ *			interrupt
+ * @fs_offset:		offset relative to the enclave base to become FS
+ *			segment inside the enclave
+ * @gs_offset:		offset relative to the enclave base to become GS
+ *			segment inside the enclave
+ * @fs_limit:		size to become a new FS-limit (only 32-bit enclaves)
+ * @gs_limit:		size to become a new GS-limit (only 32-bit enclaves)
+ *
+ * Thread Control Structure (TCS) is an enclave page visible in its address
+ * space that defines an entry point inside the enclave. A thread enters an
+ * enclave by supplying the address of a TCS to ENCLU(EENTER).
+ * A TCS can be entered by only one thread at a time.
+ */
+struct sgx_tcs {
+	u64 state;
+	u64 flags;
+	u64 ssa_offset;
+	u32 ssa_index;
+	u32 nr_ssa_frames;
+	u64 entry_offset;
+	u64 exit_addr;
+	u64 fs_offset;
+	u64 gs_offset;
+	u32 fs_limit;
+	u32 gs_limit;
+	u8  reserved[SGX_TCS_RESERVED_SIZE];
+} __packed;
+
+/**
+ * struct sgx_pageinfo - an enclave page descriptor
+ * @addr:	address of the enclave page
+ * @contents:	pointer to the page contents
+ * @metadata:	pointer either to a SECINFO or PCMD instance
+ * @secs:	address of the SECS page
+ */
+struct sgx_pageinfo {
+	u64 addr;
+	u64 contents;
+	u64 metadata;
+	u64 secs;
+} __packed __aligned(32);
+
+/**
+ * enum sgx_page_type - bits in the SECINFO flags defining the page type
+ * @SGX_PAGE_TYPE_SECS:	a SECS page
+ * @SGX_PAGE_TYPE_TCS:	a TCS page
+ * @SGX_PAGE_TYPE_REG:	a regular page
+ * @SGX_PAGE_TYPE_VA:	a VA page
+ * @SGX_PAGE_TYPE_TRIM:	a page in trimmed state
+ *
+ * Make sure when making changes to this enum that its values can still fit
+ * in the bitfield within &struct sgx_encl_page.
+ */
+enum sgx_page_type {
+	SGX_PAGE_TYPE_SECS,
+	SGX_PAGE_TYPE_TCS,
+	SGX_PAGE_TYPE_REG,
+	SGX_PAGE_TYPE_VA,
+	SGX_PAGE_TYPE_TRIM,
+};
+
+#define SGX_NR_PAGE_TYPES	5
+#define SGX_PAGE_TYPE_MASK	GENMASK(7, 0)
+
+/**
+ * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo
+ * @SGX_SECINFO_R:	allow read
+ * @SGX_SECINFO_W:	allow write
+ * @SGX_SECINFO_X:	allow execution
+ * @SGX_SECINFO_SECS:	a SECS page
+ * @SGX_SECINFO_TCS:	a TCS page
+ * @SGX_SECINFO_REG:	a regular page
+ * @SGX_SECINFO_VA:	a VA page
+ * @SGX_SECINFO_TRIM:	a page in trimmed state
+ */
+enum sgx_secinfo_flags {
+	SGX_SECINFO_R		= BIT(0),
+	SGX_SECINFO_W		= BIT(1),
+	SGX_SECINFO_X		= BIT(2),
+	SGX_SECINFO_SECS	= (SGX_PAGE_TYPE_SECS << 8),
+	SGX_SECINFO_TCS		= (SGX_PAGE_TYPE_TCS << 8),
+	SGX_SECINFO_REG		= (SGX_PAGE_TYPE_REG << 8),
+	SGX_SECINFO_VA		= (SGX_PAGE_TYPE_VA << 8),
+	SGX_SECINFO_TRIM	= (SGX_PAGE_TYPE_TRIM << 8),
+};
+
+#define SGX_SECINFO_PERMISSION_MASK	GENMASK_ULL(2, 0)
+#define SGX_SECINFO_PAGE_TYPE_MASK	(SGX_PAGE_TYPE_MASK << 8)
+#define SGX_SECINFO_RESERVED_MASK	~(SGX_SECINFO_PERMISSION_MASK | \
+					  SGX_SECINFO_PAGE_TYPE_MASK)
+
+/**
+ * struct sgx_secinfo - describes attributes of an EPC page
+ * @flags:	permissions and type
+ *
+ * Used together with ENCLS leaves that add or modify an EPC page to an
+ * enclave to define page permissions and type.
+ */
+struct sgx_secinfo {
+	u64 flags;
+	u8  reserved[56];
+} __packed __aligned(64);
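+
+/*
+ * Illustrative sketch (not part of this patch): ENCLS callers compose the
+ * SECINFO flags for a page from a permission mask plus a page type from the
+ * enums above, e.g. for a regular read/write page:
+ *
+ *	struct sgx_secinfo secinfo = {
+ *		.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG,
+ *	};
+ */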
+
+#define SGX_PCMD_RESERVED_SIZE 40
+
+/**
+ * struct sgx_pcmd - Paging Crypto Metadata (PCMD)
+ * @enclave_id:	enclave identifier
+ * @mac:	MAC over PCMD, page contents and isvsvn
+ *
+ * A PCMD is stored in regular memory for every swapped page. When ELDU loads
+ * the page back, it recalculates the MAC using an isvsvn number stored in a
+ * VA page. Together these two structures bring integrity and rollback
+ * protection.
+ */
+struct sgx_pcmd {
+	struct sgx_secinfo secinfo;
+	u64 enclave_id;
+	u8  reserved[SGX_PCMD_RESERVED_SIZE];
+	u8  mac[16];
+} __packed __aligned(128);
+
+#define SGX_SIGSTRUCT_RESERVED1_SIZE 84
+#define SGX_SIGSTRUCT_RESERVED2_SIZE 20
+#define SGX_SIGSTRUCT_RESERVED3_SIZE 32
+#define SGX_SIGSTRUCT_RESERVED4_SIZE 12
+
+/**
+ * struct sgx_sigstruct_header - defines author of the enclave
+ * @header1:	constant byte string
+ * @vendor:	must be either 0x0000 or 0x8086
+ * @date:	YYYYMMDD in BCD
+ * @header2:	constant byte string
+ * @swdefined:	software defined value
+ */
+struct sgx_sigstruct_header {
+	u64 header1[2];
+	u32 vendor;
+	u32 date;
+	u64 header2[2];
+	u32 swdefined;
+	u8  reserved1[SGX_SIGSTRUCT_RESERVED1_SIZE];
+} __packed;
+
+/**
+ * struct sgx_sigstruct_body - defines contents of the enclave
+ * @miscselect:		additional information stored to an SSA frame
+ * @misc_mask:		required miscselect in SECS
+ * @attributes:		attributes for enclave
+ * @xfrm:		XSave-Feature Request Mask (subset of XCR0)
+ * @attributes_mask:	required attributes in SECS
+ * @xfrm_mask:		required XFRM in SECS
+ * @mrenclave:		SHA256-hash of the enclave contents
+ * @isvprodid:		a user-defined value that is used in key derivation
+ * @isvsvn:		a user-defined value that is used in key derivation
+ */
+struct sgx_sigstruct_body {
+	u32 miscselect;
+	u32 misc_mask;
+	u8  reserved2[SGX_SIGSTRUCT_RESERVED2_SIZE];
+	u64 attributes;
+	u64 xfrm;
+	u64 attributes_mask;
+	u64 xfrm_mask;
+	u8  mrenclave[32];
+	u8  reserved3[SGX_SIGSTRUCT_RESERVED3_SIZE];
+	u16 isvprodid;
+	u16 isvsvn;
+} __packed;
+
+/**
+ * struct sgx_sigstruct - an enclave signature
+ * @header:	defines author of the enclave
+ * @modulus:	the modulus of the public key
+ * @exponent:	the exponent of the public key
+ * @signature:	the signature calculated over the signed fields, i.e. header
+ *		and body
+ * @body:	defines contents of the enclave
+ * @q1:		a value used in RSA signature verification
+ * @q2:		a value used in RSA signature verification
+ *
+ * Header and body are the parts that are actually signed. The remaining
+ * fields define the signature of the enclave.
+ */
+struct sgx_sigstruct {
+	struct sgx_sigstruct_header header;
+	u8  modulus[SGX_MODULUS_SIZE];
+	u32 exponent;
+	u8  signature[SGX_MODULUS_SIZE];
+	struct sgx_sigstruct_body body;
+	u8  reserved4[SGX_SIGSTRUCT_RESERVED4_SIZE];
+	u8  q1[SGX_MODULUS_SIZE];
+	u8  q2[SGX_MODULUS_SIZE];
+} __packed;
+
+#define SGX_LAUNCH_TOKEN_SIZE 304
+
+/*
+ * Do not put any hardware-defined SGX structure representations below this
+ * comment!
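+ *
+ * Illustrative use only (not part of this patch): a caller granting the
+ * privileged PROVISIONKEY attribute via an opened provision device fd
+ * would use the helper declared below, roughly as
+ *
+ *	ret = sgx_set_attribute(&allowed_attributes, attribute_fd);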
+ */
+
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+		     int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+int sgx_set_attribute(unsigned long *allowed_attributes,
+		      unsigned int attribute_fd);
+
+#endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
new file mode 100644
index 000000000000..8009d781c2f9
--- /dev/null
+++ b/arch/x86/include/asm/shared/io.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_IO_H
+#define _ASM_X86_SHARED_IO_H
+
+#include <linux/types.h>
+
+#define BUILDIO(bwl, bw, type)						\
+static __always_inline void __out##bwl(type value, u16 port)		\
+{									\
+	asm volatile("out" #bwl " %" #bw "0, %w1"			\
+		     : : "a"(value), "Nd"(port));			\
+}									\
+									\
+static __always_inline type __in##bwl(u16 port)				\
+{									\
+	type value;							\
+	asm volatile("in" #bwl " %w1, %" #bw "0"			\
+		     : "=a"(value) : "Nd"(port));			\
+	return value;							\
+}
+
+BUILDIO(b, b, u8)
+BUILDIO(w, w, u16)
+BUILDIO(l, , u32)
+#undef BUILDIO
+
+#define inb __inb
+#define inw __inw
+#define inl __inl
+#define outb __outb
+#define outw __outw
+#define outl __outl
+
+#endif
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
new file mode 100644
index 000000000000..a20b1c08c99f
--- /dev/null
+++ b/arch/x86/include/asm/shared/msr.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SHARED_MSR_H
+#define _ASM_X86_SHARED_MSR_H
+
+struct msr {
+	union {
+		struct {
+			u32 l;
+			u32 h;
+		};
+		u64 q;
+	};
+};
+
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not usable
+ * in the boot kernel, since they rely on tracepoint/exception handling
+ * infrastructure that's not available here.
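+ *
+ * Illustrative use only (not part of this patch): boot code reading EFER
+ * would do
+ *
+ *	struct msr m;
+ *
+ *	raw_rdmsr(MSR_EFER, &m);
+ *
+ * and then consume either the m.l/m.h halves or the combined m.q value.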
+ */ +static inline void raw_rdmsr(unsigned int reg, struct msr *m) +{ + asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); +} + +static inline void raw_wrmsr(unsigned int reg, const struct msr *m) +{ + asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); +} + +#endif /* _ASM_X86_SHARED_MSR_H */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h new file mode 100644 index 000000000000..8bc074c8d7c6 --- /dev/null +++ b/arch/x86/include/asm/shared/tdx.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHARED_TDX_H +#define _ASM_X86_SHARED_TDX_H + +#include <linux/bits.h> +#include <linux/types.h> + +#define TDX_HYPERCALL_STANDARD 0 + +#define TDX_CPUID_LEAF_ID 0x21 +#define TDX_IDENT "IntelTDX " + +/* TDX module Call Leaf IDs */ +#define TDG_VP_VMCALL 0 +#define TDG_VP_INFO 1 +#define TDG_MR_RTMR_EXTEND 2 +#define TDG_VP_VEINFO_GET 3 +#define TDG_MR_REPORT 4 +#define TDG_MEM_PAGE_ACCEPT 6 +#define TDG_VM_RD 7 +#define TDG_VM_WR 8 + +/* TDX attributes */ +#define TDX_ATTR_DEBUG_BIT 0 +#define TDX_ATTR_DEBUG BIT_ULL(TDX_ATTR_DEBUG_BIT) +#define TDX_ATTR_HGS_PLUS_PROF_BIT 4 +#define TDX_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_ATTR_HGS_PLUS_PROF_BIT) +#define TDX_ATTR_PERF_PROF_BIT 5 +#define TDX_ATTR_PERF_PROF BIT_ULL(TDX_ATTR_PERF_PROF_BIT) +#define TDX_ATTR_PMT_PROF_BIT 6 +#define TDX_ATTR_PMT_PROF BIT_ULL(TDX_ATTR_PMT_PROF_BIT) +#define TDX_ATTR_ICSSD_BIT 16 +#define TDX_ATTR_ICSSD BIT_ULL(TDX_ATTR_ICSSD_BIT) +#define TDX_ATTR_LASS_BIT 27 +#define TDX_ATTR_LASS BIT_ULL(TDX_ATTR_LASS_BIT) +#define TDX_ATTR_SEPT_VE_DISABLE_BIT 28 +#define TDX_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_ATTR_SEPT_VE_DISABLE_BIT) +#define TDX_ATTR_MIGRTABLE_BIT 29 +#define TDX_ATTR_MIGRTABLE BIT_ULL(TDX_ATTR_MIGRTABLE_BIT) +#define TDX_ATTR_PKS_BIT 30 +#define TDX_ATTR_PKS BIT_ULL(TDX_ATTR_PKS_BIT) +#define TDX_ATTR_KL_BIT 31 +#define TDX_ATTR_KL BIT_ULL(TDX_ATTR_KL_BIT) +#define TDX_ATTR_TPA_BIT 62 +#define TDX_ATTR_TPA BIT_ULL(TDX_ATTR_TPA_BIT) +#define TDX_ATTR_PERFMON_BIT 63 +#define TDX_ATTR_PERFMON BIT_ULL(TDX_ATTR_PERFMON_BIT) + +/* TDX TD-Scope Metadata. 
To be used by TDG.VM.WR and TDG.VM.RD */ +#define TDCS_CONFIG_FLAGS 0x1110000300000016 +#define TDCS_TD_CTLS 0x1110000300000017 +#define TDCS_NOTIFY_ENABLES 0x9100000000000010 +#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019 + +/* TDCS_CONFIG_FLAGS bits */ +#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1) + +/* TDCS_TD_CTLS bits */ +#define TD_CTLS_PENDING_VE_DISABLE_BIT 0 +#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(TD_CTLS_PENDING_VE_DISABLE_BIT) +#define TD_CTLS_ENUM_TOPOLOGY_BIT 1 +#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(TD_CTLS_ENUM_TOPOLOGY_BIT) +#define TD_CTLS_VIRT_CPUID2_BIT 2 +#define TD_CTLS_VIRT_CPUID2 BIT_ULL(TD_CTLS_VIRT_CPUID2_BIT) +#define TD_CTLS_REDUCE_VE_BIT 3 +#define TD_CTLS_REDUCE_VE BIT_ULL(TD_CTLS_REDUCE_VE_BIT) +#define TD_CTLS_LOCK_BIT 63 +#define TD_CTLS_LOCK BIT_ULL(TD_CTLS_LOCK_BIT) + +/* TDX hypercall Leaf IDs */ +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_MAP_GPA 0x10001 +#define TDVMCALL_GET_QUOTE 0x10002 +#define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL + +/* + * TDG.VP.VMCALL Status Codes (returned in R10) + */ +#define TDVMCALL_STATUS_SUCCESS 0x0000000000000000ULL +#define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL +#define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL +#define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL +#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL + +/* + * Bitmasks of exposed registers (with VMM). + */ +#define TDX_RDX BIT(2) +#define TDX_RBX BIT(3) +#define TDX_RSI BIT(6) +#define TDX_RDI BIT(7) +#define TDX_R8 BIT(8) +#define TDX_R9 BIT(9) +#define TDX_R10 BIT(10) +#define TDX_R11 BIT(11) +#define TDX_R12 BIT(12) +#define TDX_R13 BIT(13) +#define TDX_R14 BIT(14) +#define TDX_R15 BIT(15) + +/* + * These registers are clobbered to hold arguments for each + * TDVMCALL. They are safe to expose to the VMM. + * Each bit in this mask represents a register ID. Bit field + * details can be found in TDX GHCI specification, section + * titled "TDCALL [TDG.VP.VMCALL] leaf". + */ +#define TDVMCALL_EXPOSE_REGS_MASK \ + (TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \ + TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15) + +/* TDX supported page sizes from the TDX module ABI. */ +#define TDX_PS_4K 0 +#define TDX_PS_2M 1 +#define TDX_PS_1G 2 +#define TDX_PS_NR (TDX_PS_1G + 1) + +#ifndef __ASSEMBLER__ + +#include <linux/compiler_attributes.h> + +/* + * Used in __tdcall*() to gather the input/output registers' values of the + * TDCALL instruction when requesting services from the TDX module. This is a + * software only structure and not part of the TDX module/VMM ABI + */ +struct tdx_module_args { + /* callee-clobbered */ + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + /* extra callee-clobbered */ + u64 r10; + u64 r11; + /* callee-saved + rdi/rsi */ + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rbx; + u64 rdi; + u64 rsi; +}; + +/* Used to communicate with the TDX module */ +u64 __tdcall(u64 fn, struct tdx_module_args *args); +u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); +u64 __tdcall_saved_ret(u64 fn, struct tdx_module_args *args); + +/* Used to request services from the VMM */ +u64 __tdx_hypercall(struct tdx_module_args *args); + +/* + * Wrapper for standard use of __tdx_hypercall with no output aside from + * return code. 
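+ *
+ * Illustrative use only (not part of this patch): a MapGPA conversion of a
+ * guest-physical range would look roughly like
+ *
+ *	ret = _tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0);
+ *
+ * where start/end carry the shared bit as needed, and a non-zero R10 status
+ * (the function's return value) indicates failure.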
+ */ +static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) +{ + struct tdx_module_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = fn, + .r12 = r12, + .r13 = r13, + .r14 = r14, + .r15 = r15, + }; + + return __tdx_hypercall(&args); +} + + +/* Called from __tdx_hypercall() for unrecoverable failure */ +void __noreturn __tdx_hypercall_failed(void); + +bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); + +/* + * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined + * independently from but are currently matched 1:1 with VMX EXIT_REASONs. + * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and + * guest sides of these calls. + */ +static __always_inline u64 hcall_func(u64 exit_reason) +{ + return exit_reason; +} + +#endif /* !__ASSEMBLER__ */ +#endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h new file mode 100644 index 000000000000..fc7dcec58fd4 --- /dev/null +++ b/arch/x86/include/asm/shstk.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHSTK_H +#define _ASM_X86_SHSTK_H + +#ifndef __ASSEMBLER__ +#include <linux/types.h> + +struct task_struct; +struct ksignal; + +#ifdef CONFIG_X86_USER_SHADOW_STACK +struct thread_shstk { + u64 base; + u64 size; +}; + +long shstk_prctl(struct task_struct *task, int option, unsigned long arg2); +void reset_thread_features(void); +unsigned long shstk_alloc_thread_stack(struct task_struct *p, u64 clone_flags, + unsigned long stack_size); +void shstk_free(struct task_struct *p); +int setup_signal_shadow_stack(struct ksignal *ksig); +int restore_signal_shadow_stack(void); +int shstk_update_last_frame(unsigned long val); +bool shstk_is_enabled(void); +int shstk_pop(u64 *val); +int shstk_push(u64 val); +#else +static inline long shstk_prctl(struct task_struct *task, int option, + unsigned long arg2) { return -EINVAL; } +static inline void reset_thread_features(void) {} +static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p, + u64 clone_flags, + unsigned long stack_size) { return 0; } +static inline void shstk_free(struct task_struct *p) {} +static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; } +static inline int restore_signal_shadow_stack(void) { return 0; } +static inline int shstk_update_last_frame(unsigned long val) { return 0; } +static inline bool shstk_is_enabled(void) { return false; } +static inline int shstk_pop(u64 *val) { return -ENOTSUPP; } +static inline int shstk_push(u64 val) { return -ENOTSUPP; } +#endif /* CONFIG_X86_USER_SHADOW_STACK */ + +#endif /* __ASSEMBLER__ */ + +#endif /* _ASM_X86_SHSTK_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index f176114c04d4..84eab2724875 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -33,11 +33,7 @@ struct sigframe_ia32 { * legacy application accessing/modifying it. 
*/ struct _fpstate_32 fpstate_unused; -#ifdef CONFIG_IA32_EMULATION - unsigned int extramask[_COMPAT_NSIG_WORDS-1]; -#else /* !CONFIG_IA32_EMULATION */ - unsigned long extramask[_NSIG_WORDS-1]; -#endif /* CONFIG_IA32_EMULATION */ + unsigned int extramask[1]; char retcode[8]; /* fp state follows here */ }; diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 2fcbd6f33ef7..8727c7e21dd1 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -14,12 +14,36 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); -int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, - struct pt_regs *regs, unsigned long mask); +void __user * +get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, + void __user **fpstate); -#ifdef CONFIG_X86_X32_ABI -asmlinkage long sys32_x32_rt_sigreturn(void); +int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs); +int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); + +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. 
+ */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; #endif +} #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 33d3c88a7225..5c03aaa89014 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_SIGNAL_H #define _ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> /* Most things should be clean enough to redefine this at will, if care @@ -28,14 +28,9 @@ typedef struct { #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u -#ifndef CONFIG_COMPAT -typedef sigset_t compat_sigset_t; -#endif - -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include <uapi/asm/signal.h> -#ifndef __ASSEMBLY__ -extern void do_signal(struct pt_regs *regs); +#ifndef __ASSEMBLER__ #define __ARCH_HAS_SA_RESTORER @@ -88,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { bool ret; - asm("btl %2,%1" CC_SET(c) - : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); + asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1)); return ret; } @@ -106,5 +100,5 @@ struct pt_regs; #endif /* !__i386__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index a341c878e977..b8027b63cd7a 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SIMD_H +#define _ASM_SIMD_H #include <asm/fpu/api.h> +#include <linux/compiler_attributes.h> +#include <linux/types.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -10,3 +14,5 @@ static __must_check inline bool may_use_simd(void) { return irq_fpu_usable(); } + +#endif /* _ASM_SIMD_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index db333300bd4b..977bef14a0ab 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -1,79 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Supervisor Mode Access Prevention support * * Copyright (C) 2012 Intel Corporation * Author: H. Peter Anvin <hpa@linux.intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ #ifndef _ASM_X86_SMAP_H #define _ASM_X86_SMAP_H -#include <linux/stringify.h> #include <asm/nops.h> #include <asm/cpufeatures.h> +#include <asm/alternative.h> -/* "Raw" instruction opcodes */ -#define __ASM_CLAC .byte 0x0f,0x01,0xca -#define __ASM_STAC .byte 0x0f,0x01,0xcb - -#ifdef __ASSEMBLY__ - -#include <asm/alternative-asm.h> - -#ifdef CONFIG_X86_SMAP +#ifdef __ASSEMBLER__ #define ASM_CLAC \ - ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP + ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP + ALTERNATIVE "", "stac", X86_FEATURE_SMAP -#else /* CONFIG_X86_SMAP */ +#else /* __ASSEMBLER__ */ -#define ASM_CLAC -#define ASM_STAC +/* + * The CLAC/STAC instructions toggle the enforcement of + * X86_FEATURE_SMAP along with X86_FEATURE_LASS. + * + * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page + * tables. The kernel is not allowed to touch pages with that bit set + * unless the AC bit is set. 
+ * + * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings, + * regardless of location. + * + * Note: a barrier is implicit in alternative(). + */ -#endif /* CONFIG_X86_SMAP */ +static __always_inline void clac(void) +{ + alternative("", "clac", X86_FEATURE_SMAP); +} -#else /* __ASSEMBLY__ */ +static __always_inline void stac(void) +{ + alternative("", "stac", X86_FEATURE_SMAP); +} -#include <asm/alternative.h> +/* + * LASS enforcement is based on bit 63 of the virtual address. The + * kernel is not allowed to touch memory in the lower half of the + * virtual address space. + * + * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data + * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP. + * + * Even with the AC bit set, LASS will continue to block instruction + * fetches from the user half of the address space. To allow those, + * clear CR4.LASS to disable the LASS mechanism entirely. + * + * Note: a barrier is implicit in alternative(). + */ -#ifdef CONFIG_X86_SMAP +static __always_inline void lass_clac(void) +{ + alternative("", "clac", X86_FEATURE_LASS); +} -static __always_inline void clac(void) +static __always_inline void lass_stac(void) { - /* Note: a barrier is implicit in alternative() */ - alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP); + alternative("", "stac", X86_FEATURE_LASS); } -static __always_inline void stac(void) +static __always_inline unsigned long smap_save(void) { - /* Note: a barrier is implicit in alternative() */ - alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); + unsigned long flags; + + asm volatile ("# smap_save\n\t" + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t" + "", "pushf; pop %0; clac", + X86_FEATURE_SMAP) + : "=rm" (flags) : : "memory", "cc"); + + return flags; +} + +static __always_inline void smap_restore(unsigned long flags) +{ + asm volatile ("# smap_restore\n\t" + ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t" + "", "push %0; popf", + X86_FEATURE_SMAP) + : : "g" (flags) : "memory", "cc"); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) + ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP) - -#else /* CONFIG_X86_SMAP */ - -static inline void clac(void) { } -static inline void stac(void) { } - -#define ASM_CLAC -#define ASM_STAC + ALTERNATIVE("", "stac", X86_FEATURE_SMAP) -#endif /* CONFIG_X86_SMAP */ +#define ASM_CLAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "clac", X86_FEATURE_SMAP) +#define ASM_STAC_UNSAFE \ + ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "stac", X86_FEATURE_SMAP) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2e95b6c1bca3..84951572ab81 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -1,44 +1,23 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SMP_H #define _ASM_X86_SMP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> -#include <asm/percpu.h> +#include <linux/thread_info.h> -/* - * We need the APIC definitions automatically as part of 'smp.h' - */ -#ifdef CONFIG_X86_LOCAL_APIC -# include <asm/mpspec.h> -# include <asm/apic.h> -# ifdef CONFIG_X86_IO_APIC -# include <asm/io_apic.h> -# endif -#endif -#include <asm/thread_info.h> #include <asm/cpumask.h> -extern int smp_num_siblings; -extern unsigned int num_processors; 
+DECLARE_PER_CPU_CACHE_HOT(int, cpu_number); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); -DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); -static inline struct cpumask *cpu_llc_shared_mask(int cpu) -{ - return per_cpu(cpu_llc_shared_map, cpu); -} - -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); -#endif struct task_struct; @@ -51,10 +30,13 @@ struct smp_ops { void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); - int (*cpu_up)(unsigned cpu, struct task_struct *tidle); + void (*cleanup_dead_cpu)(unsigned cpu); + void (*poll_sync_state)(void); + int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); + void (*stop_this_cpu)(void); void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); @@ -76,11 +58,6 @@ static inline void stop_other_cpus(void) smp_ops.stop_other_cpus(1); } -static inline void smp_prepare_boot_cpu(void) -{ - smp_ops.smp_prepare_boot_cpu(); -} - static inline void smp_prepare_cpus(unsigned int max_cpus) { smp_ops.smp_prepare_cpus(max_cpus); @@ -91,11 +68,6 @@ static inline void smp_cpus_done(unsigned int max_cpus) smp_ops.smp_cpus_done(max_cpus); } -static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) -{ - return smp_ops.cpu_up(cpu, tidle); -} - static inline int __cpu_disable(void) { return smp_ops.cpu_disable(); @@ -103,15 +75,17 @@ static inline int __cpu_disable(void) static inline void __cpu_die(unsigned int cpu) { - smp_ops.cpu_die(cpu); + if (smp_ops.cpu_die) + smp_ops.cpu_die(cpu); } -static inline void play_dead(void) +static inline void __noreturn play_dead(void) { smp_ops.play_dead(); + BUG(); } -static inline void smp_send_reschedule(int cpu) +static inline void arch_smp_send_reschedule(int cpu) { smp_ops.smp_send_reschedule(cpu); } @@ -128,26 +102,27 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); +void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); -void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); -void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); -int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); -int common_cpu_die(unsigned int cpu); -void native_cpu_die(unsigned int cpu); -void hlt_play_dead(void); -void native_play_dead(void); +void __noreturn hlt_play_dead(void); +void __noreturn native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); -int wbinvd_on_all_cpus(void); +void wbinvd_on_all_cpus(void); +void 
wbinvd_on_cpus_mask(struct cpumask *cpus); +void wbnoinvd_on_all_cpus(void); +void wbnoinvd_on_cpus_mask(struct cpumask *cpus); + +void smp_kick_mwait_play_dead(void); +void __noreturn mwait_play_dead(unsigned int eax_hint); +void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); - -void smp_store_boot_cpu_info(void); -void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); __visible void smp_reschedule_interrupt(struct pt_regs *regs); @@ -159,34 +134,50 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); /* * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. + * from the initial startup. */ -#define raw_smp_processor_id() (this_cpu_read(cpu_number)) +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) -#ifdef CONFIG_X86_32 -extern int safe_smp_processor_id(void); -#else -# define safe_smp_processor_id() smp_processor_id() -#endif +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return per_cpu(cpu_llc_shared_map, cpu); +} + +static inline struct cpumask *cpu_l2c_shared_mask(int cpu) +{ + return per_cpu(cpu_l2c_shared_map, cpu); +} #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() -static inline int wbinvd_on_all_cpus(void) +static inline void wbinvd_on_all_cpus(void) { wbinvd(); - return 0; } -#endif /* CONFIG_SMP */ -extern unsigned disabled_cpus; +static inline void wbinvd_on_cpus_mask(struct cpumask *cpus) +{ + wbinvd(); +} + +static inline void wbnoinvd_on_all_cpus(void) +{ + wbnoinvd(); +} + +static inline void wbnoinvd_on_cpus_mask(struct cpumask *cpus) +{ + wbnoinvd(); +} -#ifdef CONFIG_X86_LOCAL_APIC -extern int hard_smp_processor_id(void); +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return (struct cpumask *)cpumask_of(0); +} -#else /* CONFIG_X86_LOCAL_APIC */ -#define hard_smp_processor_id() 0 -#endif /* CONFIG_X86_LOCAL_APIC */ +static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST extern void nmi_selftest(void); @@ -194,5 +185,15 @@ extern void nmi_selftest(void); #define nmi_selftest() do { } while (0) #endif -#endif /* __ASSEMBLY__ */ +extern unsigned int smpboot_control; +extern unsigned long apic_mmio_base; + +#endif /* !__ASSEMBLER__ */ + +/* Control bits for startup_64 */ +#define STARTUP_READ_APICID 0x80000000 + +/* Top 8 bits are reserved for control */ +#define STARTUP_PARALLEL_MASK 0xFF000000 + #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/softirq_stack.h b/arch/x86/include/asm/softirq_stack.h new file mode 100644 index 000000000000..889d53d6a0e1 --- /dev/null +++ b/arch/x86/include/asm/softirq_stack.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SOFTIRQ_STACK_H +#define _ASM_X86_SOFTIRQ_STACK_H + +#ifdef CONFIG_X86_64 +# include <asm/irq_stack.h> +#else +# include <asm-generic/softirq_stack.h> +#endif + +#endif diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 199218719a86..3918c7a434f5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SPARSEMEM_H #define _ASM_X86_SPARSEMEM_H 
+#include <linux/types.h> + #ifdef CONFIG_SPARSEMEM /* * generic non-linear memory support: @@ -10,26 +12,23 @@ * field of the struct page * * SECTION_SIZE_BITS 2^n: size of each section - * MAX_PHYSADDR_BITS 2^n: max size of physical address space - * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space + * MAX_PHYSMEM_BITS 2^n: max size of physical address space * */ #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_PAE # define SECTION_SIZE_BITS 29 -# define MAX_PHYSADDR_BITS 36 # define MAX_PHYSMEM_BITS 36 # else # define SECTION_SIZE_BITS 26 -# define MAX_PHYSADDR_BITS 32 # define MAX_PHYSMEM_BITS 32 # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44) # define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ + #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 5393babc0598..00b7e0398210 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -4,6 +4,7 @@ #include <linux/thread_info.h> #include <asm/nospec-branch.h> +#include <asm/msr.h> /* * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR @@ -13,7 +14,7 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); +extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); /** * x86_spec_ctrl_set_guest - Set speculation control registers for the guest @@ -24,9 +25,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); } /** @@ -38,9 +39,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); } /* AMD specific Speculative Store Bypass MSR data */ @@ -76,6 +77,16 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; } +/* + * This can be used in noinstr functions & should only be called in bare + * metal context. 
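+ *
+ * Illustrative use only (not part of this patch): an idle path dropping
+ * IBRS before mwait might do
+ *
+ *	__update_spec_ctrl(spec_ctrl & ~SPEC_CTRL_IBRS);
+ *
+ * where spec_ctrl is the current MSR value, so that the per-CPU shadow
+ * stays coherent with the hardware MSR.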
+ */ +static __always_inline void __update_spec_ctrl(u64 val) +{ + __this_cpu_write(x86_spec_ctrl_current, val); + native_wrmsrq(MSR_IA32_SPEC_CTRL, val); +} + #ifdef CONFIG_SMP extern void speculative_store_bypass_ht_init(void); #else @@ -85,4 +96,6 @@ static inline void speculative_store_bypass_ht_init(void) { } extern void speculation_ctrl_update(unsigned long tif); extern void speculation_ctrl_update_current(void); +extern bool itlb_multihit_kvm_mitigation; + #endif diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 43c029cdc3fe..46aa2c9c1bda 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -2,54 +2,45 @@ #ifndef _ASM_X86_SPECIAL_INSNS_H #define _ASM_X86_SPECIAL_INSNS_H - #ifdef __KERNEL__ - #include <asm/nops.h> +#include <asm/processor-flags.h> -/* - * Volatile isn't enough to prevent the compiler from reordering the - * read/write functions for the control registers and messing everything up. - * A memory clobber would solve the problem, but would prevent reordering of - * all loads stores around it, which can hurt performance. Solution is to - * use a variable and mimic reads and writes to it to enforce serialization - */ -extern unsigned long __force_order; +#include <linux/errno.h> +#include <linux/irqflags.h> +#include <linux/jump_label.h> + +void native_write_cr0(unsigned long val); static inline unsigned long native_read_cr0(void) { unsigned long val; - asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr0,%0" : "=r" (val)); return val; } -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr2(void) +static __always_inline unsigned long native_read_cr2(void) { unsigned long val; - asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr2,%0" : "=r" (val)); return val; } -static inline void native_write_cr2(unsigned long val) +static __always_inline void native_write_cr2(unsigned long val) { - asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); + asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); } -static inline unsigned long __native_read_cr3(void) +static __always_inline unsigned long __native_read_cr3(void) { unsigned long val; - asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr3,%0" : "=r" (val)); return val; } -static inline void native_write_cr3(unsigned long val) +static __always_inline void native_write_cr3(unsigned long val) { - asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); + asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); } static inline unsigned long native_read_cr4(void) @@ -64,35 +55,18 @@ static inline unsigned long native_read_cr4(void) asm volatile("1: mov %%cr4, %0\n" "2:\n" _ASM_EXTABLE(1b, 2b) - : "=r" (val), "=m" (__force_order) : "0" (0)); + : "=r" (val) : "0" (0)); #else /* CR4 always exists on x86_64. 
*/ - asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr4,%0" : "=r" (val)); #endif return val; } -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); -} - -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} -#endif +void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; @@ -101,13 +75,11 @@ static inline u32 __read_pkru(void) * "rdpkru" instruction. Places PKRU contents in to EAX, * clears EDX and requires that ecx=0. */ - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (pkru), "=d" (edx) - : "c" (ecx)); + asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx)); return pkru; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; @@ -115,26 +87,51 @@ static inline void __write_pkru(u32 pkru) * "wrpkru" instruction. Loads contents in EAX to PKRU, * requires that ecx = edx = 0. */ - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (pkru), "c"(ecx), "d"(edx)); + asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx)); } + #else -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { return 0; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { } #endif -static inline void native_wbinvd(void) +/* + * Write back all modified lines in all levels of cache associated with this + * logical processor to main memory, and then invalidate all caches. Depending + * on the micro-architecture, WBINVD (and WBNOINVD below) may or may not affect + * lower level caches associated with another logical processor that shares any + * level of this processor's cache hierarchy. + */ +static __always_inline void wbinvd(void) { - asm volatile("wbinvd": : :"memory"); + asm volatile("wbinvd" : : : "memory"); } -extern asmlinkage void native_load_gs_index(unsigned); +/* Instruction encoding provided for binutils backwards compatibility. */ +#define ASM_WBNOINVD _ASM_BYTES(0xf3,0x0f,0x09) + +/* + * Write back all modified lines in all levels of cache associated with this + * logical processor to main memory, but do NOT explicitly invalidate caches, + * i.e. leave all/most cache lines in the hierarchy in non-modified state. + */ +static __always_inline void wbnoinvd(void) +{ + /* + * Explicitly encode WBINVD if X86_FEATURE_WBNOINVD is unavailable even + * though WBNOINVD is backwards compatible (it's simply WBINVD with an + * ignored REP prefix), to guarantee that WBNOINVD isn't used if it + * needs to be avoided for any reason. For all supported usage in the + * kernel, WBINVD is functionally a superset of WBNOINVD. 
+ */ + alternative("wbinvd", ASM_WBNOINVD, X86_FEATURE_WBNOINVD); +} static inline unsigned long __read_cr4(void) { @@ -155,12 +152,12 @@ static inline void write_cr0(unsigned long x) native_write_cr0(x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } -static inline void write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } @@ -183,62 +180,128 @@ static inline void __write_cr4(unsigned long x) { native_write_cr4(x); } +#endif /* CONFIG_PARAVIRT_XXL */ -static inline void wbinvd(void) +static __always_inline void clflush(volatile void *__p) { - native_wbinvd(); + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } -#ifdef CONFIG_X86_64 - -static inline unsigned long read_cr8(void) +static inline void clflushopt(volatile void *__p) { - return native_read_cr8(); + alternative_io("ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, + "+m" (*(volatile char __force *)__p)); } -static inline void write_cr8(unsigned long x) +static inline void clwb(volatile void *__p) { - native_write_cr8(x); + volatile struct { char x[64]; } *p = __p; + + asm_inline volatile(ALTERNATIVE_2( + "ds clflush %0", + "clflushopt %0", X86_FEATURE_CLFLUSHOPT, + "clwb %0", X86_FEATURE_CLWB) + : "+m" (*p)); } -static inline void load_gs_index(unsigned selector) +#ifdef CONFIG_X86_USER_SHADOW_STACK +static inline int write_user_shstk_64(u64 __user *addr, u64 val) { - native_load_gs_index(selector); + asm goto("1: wrussq %[val], %[addr]\n" + _ASM_EXTABLE(1b, %l[fail]) + :: [addr] "m" (*addr), [val] "r" (val) + :: fail); + return 0; +fail: + return -EFAULT; } +#endif /* CONFIG_X86_USER_SHADOW_STACK */ -#endif +#define nop() asm volatile ("nop") -#endif /* CONFIG_PARAVIRT_XXL */ +static __always_inline void serialize(void) +{ + /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ + asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); +} -static inline void clflush(volatile void *__p) +/* The dst parameter must be 64-bytes aligned */ +static inline void movdir64b(void *dst, const void *src) { - asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); + const struct { char _[64]; } *__src = src; + struct { char _[64]; } *__dst = dst; + + /* + * MOVDIR64B %(rdx), rax. + * + * Both __src and __dst must be memory constraints in order to tell the + * compiler that no other memory accesses should be reordered around + * this one. + * + * Also, both must be supplied as lvalues because this tells + * the compiler what the object is (its size) the instruction accesses. + * I.e., not the pointers but what they point to, thus the deref'ing '*'. + */ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "+m" (*__dst) + : "m" (*__src), "a" (__dst), "d" (__src)); } -static inline void clflushopt(volatile void *__p) +static inline void movdir64b_io(void __iomem *dst, const void *src) { - alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", - ".byte 0x66; clflush %P0", - X86_FEATURE_CLFLUSHOPT, - "+m" (*(volatile char __force *)__p)); + movdir64b((void __force *)dst, src); } -static inline void clwb(volatile void *__p) +/** + * enqcmds - Enqueue a command in supervisor (CPL0) mode + * @dst: destination, in MMIO space (must be 512-bit aligned) + * @src: 512 bits memory operand + * + * The ENQCMDS instruction allows software to write a 512-bit command to + * a 512-bit-aligned special MMIO region that supports the instruction. 
+ * A return status is loaded into the ZF flag in the RFLAGS register. + * ZF = 0 equates to success, and ZF = 1 indicates retry or error. + * + * This function issues the ENQCMDS instruction to submit data from + * kernel space to MMIO space, in a unit of 512 bits. Order of data access + * is not guaranteed, nor is a memory barrier performed afterwards. It + * returns 0 on success and -EAGAIN on failure. + * + * Warning: Do not use this helper unless your driver has checked that the + * ENQCMDS instruction is supported on the platform and the device accepts + * ENQCMDS. + */ +static inline int enqcmds(void __iomem *dst, const void *src) { - volatile struct { char x[64]; } *p = __p; + const struct { char _[64]; } *__src = src; + struct { char _[64]; } __iomem *__dst = dst; + bool zf; - asm volatile(ALTERNATIVE_2( - ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", - ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ - X86_FEATURE_CLFLUSHOPT, - ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ - X86_FEATURE_CLWB) - : [p] "+m" (*p) - : [pax] "a" (p)); -} + /* + * ENQCMDS %(rdx), rax + * + * See movdir64b()'s comment on operand specification. + */ + asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" + : "=@ccz" (zf), "+m" (*__dst) + : "m" (*__src), "a" (__dst), "d" (__src)); -#define nop() asm volatile ("nop") + /* Submission failure is indicated via EFLAGS.ZF=1 */ + if (zf) + return -EAGAIN; + return 0; +} + +static __always_inline void tile_release(void) +{ + /* + * Instruction opcode for TILERELEASE; supported in binutils + * version >= 2.36. + */ + asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0"); +} #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index bf3e34b25afc..323db6c5852a 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -3,29 +3,7 @@ #define _ASM_X86_SPINLOCK_TYPES_H #include <linux/types.h> - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define __TICKET_LOCK_INC 2 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)1) -#else -#define __TICKET_LOCK_INC 1 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)0) -#endif - -#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) -typedef u8 __ticket_t; -typedef u16 __ticketpair_t; -#else -typedef u16 __ticket_t; -typedef u32 __ticketpair_t; -#endif - -#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) - -#define TICKET_SHIFT (sizeof(__ticket_t) * 8) - #include <asm-generic/qspinlock_types.h> - #include <asm-generic/qrwlock_types.h> #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h deleted file mode 100644 index e0975e9c4f47..000000000000 --- a/arch/x86/include/asm/sta2x11.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Header file for STMicroelectronics ConneXt (STA2X11) IOHub - */ -#ifndef __ASM_STA2X11_H -#define __ASM_STA2X11_H - -#include <linux/pci.h> - -/* This needs to be called from the MFD to configure its sub-devices */ -struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev); - -#endif /* __ASM_STA2X11_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 8ec97a62c245..cd761b14eb02 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,33 +2,10 @@ /* * GCC stack protector support. 
* - * Stack protector works by putting predefined pattern at the start of + * Stack protector works by putting a predefined pattern at the start of * the stack frame and verifying that it hasn't been overwritten when - * returning from the function. The pattern is called stack canary - * and unfortunately gcc requires it to be at a fixed offset from %gs. - * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 - * and x86_32 use segment registers differently and thus handles this - * requirement differently. - * - * On x86_64, %gs is shared by percpu area and stack canary. All - * percpu symbols are zero based and %gs points to the base of percpu - * area. The first occupant of the percpu area is always - * irq_stack_union which contains stack_canary at offset 40. Userland - * %gs is always saved and restored on kernel entry and exit using - * swapgs, so stack protector doesn't add any complexity there. - * - * On x86_32, it's slightly more complicated. As in x86_64, %gs is - * used for userland TLS. Unfortunately, some processors are much - * slower at loading segment registers with different value when - * entering and leaving the kernel, so the kernel uses %fs for percpu - * area and manages %gs lazily so that %gs is switched only when - * necessary, usually during task switch. - * - * As gcc requires the stack canary at %gs:20, %gs can't be managed - * lazily if stack protector is enabled, so the kernel saves and - * restores userland %gs on kernel entry and exit. This behavior is - * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in - * system.h to hide the details. + * returning from the function. The pattern is called the stack canary + * and is a unique value for each task. */ #ifndef _ASM_STACKPROTECTOR_H @@ -41,85 +18,40 @@ #include <asm/percpu.h> #include <asm/desc.h> -#include <linux/random.h> #include <linux/sched.h> -/* - * 24 byte read-only segment initializer for stack canary. Linker - * can't handle the address bit shifting. Address will be set in - * head_32 for boot CPU and setup_per_cpu_areas() for others. - */ -#define GDT_STACK_CANARY_INIT \ - [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), +DECLARE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard); /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; - u64 tsc; - -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); -#endif - /* - * We both use the random pool and the current TSC as a source - * of randomness. The TSC only matters for very early init, - * there it already has some randomness on most systems. Later - * on during the bootup the random pool has true entropy too. 
- */ - get_random_bytes(&canary, sizeof(canary)); - tsc = rdtsc(); - canary += tsc + (tsc << 32UL); - canary &= CANARY_MASK; + unsigned long canary = get_random_canary(); current->stack_canary = canary; -#ifdef CONFIG_X86_64 - this_cpu_write(irq_stack_union.stack_canary, canary); -#else - this_cpu_write(stack_canary.canary, canary); -#endif + this_cpu_write(__stack_chk_guard, canary); } -static inline void setup_stack_canary_segment(int cpu) +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) { -#ifdef CONFIG_X86_32 - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); - struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu); - struct desc_struct desc; - - desc = gdt_table[GDT_ENTRY_STACK_CANARY]; - set_desc_base(&desc, canary); - write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); -#endif -} - -static inline void load_stack_canary_segment(void) -{ -#ifdef CONFIG_X86_32 - asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); -#endif + per_cpu(__stack_chk_guard, cpu) = idle->stack_canary; } #else /* STACKPROTECTOR */ -#define GDT_STACK_CANARY_INIT - /* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ -static inline void setup_stack_canary_segment(int cpu) +static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle) { } -static inline void load_stack_canary_segment(void) -{ -#ifdef CONFIG_X86_32 - asm volatile ("mov %0, %%gs" : : "r" (0)); -#endif -} - #endif /* STACKPROTECTOR */ #endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a4..3881b5333eb8 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -9,6 +9,8 @@ #include <linux/uaccess.h> #include <linux/ptrace.h> + +#include <asm/cpu_entry_area.h> #include <asm/switch_to.h> enum stack_type { @@ -33,6 +35,18 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); +bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, + struct stack_info *info); + +static __always_inline +bool get_stack_guard_info(unsigned long *stack, struct stack_info *info) +{ + /* make sure it's not in the stack proper */ + if (get_stack_info_noinstr(stack, current, info)) + return false; + /* but if it is in the page below it, we hit a guard */ + return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info); +} const char *stack_type_name(enum stack_type type); @@ -76,7 +90,7 @@ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) - return (unsigned long *)kernel_stack_pointer(regs); + return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); @@ -84,9 +98,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) return (unsigned long *)task->thread.sp; } -void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl); - /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; @@ -98,19 +109,6 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long caller_frame_pointer(void) -{ - struct stack_frame *frame; - - frame = __builtin_frame_address(0); - -#ifdef CONFIG_FRAME_POINTER - frame = frame->next_frame; -#endif - - return (unsigned long)frame; -} - void 
show_opcodes(struct pt_regs *regs, const char *loglvl);
 void show_ip(struct pt_regs *regs, const char *loglvl);
 #endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
new file mode 100644
index 000000000000..4cd725a8fe91
--- /dev/null
+++ b/arch/x86/include/asm/static_call.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#include <asm/text-patching.h>
+
+/*
+ * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
+ * uses the current value of the key->func pointer to do an indirect jump to
+ * the function. This trampoline is only used during boot, before the call
+ * sites get patched by static_call_update(). The name of this trampoline has
+ * a magical aspect: objtool uses it to find static call sites so it can create
+ * the .static_call_sites section.
+ *
+ * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
+ * does a direct jump to the function. The direct jump gets patched by
+ * static_call_update().
+ *
+ * Having the trampoline in a special section forces GCC to emit a JMP.d32 when
+ * it does tail-call optimization on the call, since you cannot compute the
+ * relative displacement across sections.
+ */
+
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD both provides a speculation stop and serves as a unique
+ * 3-byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
+#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ ANNOTATE_NOENDBR " \n" \
+ insns " \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
+#ifdef CONFIG_MITIGATION_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
+
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
+
+#define ARCH_ADD_TRAMP_KEY(name) \
+ asm(".pushsection .static_call_tramp_key, \"a\" \n" \
+ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
+ ".long " STATIC_CALL_KEY_STR(name) " - .
\n" \ + ".popsection \n") + +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + +extern void __static_call_update_early(void *tramp, void *func); + +#define static_call_update_early(name, _func) \ +({ \ + typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \ + if (static_call_initialized) { \ + __static_call_update(&STATIC_CALL_KEY(name), \ + STATIC_CALL_TRAMP_ADDR(name), __F);\ + } else { \ + WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \ + __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\ + __F); \ + } \ +}) + +#endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h index c3c2c1914d65..9cb5aae7fba9 100644 --- a/arch/x86/include/asm/string.h +++ b/arch/x86/include/asm/string.h @@ -1,6 +1,32 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_STRING_H +#define _ASM_X86_STRING_H + #ifdef CONFIG_X86_32 # include <asm/string_32.h> #else # include <asm/string_64.h> #endif + +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len) +{ + void *ret = to; + + asm volatile("rep movsb" + : "+D" (to), "+S" (from), "+c" (len) + : : "memory"); + return ret; +} + +static __always_inline void *__inline_memset(void *s, int v, size_t n) +{ + void *ret = s; + + asm volatile("rep stosb" + : "+D" (s), "+c" (n) + : "a" ((uint8_t)v) + : "memory"); + return ret; +} + +#endif /* _ASM_X86_STRING_H */ diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 55d392c6bd29..e9cce169bb4c 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -33,11 +33,11 @@ extern size_t strlen(const char *s); static __always_inline void *__memcpy(void *to, const void *from, size_t n) { int d0, d1, d2; - asm volatile("rep ; movsl\n\t" + asm volatile("rep movsl\n\t" "movl %4,%%ecx\n\t" "andl $3,%%ecx\n\t" "jz 1f\n\t" - "rep ; movsb\n\t" + "rep movsb\n\t" "1:" : "=&c" (d0), "=&D" (d1), "=&S" (d2) : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) @@ -89,7 +89,7 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, if (n >= 5 * 4) { /* large block: use rep prefix */ int ecx; - asm volatile("rep ; movsl" + asm volatile("rep movsl" : "=&c" (ecx), "=&D" (edi), "=&S" (esi) : "0" (n / 4), "1" (edi), "2" (esi) : "memory" @@ -146,49 +146,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, extern void *memcpy(void *, const void *, size_t); #ifndef CONFIG_FORTIFY_SOURCE -#ifdef CONFIG_X86_USE_3DNOW -#include <asm/mmx.h> - -/* - * This CPU favours 3DNow strongly (eg AMD Athlon) - */ - -static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) -{ - if (len < 512) - return __constant_memcpy(to, from, len); - return _mmx_memcpy(to, from, len); -} - -static inline void *__memcpy3d(void *to, const void *from, size_t len) -{ - if (len < 512) - return __memcpy(to, from, len); - return _mmx_memcpy(to, from, len); -} - -#define memcpy(t, f, n) \ - (__builtin_constant_p((n)) \ - ? __constant_memcpy3d((t), (f), (n)) \ - : __memcpy3d((t), (f), (n))) - -#else - -/* - * No 3D Now! - */ - -#if (__GNUC__ >= 4) #define memcpy(t, f, n) __builtin_memcpy(t, f, n) -#else -#define memcpy(t, f, n) \ - (__builtin_constant_p((n)) \ - ? 
__constant_memcpy((t), (f), (n)) \ - : __memcpy((t), (f), (n))) -#endif -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMMOVE @@ -205,8 +165,7 @@ extern void *memchr(const void *cs, int c, size_t count); static inline void *__memset_generic(void *s, char c, size_t count) { int d0, d1; - asm volatile("rep\n\t" - "stosb" + asm volatile("rep stosb" : "=&c" (d0), "=&D" (d1) : "a" (c), "1" (s), "0" (count) : "memory"); @@ -216,29 +175,6 @@ static inline void *__memset_generic(void *s, char c, size_t count) /* we might want to write optimized versions of these later */ #define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) -/* - * memset(x, 0, y) is a reasonably common thing to do, so we want to fill - * things 32 bits at a time even when we don't know the size of the - * area at compile-time.. - */ -static __always_inline -void *__constant_c_memset(void *s, unsigned long c, size_t count) -{ - int d0, d1; - asm volatile("rep ; stosl\n\t" - "testb $2,%b3\n\t" - "je 1f\n\t" - "stosw\n" - "1:\ttestb $1,%b3\n\t" - "je 2f\n\t" - "stosb\n" - "2:" - : "=&c" (d0), "=&D" (d1) - : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) - : "memory"); - return s; -} - /* Added by Gertjan van Wingerde to make minix and sysv module work */ #define __HAVE_ARCH_STRNLEN extern size_t strnlen(const char *s, size_t count); @@ -247,72 +183,6 @@ extern size_t strnlen(const char *s, size_t count); #define __HAVE_ARCH_STRSTR extern char *strstr(const char *cs, const char *ct); -/* - * This looks horribly ugly, but the compiler can optimize it totally, - * as we by now know that both pattern and count is constant.. - */ -static __always_inline -void *__constant_c_and_count_memset(void *s, unsigned long pattern, - size_t count) -{ - switch (count) { - case 0: - return s; - case 1: - *(unsigned char *)s = pattern & 0xff; - return s; - case 2: - *(unsigned short *)s = pattern & 0xffff; - return s; - case 3: - *(unsigned short *)s = pattern & 0xffff; - *((unsigned char *)s + 2) = pattern & 0xff; - return s; - case 4: - *(unsigned long *)s = pattern; - return s; - } - -#define COMMON(x) \ - asm volatile("rep ; stosl" \ - x \ - : "=&c" (d0), "=&D" (d1) \ - : "a" (eax), "0" (count/4), "1" ((long)s) \ - : "memory") - - { - int d0, d1; -#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 - /* Workaround for broken gcc 4.0 */ - register unsigned long eax asm("%eax") = pattern; -#else - unsigned long eax = pattern; -#endif - - switch (count % 4) { - case 0: - COMMON(""); - return s; - case 1: - COMMON("\n\tstosb"); - return s; - case 2: - COMMON("\n\tstosw"); - return s; - default: - COMMON("\n\tstosw\n\tstosb"); - return s; - } - } - -#undef COMMON -} - -#define __constant_c_x_memset(s, c, count) \ - (__builtin_constant_p(count) \ - ? __constant_c_and_count_memset((s), (c), (count)) \ - : __constant_c_memset((s), (c), (count))) - #define __memset(s, c, count) \ (__builtin_constant_p(count) \ ? __constant_count_memset((s), (c), (count)) \ @@ -321,23 +191,14 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, size_t); #ifndef CONFIG_FORTIFY_SOURCE -#if (__GNUC__ >= 4) #define memset(s, c, count) __builtin_memset(s, c, count) -#else -#define memset(s, c, count) \ - (__builtin_constant_p(c) \ - ? 
__constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ - (count)) \ - : __memset((s), (c), (count))) -#endif #endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { int d0, d1; - asm volatile("rep\n\t" - "stosw" + asm volatile("rep stosw" : "=&c" (d0), "=&D" (d1) : "a" (v), "1" (s), "0" (n) : "memory"); @@ -348,8 +209,7 @@ static inline void *memset16(uint16_t *s, uint16_t v, size_t n) static inline void *memset32(uint32_t *s, uint32_t v, size_t n) { int d0, d1; - asm volatile("rep\n\t" - "stosl" + asm volatile("rep stosl" : "=&c" (d0), "=&D" (d1) : "a" (v), "1" (s), "0" (n) : "memory"); diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 4e4194e21a09..4635616863f5 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -10,68 +10,68 @@ /* Even with __builtin_ the compiler may decide to use the out of line function. */ +#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY) +#include <linux/kmsan_string.h> +#endif + #define __HAVE_ARCH_MEMCPY 1 extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); -#ifndef CONFIG_FORTIFY_SOURCE -#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 -#define memcpy(dst, src, len) \ -({ \ - size_t __len = (len); \ - void *__ret; \ - if (__builtin_constant_p(len) && __len >= 64) \ - __ret = __memcpy((dst), (src), __len); \ - else \ - __ret = __builtin_memcpy((dst), (src), __len); \ - __ret; \ -}) -#endif -#endif /* !CONFIG_FORTIFY_SOURCE */ - #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); void *__memset(void *s, int c, size_t n); +KCFI_REFERENCE(__memset); +/* + * KMSAN needs to instrument as much code as possible. Use C versions of + * memsetXX() from lib/string.c under KMSAN. + */ +#if !defined(CONFIG_KMSAN) #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosw" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const auto s0 = s; + asm volatile ( + "rep stosw" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET32 static inline void *memset32(uint32_t *s, uint32_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosl" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const auto s0 = s; + asm volatile ( + "rep stosl" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET64 static inline void *memset64(uint64_t *s, uint64_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosq" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const auto s0 = s; + asm volatile ( + "rep stosq" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } +#endif #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); void *__memmove(void *dest, const void *src, size_t count); +KCFI_REFERENCE(__memmove); int memcmp(const void *cs, const void *ct, size_t count); size_t strlen(const char *s); @@ -79,56 +79,6 @@ char *strcpy(char *dest, const char *src); char *strcat(char *dest, const char *src); int strcmp(const char *cs, const char *ct); -#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) - -/* - * For files that not instrumented (e.g. 
mm/slub.c) we
- * should use not instrumented version of mem* functions.
- */
-
-#undef memcpy
-#define memcpy(dst, src, len) __memcpy(dst, src, len)
-#define memmove(dst, src, len) __memmove(dst, src, len)
-#define memset(s, c, n) __memset(s, c, n)
-
-#ifndef __NO_FORTIFY
-#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
-#endif
-
-#endif
-
-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
- size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst: destination address
- * @src: source address
- * @cnt: number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
- if (static_branch_unlikely(&mcsafe_key))
- return __memcpy_mcsafe(dst, src, cnt);
- else
-#endif
- memcpy(dst, src, cnt);
- return 0;
-}
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
 void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index fdbd9d7b7bca..e8e5aab06255 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -9,19 +9,12 @@
 #include <asm/desc.h>
 #include <asm/fpu/api.h>
+#include <asm/msr.h>
 
 /* image of the saved processor state */
 struct saved_context {
- /*
- * On x86_32, all segment registers, with the possible exception of
- * gs, are saved at kernel entry in pt_regs.
- */
-#ifdef CONFIG_X86_32_LAZY_GS
- u16 gs;
-#endif
 unsigned long cr0, cr2, cr3, cr4;
 u64 misc_enable;
- bool misc_enable_saved;
 struct saved_msrs saved_msrs;
 struct desc_ptr gdt_desc;
 struct desc_ptr idt;
@@ -30,6 +23,12 @@ struct saved_context {
 unsigned long tr;
 unsigned long safety;
 unsigned long return_address;
+ /*
+ * On x86_32, all segment registers except gs are saved at kernel
+ * entry in pt_regs.
+ */
+ u16 gs;
+ bool misc_enable_saved;
 } __attribute__((packed));
 
 /* routines for saving/restoring kernel state */
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index a7af9f53c0cb..b512f9665f78 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -9,14 +9,19 @@
 #include <asm/desc.h>
 #include <asm/fpu/api.h>
+#include <asm/msr.h>
 
 /*
 * Image of the saved processor state, used by the low level ACPI suspend to
 * RAM code and by the low level hibernation code.
 *
- * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that
- * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c,
- * still work as required.
+ * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S
+ * and make sure that __save/__restore_processor_state(), defined in
+ * arch/x86/power/cpu.c, still work as required.
+ *
+ * Because the structure is packed, make sure to avoid unaligned members,
+ * both for optimisation purposes and because tools like kmemleak only
+ * search for pointers that are aligned.
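+ *
+ * That is why, for example, the hunk below moves the one-byte
+ * misc_enable_saved flag to the very end of the structure: left between
+ * the u64 and unsigned long members, it would push every later field to
+ * a misaligned offset.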
*/ struct saved_context { struct pt_regs regs; @@ -34,9 +39,8 @@ struct saved_context { */ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; - unsigned long cr0, cr2, cr3, cr4, cr8; + unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ @@ -48,6 +52,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); #define loaddebug(thread,register) \ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index dec9c1e84c78..56aa99503dc4 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -3,10 +3,56 @@ #define __SVM_H #include <uapi/asm/svm.h> - +#include <uapi/asm/kvm.h> + +#include <hyperv/hvhdk.h> + +/* + * 32-bit intercept words in the VMCB Control Area, starting + * at Byte offset 000h. + */ + +enum intercept_words { + INTERCEPT_CR = 0, + INTERCEPT_DR, + INTERCEPT_EXCEPTION, + INTERCEPT_WORD3, + INTERCEPT_WORD4, + INTERCEPT_WORD5, + MAX_INTERCEPT, +}; enum { - INTERCEPT_INTR, + /* Byte offset 000h (word 0) */ + INTERCEPT_CR0_READ = 0, + INTERCEPT_CR3_READ = 3, + INTERCEPT_CR4_READ = 4, + INTERCEPT_CR8_READ = 8, + INTERCEPT_CR0_WRITE = 16, + INTERCEPT_CR3_WRITE = 16 + 3, + INTERCEPT_CR4_WRITE = 16 + 4, + INTERCEPT_CR8_WRITE = 16 + 8, + /* Byte offset 004h (word 1) */ + INTERCEPT_DR0_READ = 32, + INTERCEPT_DR1_READ, + INTERCEPT_DR2_READ, + INTERCEPT_DR3_READ, + INTERCEPT_DR4_READ, + INTERCEPT_DR5_READ, + INTERCEPT_DR6_READ, + INTERCEPT_DR7_READ, + INTERCEPT_DR0_WRITE = 48, + INTERCEPT_DR1_WRITE, + INTERCEPT_DR2_WRITE, + INTERCEPT_DR3_WRITE, + INTERCEPT_DR4_WRITE, + INTERCEPT_DR5_WRITE, + INTERCEPT_DR6_WRITE, + INTERCEPT_DR7_WRITE, + /* Byte offset 008h (word 2) */ + INTERCEPT_EXCEPTION_OFFSET = 64, + /* Byte offset 00Ch (word 3) */ + INTERCEPT_INTR = 96, INTERCEPT_NMI, INTERCEPT_SMI, INTERCEPT_INIT, @@ -38,7 +84,8 @@ enum { INTERCEPT_TASK_SWITCH, INTERCEPT_FERR_FREEZE, INTERCEPT_SHUTDOWN, - INTERCEPT_VMRUN, + /* Byte offset 010h (word 4) */ + INTERCEPT_VMRUN = 128, INTERCEPT_VMMCALL, INTERCEPT_VMLOAD, INTERCEPT_VMSAVE, @@ -52,15 +99,31 @@ enum { INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, + INTERCEPT_RDPRU, + TRAP_EFER_WRITE, + TRAP_CR0_WRITE, + TRAP_CR1_WRITE, + TRAP_CR2_WRITE, + TRAP_CR3_WRITE, + TRAP_CR4_WRITE, + TRAP_CR5_WRITE, + TRAP_CR6_WRITE, + TRAP_CR7_WRITE, + TRAP_CR8_WRITE, + /* Byte offset 014h (word 5) */ + INTERCEPT_INVLPGB = 160, + INTERCEPT_INVLPGB_ILLEGAL, + INTERCEPT_INVPCID, + INTERCEPT_MCOMMIT, + INTERCEPT_TLBSYNC, + INTERCEPT_BUSLOCK, + INTERCEPT_IDLE_HLT = 166, }; struct __attribute__ ((__packed__)) vmcb_control_area { - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - u8 reserved_1[40]; + u32 intercepts[MAX_INTERCEPT]; + u32 reserved_1[15 - MAX_INTERCEPT]; u16 pause_filter_thresh; u16 pause_filter_count; u64 iopm_base_pa; @@ -81,7 +144,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 exit_int_info_err; u64 nested_ctl; u64 avic_vapic_bar; - u8 reserved_4[8]; + u64 ghcb_gpa; u32 event_inj; u32 event_inj_err; u64 nested_cr3; @@ -95,7 +158,22 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[768]; + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ + u8 reserved_8[16]; + u16 bus_lock_counter; /* Offset 0x120 */ + u8 
reserved_9[22]; + u64 allowed_sev_features; /* Offset 0x138 */ + u64 guest_sev_features; /* Offset 0x140 */ + u8 reserved_10[664]; + /* + * Offset 0x3e0, 32 bytes reserved + * for use by hypervisor/software. + */ + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; @@ -112,25 +190,40 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_GIF_SHIFT 9 #define V_GIF_MASK (1 << V_GIF_SHIFT) +#define V_NMI_PENDING_SHIFT 11 +#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT) + +#define V_NMI_BLOCKING_SHIFT 12 +#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) #define V_GIF_ENABLE_SHIFT 25 #define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) +#define V_NMI_ENABLE_SHIFT 26 +#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT) + #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) +#define X2APIC_MODE_SHIFT 30 +#define X2APIC_MODE_MASK (1 << X2APIC_MODE_SHIFT) + #define LBR_CTL_ENABLE_MASK BIT_ULL(0) #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) -#define SVM_INTERRUPT_SHADOW_MASK 1 +#define SVM_INTERRUPT_SHADOW_MASK BIT_ULL(0) +#define SVM_GUEST_INTERRUPT_MASK BIT_ULL(1) #define SVM_IOIO_STR_SHIFT 2 #define SVM_IOIO_REP_SHIFT 3 @@ -143,21 +236,85 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) -#define SVM_VM_CR_VALID_MASK 0x001fULL -#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL -#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL - #define SVM_NESTED_CTL_NP_ENABLE BIT(0) #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) +#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) + + +#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL +#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL +#define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL +#define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL + + +/* AVIC */ +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) + +/* + * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible + * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate + * GA log interrupts to wake the vCPU (because it's blocking or about to block). 
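+ *
+ * (The GA log here is the AMD IOMMU's guest virtual APIC (GA) log, which
+ * accumulates device-posted interrupts that arrive while the target vCPU
+ * is not running.)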
+ */ +#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR BIT_ULL(61) + +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) +#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK GENMASK_ULL(51, 12) +#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) +#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL) + +#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) + +#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 +#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 +#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF + +enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_INVALID_INT_TYPE, + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, +}; -struct __attribute__ ((__packed__)) vmcb_seg { +#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0) + +/* + * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as + * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually. + */ +#define AVIC_MAX_PHYSICAL_ID 0XFEULL + +/* + * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511). + * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095). + */ +#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL +#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL + +static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID); +static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID); +static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID); + +#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0) +#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3) +#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4) +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5) +#define SVM_SEV_FEAT_SECURE_TSC BIT(9) + +#define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63) + +struct vmcb_seg { u16 selector; u16 attrib; u32 limit; u64 base; -}; +} __packed; -struct __attribute__ ((__packed__)) vmcb_save_area { +/* Save area definition for legacy and SEV-MEM guests */ +struct vmcb_save_area { struct vmcb_seg es; struct vmcb_seg cs; struct vmcb_seg ss; @@ -168,11 +325,13 @@ struct __attribute__ ((__packed__)) vmcb_save_area { struct vmcb_seg ldtr; struct vmcb_seg idtr; struct vmcb_seg tr; - u8 reserved_1[43]; + /* Reserved fields are named following their struct offset */ + u8 reserved_0xa0[42]; + u8 vmpl; u8 cpl; - u8 reserved_2[4]; + u8 reserved_0xcc[4]; u64 efer; - u8 reserved_3[112]; + u8 reserved_0xd8[112]; u64 cr4; u64 cr3; u64 cr0; @@ -180,9 +339,11 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 dr6; u64 rflags; u64 rip; - u8 reserved_4[88]; + u8 reserved_0x180[88]; u64 rsp; - u8 reserved_5[24]; + u64 s_cet; + u64 ssp; + u64 isst_addr; u64 rax; u64 star; u64 lstar; @@ -193,24 +354,251 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_6[32]; + u8 reserved_0x248[32]; u64 g_pat; u64 dbgctl; u64 br_from; u64 br_to; u64 last_excp_from; u64 last_excp_to; -}; + u8 reserved_0x298[72]; + u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ +} __packed; -struct __attribute__ ((__packed__)) vmcb { +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct sev_es_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg 
idtr; + struct vmcb_seg tr; + u64 pl0_ssp; + u64 pl1_ssp; + u64 pl2_ssp; + u64 pl3_ssp; + u64 u_cet; + u8 reserved_0xc8[2]; + u8 vmpl; + u8 cpl; + u8 reserved_0xcc[4]; + u64 efer; + u8 reserved_0xd8[104]; + u64 xss; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u64 dr0; + u64 dr1; + u64 dr2; + u64 dr3; + u64 dr0_addr_mask; + u64 dr1_addr_mask; + u64 dr2_addr_mask; + u64 dr3_addr_mask; + u8 reserved_0x1c0[24]; + u64 rsp; + u64 s_cet; + u64 ssp; + u64 isst_addr; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_0x248[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; + u8 reserved_0x298[80]; + u32 pkru; + u32 tsc_aux; + u64 tsc_scale; + u64 tsc_offset; + u8 reserved_0x300[8]; + u64 rcx; + u64 rdx; + u64 rbx; + u64 reserved_0x320; /* rsp already available at 0x01d8 */ + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_0x380[16]; + u64 guest_exit_info_1; + u64 guest_exit_info_2; + u64 guest_exit_int_info; + u64 guest_nrip; + u64 sev_features; + u64 vintr_ctrl; + u64 guest_exit_code; + u64 virtual_tom; + u64 tlb_id; + u64 pcpu_id; + u64 event_inj; + u64 xcr0; + u8 reserved_0x3f0[16]; + + /* Floating point area */ + u64 x87_dp; + u32 mxcsr; + u16 x87_ftw; + u16 x87_fsw; + u16 x87_fcw; + u16 x87_fop; + u16 x87_ds; + u16 x87_cs; + u64 x87_rip; + u8 fpreg_x87[80]; + u8 fpreg_xmm[256]; + u8 fpreg_ymm[256]; +} __packed; + +struct ghcb_save_area { + u8 reserved_0x0[203]; + u8 cpl; + u8 reserved_0xcc[116]; + u64 xss; + u8 reserved_0x148[24]; + u64 dr7; + u8 reserved_0x168[16]; + u64 rip; + u8 reserved_0x180[88]; + u64 rsp; + u8 reserved_0x1e0[24]; + u64 rax; + u8 reserved_0x200[264]; + u64 rcx; + u64 rdx; + u64 rbx; + u8 reserved_0x320[8]; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_0x380[16]; + u64 sw_exit_code; + u64 sw_exit_info_1; + u64 sw_exit_info_2; + u64 sw_scratch; + u8 reserved_0x3b0[56]; + u64 xcr0; + u8 valid_bitmap[16]; + u64 x87_state_gpa; +} __packed; + +#define GHCB_SHARED_BUF_SIZE 2032 + +struct ghcb { + struct ghcb_save_area save; + u8 reserved_save[2048 - sizeof(struct ghcb_save_area)]; + + u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; + + u8 reserved_0xff0[10]; + u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ + u32 ghcb_usage; +} __packed; + +struct vmcb { struct vmcb_control_area control; - struct vmcb_save_area save; -}; + union { + struct vmcb_save_area save; + + /* + * For SEV-ES VMs, the save area in the VMCB is used only to + * save/load host state. Guest state resides in a separate + * page, the aptly named VM Save Area (VMSA), that is encrypted + * with the guest's private key. 
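+ *
+ * Register state that such a guest wants to expose is instead exchanged
+ * with the hypervisor through the GHCB defined above.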
+ */ + struct sev_es_save_area host_sev_es_save; + }; +} __packed; + +#define EXPECTED_VMCB_SAVE_AREA_SIZE 744 +#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 +#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648 +#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 +#define EXPECTED_GHCB_SIZE PAGE_SIZE + +#define BUILD_BUG_RESERVED_OFFSET(x, y) \ + ASSERT_STRUCT_OFFSET(struct x, reserved ## _ ## y, y) + +static inline void __unused_size_checks(void) +{ + BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); + + /* Check offsets of reserved fields */ + + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xa0); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x298); + + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xc8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x300); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0); + + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x148); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x168); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x1e0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x200); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x3b0); + + BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0); +} #define SVM_CPUID_FUNC 0x8000000a -#define SVM_VM_CR_SVM_DISABLE 4 - #define SVM_SELECTOR_S_SHIFT 4 #define SVM_SELECTOR_DPL_SHIFT 5 #define SVM_SELECTOR_P_SHIFT 7 @@ -232,32 +620,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK #define SVM_SELECTOR_CODE_MASK (1 << 3) -#define INTERCEPT_CR0_READ 0 -#define INTERCEPT_CR3_READ 3 -#define INTERCEPT_CR4_READ 4 -#define INTERCEPT_CR8_READ 8 -#define INTERCEPT_CR0_WRITE (16 + 0) -#define INTERCEPT_CR3_WRITE (16 + 3) -#define INTERCEPT_CR4_WRITE (16 + 4) -#define INTERCEPT_CR8_WRITE (16 + 8) - -#define INTERCEPT_DR0_READ 0 -#define INTERCEPT_DR1_READ 1 -#define INTERCEPT_DR2_READ 2 -#define INTERCEPT_DR3_READ 3 -#define INTERCEPT_DR4_READ 4 -#define INTERCEPT_DR5_READ 5 -#define INTERCEPT_DR6_READ 6 -#define INTERCEPT_DR7_READ 7 -#define INTERCEPT_DR0_WRITE (16 + 0) -#define INTERCEPT_DR1_WRITE (16 + 1) -#define INTERCEPT_DR2_WRITE (16 + 2) -#define INTERCEPT_DR3_WRITE (16 + 3) -#define INTERCEPT_DR4_WRITE (16 + 4) -#define INTERCEPT_DR5_WRITE (16 + 5) -#define INTERCEPT_DR6_WRITE (16 + 6) -#define INTERCEPT_DR7_WRITE (16 + 7) - #define SVM_EVTINJ_VEC_MASK 0xff #define 
SVM_EVTINJ_TYPE_SHIFT 8 @@ -290,4 +652,58 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) +/* GHCB Accessor functions */ + +#define GHCB_BITMAP_IDX(field) \ + (offsetof(struct ghcb_save_area, field) / sizeof(u64)) + +#define DEFINE_GHCB_ACCESSORS(field) \ + static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ + { \ + return test_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + } \ + \ + static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + { \ + return ghcb->save.field; \ + } \ + \ + static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + { \ + return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ + } \ + \ + static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ + { \ + __set_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + ghcb->save.field = value; \ + } + +DEFINE_GHCB_ACCESSORS(cpl) +DEFINE_GHCB_ACCESSORS(rip) +DEFINE_GHCB_ACCESSORS(rsp) +DEFINE_GHCB_ACCESSORS(rax) +DEFINE_GHCB_ACCESSORS(rcx) +DEFINE_GHCB_ACCESSORS(rdx) +DEFINE_GHCB_ACCESSORS(rbx) +DEFINE_GHCB_ACCESSORS(rbp) +DEFINE_GHCB_ACCESSORS(rsi) +DEFINE_GHCB_ACCESSORS(rdi) +DEFINE_GHCB_ACCESSORS(r8) +DEFINE_GHCB_ACCESSORS(r9) +DEFINE_GHCB_ACCESSORS(r10) +DEFINE_GHCB_ACCESSORS(r11) +DEFINE_GHCB_ACCESSORS(r12) +DEFINE_GHCB_ACCESSORS(r13) +DEFINE_GHCB_ACCESSORS(r14) +DEFINE_GHCB_ACCESSORS(r15) +DEFINE_GHCB_ACCESSORS(sw_exit_code) +DEFINE_GHCB_ACCESSORS(sw_exit_info_1) +DEFINE_GHCB_ACCESSORS(sw_exit_info_2) +DEFINE_GHCB_ACCESSORS(sw_scratch) +DEFINE_GHCB_ACCESSORS(xcr0) +DEFINE_GHCB_ACCESSORS(xss) + #endif diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h deleted file mode 100644 index ff6c92eff035..000000000000 --- a/arch/x86/include/asm/swiotlb.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_SWIOTLB_H -#define _ASM_X86_SWIOTLB_H - -#include <linux/swiotlb.h> - -#ifdef CONFIG_SWIOTLB -extern int swiotlb; -extern int __init pci_swiotlb_detect_override(void); -extern int __init pci_swiotlb_detect_4gb(void); -extern void __init pci_swiotlb_init(void); -extern void __init pci_swiotlb_late_init(void); -#else -#define swiotlb 0 -static inline int pci_swiotlb_detect_override(void) -{ - return 0; -} -static inline int pci_swiotlb_detect_4gb(void) -{ - return 0; -} -static inline void pci_swiotlb_init(void) -{ -} -static inline void pci_swiotlb_late_init(void) -{ -} -#endif -#endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 7cf1a270d891..499b1c15cc8b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,28 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -/* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *next) -{ -#ifdef CONFIG_VMAP_STACK - /* - * If we switch to a stack that has a top-level paging entry - * that is not present in the current mm, the resulting #PF will - * will be promoted to a double-fault and we'll panic. Probe - * the new stack now so that vmalloc_fault can fix up the page - * tables if needed. This can only happen if we use a stack - * in vmap space. - * - * We assume that the stack is aligned so that it never spans - * more than one top-level paging entry. 
- * - * To minimize cache pollution, just follow the stack pointer. - */ - READ_ONCE(*(unsigned char *)next->thread.sp); -#endif -} - -asmlinkage void ret_from_fork(void); +asmlinkage void ret_from_fork_asm(void); +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg); /* * This is the structure pointed to by thread.sp for an inactive task. The @@ -46,6 +27,7 @@ struct inactive_task_frame { unsigned long r13; unsigned long r12; #else + unsigned long flags; unsigned long si; unsigned long di; #endif @@ -66,12 +48,12 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(next); \ - \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) #ifdef CONFIG_X86_32 +#include <asm/msr.h> + static inline void refresh_sysenter_cs(struct thread_struct *thread) { /* Only happens when SEP is enabled, no need to test "SEP"arately: */ @@ -79,7 +61,7 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread) return; this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + wrmsrq(MSR_IA32_SYSENTER_CS, thread->sysenter_cs); } #endif @@ -88,21 +70,23 @@ static inline void update_task_stack(struct task_struct *task) { /* sp0 always points to the entry trampoline stack, which is constant: */ #ifdef CONFIG_X86_32 - if (static_cpu_has(X86_FEATURE_XENPV)) - load_sp0(task->thread.sp0); - else - this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0); #else - /* - * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That - * doesn't work on x86-32 because sp1 and - * cpu_current_top_of_stack have different values (because of - * the non-zero stack-padding on 32bit). - */ - if (static_cpu_has(X86_FEATURE_XENPV)) + if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV)) + /* Xen PV enters the kernel on the thread stack. */ load_sp0(task_top_of_stack(task)); #endif +} +static inline void kthread_frame_init(struct inactive_task_frame *frame, + int (*fun)(void *), void *arg) +{ + frame->bx = (unsigned long)fun; +#ifdef CONFIG_X86_32 + frame->di = (unsigned long)arg; +#else + frame->r12 = (unsigned long)arg; +#endif } #endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 2fe745356fb1..cd21a0405ac5 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -14,6 +14,8 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
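 *
 * Under this numbering, for example, sync_set_bit(33, addr) operates on
 * bit 1 of the long at (addr + 1).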
*/ +#include <asm/rmwcc.h> + #define ADDR (*(volatile long *)addr) /** @@ -29,7 +31,7 @@ */ static inline void sync_set_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; bts %1,%0" + asm volatile("lock " __ASM_SIZE(bts) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr) */ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btr %1,%0" + asm volatile("lock " __ASM_SIZE(btr) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) */ static inline void sync_change_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btc %1,%0" + asm volatile("lock " __ASM_SIZE(btc) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) +static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; bts %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(bts), *addr, c, "Ir", nr); } /** @@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; btr %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btr), *addr, c, "Ir", nr); } /** @@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; btc %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btc), *addr, c, "Ir", nr); } #define sync_test_bit(nr, addr) test_bit(nr, addr) diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index c67caafd3381..96bda43538ee 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -5,6 +5,88 @@ #include <linux/preempt.h> #include <asm/processor.h> #include <asm/cpufeature.h> +#include <asm/special_insns.h> + +#ifdef CONFIG_X86_32 +static __always_inline void iret_to_self(void) +{ + asm volatile ( + "pushfl\n\t" + "pushl %%cs\n\t" + "pushl $1f\n\t" + "iret\n\t" + "1:" + : ASM_CALL_CONSTRAINT : : "memory"); +} +#else +static __always_inline void iret_to_self(void) +{ + unsigned int tmp; + + asm volatile ( + "mov %%ss, %0\n\t" + "pushq %q0\n\t" + "pushq %%rsp\n\t" + "addq $8, (%%rsp)\n\t" + "pushfq\n\t" + "mov %%cs, %0\n\t" + "pushq %q0\n\t" + "pushq $1f\n\t" + "iretq\n\t" + "1:" + : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); +} +#endif /* CONFIG_X86_32 */ + +/* + * This function forces the icache and prefetched instruction stream to + * catch up with reality in two very specific cases: + * + * a) Text was modified using one virtual address and is about to be executed + * from the same physical page at a different virtual address. 
+ * + * b) Text was modified on a different CPU, may subsequently be + * executed on this CPU, and you want to make sure the new version + * gets executed. This generally means you're calling this in an IPI. + * + * If you're calling this for a different reason, you're probably doing + * it wrong. + * + * Like all of Linux's memory ordering operations, this is a + * compiler barrier as well. + */ +static __always_inline void sync_core(void) +{ + /* + * The SERIALIZE instruction is the most straightforward way to + * do this, but it is not universally available. + */ + if (static_cpu_has(X86_FEATURE_SERIALIZE)) { + serialize(); + return; + } + + /* + * For all other processors, there are quite a few ways to do this. + * IRET-to-self is nice because it works on every CPU, at any CPL + * (so it's compatible with paravirtualization), and it never exits + * to a hypervisor. The only downsides are that it's a bit slow + * (it seems to be a bit more than 2x slower than the fastest + * options) and that it unmasks NMIs. The "push %cs" is needed, + * because in paravirtual environments __KERNEL_CS may not be a + * valid CS value when we do IRET directly. + * + * In case NMI unmasking or performance ever becomes a problem, + * the next best option appears to be MOV-to-CR2 and an + * unconditional jump. That sequence also works on all CPUs, + * but it will fault at CPL3 (i.e. Xen PV). + * + * CPUID is the conventional way, but it's nasty: it doesn't + * exist on some 486-like CPUs, and it usually exits to a + * hypervisor. + */ + iret_to_self(); +} /* * Ensure that a core serializing instruction is issued before returning @@ -16,12 +98,13 @@ static inline void sync_core_before_usermode(void) /* With PTI, we unconditionally serialize before running user code. */ if (static_cpu_has(X86_FEATURE_PTI)) return; + /* - * Return from interrupt and NMI is done through iret, which is core - * serializing. + * Even if we're in an interrupt, we might reschedule before returning, + * in which case we could switch to a different thread in the same mm + * and return using SYSRET or SYSEXIT. Instead of trying to keep + * track of our need to sync the core, just sync right away. */ - if (in_irq() || in_nmi()) - return; sync_core(); } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af..c10dbb74cd00 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Access to user system call parameters and results * * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * * See asm-generic/syscall.h for descriptions of what we must do here. 
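 *
 * In short, the helpers below shuttle arguments between pt_regs and the
 * syscall: regs->bx, cx, dx, si, di, bp for 32-bit (and TS_COMPAT) tasks,
 * and regs->di, si, dx, r10, r8, r9 for 64-bit ones.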
*/
@@ -16,28 +13,20 @@
 #include <uapi/linux/audit.h>
 #include <linux/sched.h>
 #include <linux/err.h>
-#include <asm/asm-offsets.h> /* For NR_syscalls */
 #include <asm/thread_info.h> /* for TS_COMPAT */
 #include <asm/unistd.h>
 
-#ifdef CONFIG_X86_64
-typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
-#else
-typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
- unsigned long, unsigned long,
- unsigned long, unsigned long);
-#endif /* CONFIG_X86_64 */
+/* This is used purely for kernel/trace/trace_syscalls.c */
+typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];
 
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#define __NR_syscall_compat_max __NR_syscall_max
-#define IA32_NR_syscalls NR_syscalls
-#endif
-
-#if defined(CONFIG_IA32_EMULATION)
-extern const sys_call_ptr_t ia32_sys_call_table[];
-#endif
+/*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
 
 /*
 * Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -49,6 +38,13 @@ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 return regs->orig_ax;
 }
 
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ regs->orig_ax = nr;
+}
+
 static inline void syscall_rollback(struct task_struct *task,
 struct pt_regs *regs)
 {
@@ -91,23 +87,29 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 struct pt_regs *regs,
- unsigned int i, unsigned int n,
 unsigned long *args)
 {
- BUG_ON(i + n > 6);
- memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+ args[0] = regs->bx;
+ args[1] = regs->cx;
+ args[2] = regs->dx;
+ args[3] = regs->si;
+ args[4] = regs->di;
+ args[5] = regs->bp;
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
 struct pt_regs *regs,
- unsigned int i, unsigned int n,
 const unsigned long *args)
 {
- BUG_ON(i + n > 6);
- memcpy(&regs->bx + i, args, n * sizeof(args[0]));
+ regs->bx = args[0];
+ regs->cx = args[1];
+ regs->dx = args[2];
+ regs->si = args[3];
+ regs->di = args[4];
+ regs->bp = args[5];
 }
 
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
 {
 return AUDIT_ARCH_I386;
 }
@@ -116,131 +118,67 @@ static inline int syscall_get_arch(void)
 
 static inline void syscall_get_arguments(struct task_struct *task,
 struct pt_regs *regs,
- unsigned int i, unsigned int n,
 unsigned long *args)
 {
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if
(!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->r10; - case 4: - if (!n--) break; - *args++ = regs->r8; - case 5: - if (!n--) break; - *args++ = regs->r9; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + *args++ = regs->di; + *args++ = regs->si; + *args++ = regs->dx; + *args++ = regs->r10; + *args++ = regs->r8; + *args = regs->r9; + } } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - regs->bx = *args++; - case 1: - if (!n--) break; - regs->cx = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->si = *args++; - case 4: - if (!n--) break; - regs->di = *args++; - case 5: - if (!n--) break; - regs->bp = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + regs->bx = *args++; + regs->cx = *args++; + regs->dx = *args++; + regs->si = *args++; + regs->di = *args++; + regs->bp = *args; + } else # endif - switch (i) { - case 0: - if (!n--) break; - regs->di = *args++; - case 1: - if (!n--) break; - regs->si = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->r10 = *args++; - case 4: - if (!n--) break; - regs->r8 = *args++; - case 5: - if (!n--) break; - regs->r9 = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + regs->di = *args++; + regs->si = *args++; + regs->dx = *args++; + regs->r10 = *args++; + regs->r8 = *args++; + regs->r9 = *args; + } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ - return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } + +bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); + #endif /* CONFIG_X86_32 */ +void do_int80_syscall_32(struct pt_regs *regs); +bool do_fast_syscall_32(struct pt_regs *regs); +bool do_SYSENTER_32(struct pt_regs *regs); + #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..7e88705e907f 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,86 +6,183 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +#include <asm/ptrace.h> + +extern long __x64_sys_ni_syscall(const struct pt_regs *regs); +extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes + * struct pt_regs *regs as the only argument of the syscall stub(s) named as: + * __x64_sys_*() - 64-bit native syscall + * __ia32_sys_*() - 32-bit native syscall or common compat syscall + * __ia32_compat_sys_*() - 32-bit compat syscall + * __x64_compat_sys_*() - 64-bit X32 compat syscall + * + * The registers are decoded according to the ABI: + * 64-bit: RDI, RSI, RDX, R10, R8, R9 + * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP + * + * The stub then passes the decoded arguments to the __se_sys_*() wrapper to + * perform sign-extension (omitted for zero-argument syscalls). 
Finally, the
+ * arguments are passed to the __do_sys_*() function, which is the actual
+ * syscall. These wrappers are marked as inline so the compiler can optimize
+ * the functions where appropriate.
+ *
+ * Example assembly (slightly re-ordered for better readability):
+ *
+ * <__x64_sys_recv>: <-- syscall with 4 parameters
+ * callq <__fentry__>
+ *
+ * mov 0x70(%rdi),%rdi <-- decode regs->di
+ * mov 0x68(%rdi),%rsi <-- decode regs->si
+ * mov 0x60(%rdi),%rdx <-- decode regs->dx
+ * mov 0x38(%rdi),%rcx <-- decode regs->r10
+ *
+ * xor %r9d,%r9d <-- clear %r9
+ * xor %r8d,%r8d <-- clear %r8
+ *
+ * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
+ * which takes 6 arguments
+ *
+ * cltq <-- extend return value to 64-bit
+ * retq <-- return
+ *
+ * This approach avoids leaking random user-provided register content down
+ * the call chain.
+ */
+
 /* Mapping of registers to parameters for syscalls on x86-64 and x32 */
 #define SC_X86_64_REGS_TO_ARGS(x, ...) \
 __MAP(x,__SC_ARGS \
 ,,regs->di,,regs->si,,regs->dx \
 ,,regs->r10,,regs->r8,,regs->r9) \
+
+/* SYSCALL_PT_ARGS is adapted from s390x */
+#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \
+ SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp))
+#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \
+ SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di))
+#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \
+ SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si))
+#define SYSCALL_PT_ARG3(m, t1, t2, t3) \
+ SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx))
+#define SYSCALL_PT_ARG2(m, t1, t2) \
+ SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx))
+#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx))
+#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+
+#define __SC_COMPAT_CAST(t, a) \
+ (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \
+ (unsigned int)a
+
 /* Mapping of registers to parameters for syscalls on i386 */
 #define SC_IA32_REGS_TO_ARGS(x, ...) \
- __MAP(x,__SC_ARGS \
- ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
- ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
- ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+ SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \
+ __MAP(x, __SC_TYPE, __VA_ARGS__)) \
 
-#ifdef CONFIG_IA32_EMULATION
-/*
- * For IA32 emulation, we need to handle "compat" syscalls *and* create
- * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
- * ia32 regs in the proper order for shared or "common" syscalls. As some
- * syscalls may not be implemented, we need to expand COND_SYSCALL in
- * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
- * case as well.
- */
-#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
- ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
- asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
+#define __SYS_STUB0(abi, name) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
+ __alias(__do_##name);
+
+#define __SYS_STUBx(abi, name, ...) \
+ long __##abi##_##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
+ long __##abi##_##name(const struct pt_regs *regs) \
 { \
- return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ return __se_##name(__VA_ARGS__); \
+ }
 
-#define __IA32_SYS_STUBx(x, name, ...)
\ - asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \ - asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \ +#define __COND_SYSCALL(abi, name) \ + __weak long __##abi##_##name(const struct pt_regs *__unused); \ + __weak long __##abi##_##name(const struct pt_regs *__unused) \ { \ - return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + return sys_ni_syscall(); \ } +#ifdef CONFIG_X86_64 +#define __X64_SYS_STUB0(name) \ + __SYS_STUB0(x64, sys_##name) + +#define __X64_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(x64, sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X64_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, sys_##name) + +#else /* CONFIG_X86_64 */ +#define __X64_SYS_STUB0(name) +#define __X64_SYS_STUBx(x, name, ...) +#define __X64_COND_SYSCALL(name) +#endif /* CONFIG_X86_64 */ + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#define __IA32_SYS_STUB0(name) \ + __SYS_STUB0(ia32, sys_##name) + +#define __IA32_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __IA32_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, sys_##name) + +#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ +#define __IA32_SYS_STUB0(name) +#define __IA32_SYS_STUBx(x, name, ...) +#define __IA32_COND_SYSCALL(name) +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ + +#ifdef CONFIG_IA32_EMULATION /* - * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias - * named __ia32_sys_*() + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c to cover this case as well. */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) +#define __IA32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(ia32, compat_sys_##name) -#define COND_SYSCALL(name) \ - cond_syscall(__x64_sys_##name); \ - cond_syscall(__ia32_sys_##name) +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, compat_sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) +#define __IA32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, compat_sys_##name) #else /* CONFIG_IA32_EMULATION */ +#define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) -#define __IA32_SYS_STUBx(x, fullname, name, ...) +#define __IA32_COMPAT_COND_SYSCALL(name) #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ +#define __X32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(x64, compat_sys_##name) + #define __X32_COMPAT_SYS_STUBx(x, name, ...) 
\ - asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ - asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ - { \ - return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + __SYS_STUBx(x64, compat_sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, compat_sys_##name) -#else /* CONFIG_X86_X32 */ +#else /* CONFIG_X86_X32_ABI */ +#define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) -#endif /* CONFIG_X86_X32 */ +#define __X32_COMPAT_COND_SYSCALL(name) +#endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT @@ -94,6 +191,14 @@ * mapping of registers to parameters, we need to generate stubs for each * of them. */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused); \ + __IA32_COMPAT_SYS_STUB0(name) \ + __X32_COMPAT_SYS_STUB0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused) + #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -107,62 +212,18 @@ /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ - cond_syscall(__ia32_compat_sys_##name); \ - cond_syscall(__x32_compat_sys_##name) - -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers) + __IA32_COMPAT_COND_SYSCALL(name) \ + __X32_COMPAT_COND_SYSCALL(name) #endif /* CONFIG_COMPAT */ - -/* - * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes - * struct pt_regs *regs as the only argument of the syscall stub named - * __x64_sys_*(). It decodes just the registers it needs and passes them on to - * the __se_sys_*() wrapper performing sign extension and then to the - * __do_sys_*() function doing the actual job. These wrappers and functions - * are inlined (at least in very most cases), meaning that the assembly looks - * as follows (slightly re-ordered for better readability): - * - * <__x64_sys_recv>: <-- syscall with 4 parameters - * callq <__fentry__> - * - * mov 0x70(%rdi),%rdi <-- decode regs->di - * mov 0x68(%rdi),%rsi <-- decode regs->si - * mov 0x60(%rdi),%rdx <-- decode regs->dx - * mov 0x38(%rdi),%rcx <-- decode regs->r10 - * - * xor %r9d,%r9d <-- clear %r9 - * xor %r8d,%r8d <-- clear %r8 - * - * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() - * which takes 6 arguments - * - * cltq <-- extend return value to 64-bit - * retq <-- return - * - * This approach avoids leaking random user-provided register content down - * the call chain. - * - * If IA32_EMULATION is enabled, this macro generates an additional wrapper - * named __ia32_sys_*() which decodes the struct pt_regs *regs according - * to the i386 calling convention (bx, cx, dx, si, di, bp). - */ #define __SYSCALL_DEFINEx(x, name, ...) 
\ - asmlinkage long __x64_sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ - { \ - return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + __X64_SYS_STUBx(x, name, __VA_ARGS__) \ __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ @@ -177,33 +238,27 @@ * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not * hurt, we only need to re-define it here to keep the naming congruent to - * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() - * macros to work correctly. + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro + * to work correctly. */ -#ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) -#endif +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + static long __do_sys_##sname(const struct pt_regs *__unused); \ + __X64_SYS_STUB0(sname) \ + __IA32_SYS_STUB0(sname) \ + static long __do_sys_##sname(const struct pt_regs *__unused) -#ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) -#endif - -#ifndef SYS_NI -#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); -#endif +#define COND_SYSCALL(name) \ + __X64_COND_SYSCALL(name) \ + __IA32_COND_SYSCALL(name) /* * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -struct pt_regs; -asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); -asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); -asmlinkage long __x64_sys_time(const struct pt_regs *regs); +long __x64_sys_getcpu(const struct pt_regs *regs); +long __x64_sys_gettimeofday(const struct pt_regs *regs); +long __x64_sys_time(const struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 9fa979dd0d9d..6714a358235d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -1,51 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * syscalls.h - Linux syscall interfaces (arch-specific) * * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. 
*/ #ifndef _ASM_X86_SYSCALLS_H #define _ASM_X86_SYSCALLS_H -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <linux/signal.h> -#include <linux/types.h> - /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); -#ifdef CONFIG_X86_32 -/* - * These definitions are only valid on pure 32-bit systems; x86-64 uses a - * different syscall calling convention - */ -asmlinkage long sys_ioperm(unsigned long, unsigned long, int); -asmlinkage long sys_iopl(unsigned int); - -/* kernel/ldt.c */ -asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); - -/* kernel/signal.c */ -asmlinkage long sys_rt_sigreturn(void); - -/* kernel/tls.c */ -asmlinkage long sys_set_thread_area(struct user_desc __user *); -asmlinkage long sys_get_thread_area(struct user_desc __user *); - -/* X86_32 only */ - -/* kernel/signal.c */ -asmlinkage long sys_sigreturn(void); - -/* kernel/vm86_32.c */ -struct vm86_struct; -asmlinkage long sys_vm86old(struct vm86_struct __user *); -asmlinkage long sys_vm86(unsigned long, unsigned long); - -#endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALLS_H */ diff --git a/arch/x86/include/asm/sysfb.h b/arch/x86/include/asm/sysfb.h deleted file mode 100644 index 2aeb3e25579c..000000000000 --- a/arch/x86/include/asm/sysfb.h +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef _ARCH_X86_KERNEL_SYSFB_H -#define _ARCH_X86_KERNEL_SYSFB_H - -/* - * Generic System Framebuffers on x86 - * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include <linux/kernel.h> -#include <linux/platform_data/simplefb.h> -#include <linux/screen_info.h> - -enum { - M_I17, /* 17-Inch iMac */ - M_I20, /* 20-Inch iMac */ - M_I20_SR, /* 20-Inch iMac (Santa Rosa) */ - M_I24, /* 24-Inch iMac */ - M_I24_8_1, /* 24-Inch iMac, 8,1th gen */ - M_I24_10_1, /* 24-Inch iMac, 10,1th gen */ - M_I27_11_1, /* 27-Inch iMac, 11,1th gen */ - M_MINI, /* Mac Mini */ - M_MINI_3_1, /* Mac Mini, 3,1th gen */ - M_MINI_4_1, /* Mac Mini, 4,1th gen */ - M_MB, /* MacBook */ - M_MB_2, /* MacBook, 2nd rev. */ - M_MB_3, /* MacBook, 3rd rev. */ - M_MB_5_1, /* MacBook, 5th rev. */ - M_MB_6_1, /* MacBook, 6th rev. */ - M_MB_7_1, /* MacBook, 7th rev. 
*/ - M_MB_SR, /* MacBook, 2nd gen, (Santa Rosa) */ - M_MBA, /* MacBook Air */ - M_MBA_3, /* Macbook Air, 3rd rev */ - M_MBP, /* MacBook Pro */ - M_MBP_2, /* MacBook Pro 2nd gen */ - M_MBP_2_2, /* MacBook Pro 2,2nd gen */ - M_MBP_SR, /* MacBook Pro (Santa Rosa) */ - M_MBP_4, /* MacBook Pro, 4th gen */ - M_MBP_5_1, /* MacBook Pro, 5,1th gen */ - M_MBP_5_2, /* MacBook Pro, 5,2th gen */ - M_MBP_5_3, /* MacBook Pro, 5,3rd gen */ - M_MBP_6_1, /* MacBook Pro, 6,1th gen */ - M_MBP_6_2, /* MacBook Pro, 6,2th gen */ - M_MBP_7_1, /* MacBook Pro, 7,1th gen */ - M_MBP_8_2, /* MacBook Pro, 8,2nd gen */ - M_UNKNOWN /* placeholder */ -}; - -struct efifb_dmi_info { - char *optname; - unsigned long base; - int stride; - int width; - int height; - int flags; -}; - -#ifdef CONFIG_EFI - -extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(void); - -#else /* CONFIG_EFI */ - -static inline void sysfb_apply_efi_quirks(void) -{ -} - -#endif /* CONFIG_EFI */ - -#ifdef CONFIG_X86_SYSFB - -bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode); -int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); - -#else /* CONFIG_X86_SYSFB */ - -static inline bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode) -{ - return false; -} - -static inline int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) -{ - return -EINVAL; -} - -#endif /* CONFIG_X86_SYSFB */ - -#endif /* _ARCH_X86_KERNEL_SYSFB_H */ diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h deleted file mode 100644 index 7a6677c1a715..000000000000 --- a/arch/x86/include/asm/tce.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This file is derived from asm-powerpc/tce.h. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Muli Ben-Yehuda <muli@il.ibm.com> - * Author: Jon Mason <jdmason@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_X86_TCE_H
-#define _ASM_X86_TCE_H
-
-extern unsigned int specified_table_size;
-struct iommu_table;
-
-#define TCE_ENTRY_SIZE   8   /* in bytes */
-
-#define TCE_READ_SHIFT   0
-#define TCE_WRITE_SHIFT  1
-#define TCE_HUBID_SHIFT  2   /* unused */
-#define TCE_RSVD_SHIFT   8   /* unused */
-#define TCE_RPN_SHIFT    12
-#define TCE_UNUSED_SHIFT 48  /* unused */
-
-#define TCE_RPN_MASK     0x0000fffffffff000ULL
-
-extern void tce_build(struct iommu_table *tbl, unsigned long index,
-		      unsigned int npages, unsigned long uaddr, int direction);
-extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
-extern void * __init alloc_tce_table(void);
-extern void __init free_tce_table(void *tbl);
-extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
-
-#endif /* _ASM_X86_TCE_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
new file mode 100644
index 000000000000..6b338d7f01b7
--- /dev/null
+++ b/arch/x86/include/asm/tdx.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022 Intel Corporation */
+#ifndef _ASM_X86_TDX_H
+#define _ASM_X86_TDX_H
+
+#include <linux/init.h>
+#include <linux/bits.h>
+#include <linux/mmzone.h>
+
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/trapnr.h>
+#include <asm/shared/tdx.h>
+
+/*
+ * SW-defined error codes.
+ *
+ * Bits 47:40 == 0xFF indicate a reserved status code class that is never
+ * used by the TDX module.
+ */
+#define TDX_ERROR			_BITUL(63)
+#define TDX_NON_RECOVERABLE		_BITUL(62)
+#define TDX_SW_ERROR			(TDX_ERROR | GENMASK_ULL(47, 40))
+#define TDX_SEAMCALL_VMFAILINVALID	(TDX_SW_ERROR | _UL(0xFFFF0000))
+
+#define TDX_SEAMCALL_GP			(TDX_SW_ERROR | X86_TRAP_GP)
+#define TDX_SEAMCALL_UD			(TDX_SW_ERROR | X86_TRAP_UD)
+
+/*
+ * TDX module SEAMCALL leaf function error codes
+ */
+#define TDX_SUCCESS		0ULL
+#define TDX_RND_NO_ENTROPY	0x8000020300000000ULL
+
+#ifndef __ASSEMBLER__
+
+#include <uapi/asm/mce.h>
+#include <asm/tdx_global_metadata.h>
+#include <linux/pgtable.h>
+
+/*
+ * Used by the #VE exception handler to gather the #VE exception
+ * info from the TDX module. This is a software-only structure
+ * and not part of the TDX module/VMM ABI.
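+ *
+ * tdx_get_ve_info() below fills it in, retrieving the data from the
+ * TDX module via the TDG.VP.VEINFO.GET TDCALL.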
+ */ +struct ve_info { + u64 exit_reason; + u64 exit_qual; + /* Guest Linear (virtual) Address */ + u64 gla; + /* Guest Physical Address */ + u64 gpa; + u32 instr_len; + u32 instr_info; +}; + +#ifdef CONFIG_INTEL_TDX_GUEST + +void __init tdx_early_init(void); + +void tdx_get_ve_info(struct ve_info *ve); + +bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); + +void tdx_halt(void); + +bool tdx_early_handle_ve(struct pt_regs *regs); + +int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); + +int tdx_mcall_extend_rtmr(u8 index, u8 *data); + +u64 tdx_hcall_get_quote(u8 *buf, size_t size); + +void __init tdx_dump_attributes(u64 td_attr); +void __init tdx_dump_td_ctls(u64 td_ctls); + +#else + +static inline void tdx_early_init(void) { }; +static inline void tdx_halt(void) { }; + +static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } + +#endif /* CONFIG_INTEL_TDX_GUEST */ + +#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_INTEL_TDX_GUEST) +long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4); +#else +static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + return -ENODEV; +} +#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */ + +#ifdef CONFIG_INTEL_TDX_HOST +u64 __seamcall(u64 fn, struct tdx_module_args *args); +u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); +u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); +void tdx_init(void); + +#include <linux/preempt.h> +#include <asm/archrandom.h> +#include <asm/processor.h> + +typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); + +static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + lockdep_assert_preemption_disabled(); + + /* + * SEAMCALLs are made to the TDX module and can generate dirty + * cachelines of TDX private memory. Mark cache state incoherent + * so that the cache can be flushed during kexec. + * + * This needs to be done before actually making the SEAMCALL, + * because kexec-ing CPU could send NMI to stop remote CPUs, + * in which case even disabling IRQ won't help here. 
+ */ + this_cpu_write(cache_state_incoherent, true); + + return func(fn, args); +} + +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + int retry = RDRAND_RETRY_LOOPS; + u64 ret; + + do { + preempt_disable(); + ret = __seamcall_dirty_cache(func, fn, args); + preempt_enable(); + } while (ret == TDX_RND_NO_ENTROPY && --retry); + + return ret; +} + +#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args)) +#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args)) +#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args)) +int tdx_cpu_enable(void); +int tdx_enable(void); +const char *tdx_dump_mce_info(struct mce *m); +const struct tdx_sys_info *tdx_get_sysinfo(void); + +int tdx_guest_keyid_alloc(void); +u32 tdx_get_nr_guest_keyids(void); +void tdx_guest_keyid_free(unsigned int keyid); + +void tdx_quirk_reset_page(struct page *page); + +struct tdx_td { + /* TD root structure: */ + struct page *tdr_page; + + int tdcs_nr_pages; + /* TD control structure: */ + struct page **tdcs_pages; + + /* Size of `tdcx_pages` in struct tdx_vp */ + int tdcx_nr_pages; +}; + +struct tdx_vp { + /* TDVP root page */ + struct page *tdvpr_page; + /* precalculated page_to_phys(tdvpr_page) for use in noinstr code */ + phys_addr_t tdvpr_pa; + + /* TD vCPU control structure: */ + struct page **tdcx_pages; +}; + +static inline u64 mk_keyed_paddr(u16 hkid, struct page *page) +{ + u64 ret; + + ret = page_to_phys(page); + /* KeyID bits are just above the physical address bits: */ + ret |= (u64)hkid << boot_cpu_data.x86_phys_bits; + + return ret; +} + +static inline int pg_level_to_tdx_sept_level(enum pg_level level) +{ + WARN_ON_ONCE(level == PG_LEVEL_NONE); + return level - 1; +} + +u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args); +u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); +u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2); +u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page); +u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mng_key_config(struct tdx_td *td); +u64 tdh_mng_create(struct tdx_td *td, u16 hkid); +u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp); +u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data); +u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2); +u64 tdh_mr_finalize(struct tdx_td *td); +u64 tdh_vp_flush(struct tdx_vp *vp); +u64 tdh_mng_vpflushdone(struct tdx_td *td); +u64 tdh_mng_key_freeid(struct tdx_td *td); +u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err); +u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); +u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data); +u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask); +u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size); +u64 tdh_mem_track(struct tdx_td *tdr); +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2); +u64 tdh_phymem_cache_wb(bool resume); +u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td); +u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page); +#else +static inline void tdx_init(void) { } 
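As a usage sketch of the wrappers above: a caller fills the input registers of
struct tdx_module_args, issues the leaf through one of the seamcall*() macros
(which retry while the module reports TDX_RND_NO_ENTROPY), and reads any
outputs back from the args structure. The leaf number and helper below are
hypothetical; real callers such as tdh_mng_rd() follow the same pattern:

#define TDH_EXAMPLE_LEAF	99ULL		/* hypothetical leaf number */

static int tdx_example_read_field(u64 field, u64 *data)
{
	struct tdx_module_args args = {
		.rdx = field,			/* leaf-specific input */
	};
	u64 err;

	/* sc_retry() loops internally on TDX_RND_NO_ENTROPY */
	err = seamcall_ret(TDH_EXAMPLE_LEAF, &args);
	if (err != TDX_SUCCESS)
		return -EIO;

	/* __seamcall_ret() copied the output registers back into args */
	*data = args.r8;			/* leaf-specific output */
	return 0;
}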
+static inline int tdx_cpu_enable(void) { return -ENODEV; } +static inline int tdx_enable(void) { return -ENODEV; } +static inline u32 tdx_get_nr_guest_keyids(void) { return 0; } +static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } +static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; } +#endif /* CONFIG_INTEL_TDX_HOST */ + +#ifdef CONFIG_KEXEC_CORE +void tdx_cpu_flush_cache_for_kexec(void); +#else +static inline void tdx_cpu_flush_cache_for_kexec(void) { } +#endif + +#endif /* !__ASSEMBLER__ */ +#endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h new file mode 100644 index 000000000000..060a2ad744bf --- /dev/null +++ b/arch/x86/include/asm/tdx_global_metadata.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Automatically generated TDX global metadata structures. */ +#ifndef _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H +#define _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H + +#include <linux/types.h> + +struct tdx_sys_info_features { + u64 tdx_features0; +}; + +struct tdx_sys_info_tdmr { + u16 max_tdmrs; + u16 max_reserved_per_tdmr; + u16 pamt_4k_entry_size; + u16 pamt_2m_entry_size; + u16 pamt_1g_entry_size; +}; + +struct tdx_sys_info_td_ctrl { + u16 tdr_base_size; + u16 tdcs_base_size; + u16 tdvps_base_size; +}; + +struct tdx_sys_info_td_conf { + u64 attributes_fixed0; + u64 attributes_fixed1; + u64 xfam_fixed0; + u64 xfam_fixed1; + u16 num_cpuid_config; + u16 max_vcpus_per_td; + u64 cpuid_config_leaves[128]; + u64 cpuid_config_values[128][2]; +}; + +struct tdx_sys_info { + struct tdx_sys_info_features features; + struct tdx_sys_info_tdmr tdmr; + struct tdx_sys_info_td_ctrl td_ctrl; + struct tdx_sys_info_td_conf td_conf; +}; + +#endif diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3..f2d142a0a862 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -6,19 +6,16 @@ #include <linux/stddef.h> #include <asm/ptrace.h> -struct paravirt_patch_site; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end); -#else -static inline void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - -extern void *text_poke_early(void *addr, const void *opcode, size_t len); +/* + * Currently, the max observed size in the kernel code is + * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. + * Raise it if needed. + */ +#define TEXT_POKE_MAX_OPCODE_SIZE 5 + +extern void text_poke_early(void *addr, const void *opcode, size_t len); + +extern void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len); /* * Clear and restore the kernel write-protection flag on the local CPU. @@ -31,12 +28,191 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); * no thread can be preempted in the instructions being modified (no iret to an * invalid instruction possible) or if the instructions are changed from a * consistent state to another consistent state atomically. - * On the local CPU you need to be protected again NMI or MCE handlers seeing an - * inconsistent instruction while you patch. + * On the local CPU you need to be protected against NMI or MCE handlers seeing + * an inconsistent instruction while you patch. 
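+ *
+ * Callers of text_poke() must hold text_mutex; text_poke_kgdb() is the
+ * exception, relying on kgdb having stopped all other CPUs instead.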
*/ extern void *text_poke(void *addr, const void *opcode, size_t len); -extern int poke_int3_handler(struct pt_regs *regs); -extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void smp_text_poke_sync_each_cpu(void); +extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +#define text_poke_copy text_poke_copy +extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); +extern void *text_poke_set(void *addr, int c, size_t len); +extern int smp_text_poke_int3_handler(struct pt_regs *regs); +extern void smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate); + +extern void smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate); +extern void smp_text_poke_batch_finish(void); + +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define RET_INSN_SIZE 1 +#define RET_INSN_OPCODE 0xC3 + +#define CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP32_INSN_SIZE 5 +#define JMP32_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB + +#define DISP32_SIZE 4 + +static __always_inline int text_opcode_size(u8 opcode) +{ + int size = 0; + +#define __CASE(insn) \ + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break + + switch(opcode) { + __CASE(INT3); + __CASE(RET); + __CASE(CALL); + __CASE(JMP32); + __CASE(JMP8); + } + +#undef __CASE + + return size; +} + +union text_poke_insn { + u8 text[TEXT_POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) +{ + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); + + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. + */ + OPTIMIZER_HIDE_VAR(insn); + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); + + insn->opcode = opcode; + + if (size > 1) { + insn->disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP8 the displacement + * actually fits the signed byte. + */ + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); + } + } +} + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); + return &insn.text; +} + extern int after_bootmem; +extern __ro_after_init struct mm_struct *text_poke_mm; +extern __ro_after_init unsigned long text_poke_mm_addr; + +#ifndef CONFIG_UML_X86 +static __always_inline +void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + +static __always_inline +void int3_emulate_push(struct pt_regs *regs, unsigned long val) +{ + /* + * The INT3 handler in entry_64.S adds a gap between the + * stack where the break point happened, and the saving of + * pt_regs. We can extend the original stack because of + * this gap. See the idtentry macro's X86_TRAP_BP logic. + * + * Similarly, entry_32.S will have a gap on the stack for + * (any) hardware exception and pt_regs; see the + * FIXUP_FRAME macro. 
+ */
+	regs->sp -= sizeof(unsigned long);
+	*(unsigned long *)regs->sp = val;
+}
+
+static __always_inline
+unsigned long int3_emulate_pop(struct pt_regs *regs)
+{
+	unsigned long val = *(unsigned long *)regs->sp;
+	regs->sp += sizeof(unsigned long);
+	return val;
+}
+
+static __always_inline
+void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+	int3_emulate_jmp(regs, func);
+}
+
+static __always_inline
+void int3_emulate_ret(struct pt_regs *regs)
+{
+	unsigned long ip = int3_emulate_pop(regs);
+	int3_emulate_jmp(regs, ip);
+}
+
+static __always_inline
+bool __emulate_cc(unsigned long flags, u8 cc)
+{
+	static const unsigned long cc_mask[6] = {
+		[0] = X86_EFLAGS_OF,
+		[1] = X86_EFLAGS_CF,
+		[2] = X86_EFLAGS_ZF,
+		[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
+		[4] = X86_EFLAGS_SF,
+		[5] = X86_EFLAGS_PF,
+	};
+
+	bool invert = cc & 1;
+	bool match;
+
+	if (cc < 0xc) {
+		match = flags & cc_mask[cc >> 1];
+	} else {
+		match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+			((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+		if (cc >= 0xe)
+			match = match || (flags & X86_EFLAGS_ZF);
+	}
+
+	return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+	if (__emulate_cc(regs->flags, cc))
+		ip += disp;
+
+	int3_emulate_jmp(regs, ip);
+}
+
+#endif /* !CONFIG_UML_X86 */
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
new file mode 100644
index 000000000000..91a7b6687c3b
--- /dev/null
+++ b/arch/x86/include/asm/thermal.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_THERMAL_H
+#define _ASM_X86_THERMAL_H
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
+void intel_init_thermal(struct cpuinfo_x86 *c);
+bool x86_thermal_enabled(void);
+void intel_thermal_interrupt(void);
+#else
+static inline void therm_lvt_init(void) { }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+#endif
+
+#endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0eccbcb8447..e71e0e8362ed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,9 @@
 * In vm86 mode, the hardware frame is much longer still, so add 16
 * bytes to make room for the real-mode segments.
 *
- * x86_64 has a fixed-length stack frame.
+ * x86-64 also has a fixed-length stack frame, but its length depends on
+ * whether FRED is enabled. Future versions of FRED might make this
+ * dynamic, but for now a FRED frame is always 2 words longer.
*/ #ifdef CONFIG_X86_32 # ifdef CONFIG_VM86 @@ -39,8 +41,12 @@ # else # define TOP_OF_KERNEL_STACK_PADDING 8 # endif -#else -# define TOP_OF_KERNEL_STACK_PADDING 0 +#else /* x86-64 */ +# ifdef CONFIG_X86_FRED +# define TOP_OF_KERNEL_STACK_PADDING (2 * 8) +# else +# define TOP_OF_KERNEL_STACK_PADDING 0 +# endif #endif /* @@ -48,14 +54,18 @@ * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; #include <asm/cpufeature.h> #include <linux/atomic.h> struct thread_info { unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ +#ifdef CONFIG_SMP + u32 cpu; /* current CPU */ +#endif }; #define INIT_THREAD_INFO(tsk) \ @@ -63,86 +73,53 @@ struct thread_info { .flags = 0, \ } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #include <asm/asm-offsets.h> #endif /* - * thread information flags - * - these are process state flags that various assembly files - * may need to access + * Tell the generic TIF infrastructure which bits x86 supports */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Speculative store bypass disable */ -#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ -#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ -#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ -#define TIF_UPROBE 12 /* breakpointed or singlestepping */ -#define TIF_PATCH_PENDING 13 /* pending live patching update */ -#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ -#define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_IA32 17 /* IA32 compatibility process */ -#define TIF_NOHZ 19 /* in adaptive nohz mode */ -#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ -#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ +#define HAVE_TIF_NEED_RESCHED_LAZY +#define HAVE_TIF_POLLING_NRFLAG +#define HAVE_TIF_SINGLESTEP + +#include <asm-generic/thread_info_tif.h> + +/* Architecture specific TIF space starts at 16 */ +#define TIF_SSBD 16 /* Speculative store bypass disable */ +#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */ +#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */ +#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */ +#define TIF_NOTSC 21 /* TSC is not accessible in userland */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ +#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/ +#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ -#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ -#define 
TIF_ADDR32 29 /* 32-bit address space on 64 bits */ -#define TIF_X32 30 /* 32-bit native x86-64 binary */ -#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_SSBD (1 << TIF_SSBD) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) -#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) -#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) -#define _TIF_UPROBE (1 << TIF_UPROBE) -#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) -#define _TIF_NOCPUID (1 << TIF_NOCPUID) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_NOHZ (1 << TIF_NOHZ) -#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) -#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) -#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) -#define _TIF_ADDR32 (1 << TIF_ADDR32) -#define _TIF_X32 (1 << TIF_X32) -#define _TIF_FSCHECK (1 << TIF_FSCHECK) - -/* - * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for - * enter_from_user_mode() - */ -#define _TIF_WORK_SYSCALL_ENTRY \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ) +#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */ + +#define _TIF_SSBD BIT(TIF_SSBD) +#define _TIF_SPEC_IB BIT(TIF_SPEC_IB) +#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH) +#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD) +#define _TIF_NOCPUID BIT(TIF_NOCPUID) +#define _TIF_NOTSC BIT(TIF_NOTSC) +#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP) +#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE) +#define _TIF_FORCED_TF BIT(TIF_FORCED_TF) +#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP) +#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP) +#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES) +#define _TIF_ADDR32 BIT(TIF_ADDR32) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW_BASE \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ +#define _TIF_WORK_CTXSW_BASE \ + (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* @@ -154,8 +131,14 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ + _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) @@ -164,7 +147,7 @@ struct thread_info { * * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Walks up the stack frames to make sure that the specified object is @@ -174,7 +157,12 @@ struct thread_info { * GOOD_FRAME if within a frame * BAD_STACK if placed across a frame boundary (or outside stack) * NOT_STACK unable to determine (no frame pointers, etc) + * + * This function reads pointers from the stack and dereferences them. The + * pointers may not have their KMSAN shadow set up properly, which may result + * in false positive reports. Disable instrumentation to avoid those. */ +__no_kmsan_checks static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, const void *obj, unsigned long len) @@ -211,18 +199,25 @@ static inline int arch_within_stack_frames(const void * const stack, #endif } -#else /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifdef CONFIG_X86_64 -# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) -#endif - -#endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ +#ifndef __ASSEMBLER__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ + +#define arch_set_restart_data(restart) \ + do { restart->arch_data = current_thread_info()->status; } while (0) + #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true @@ -231,20 +226,8 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -/* - * Force syscall return via IRET by making it look as if there was - * some work pending. IRET is our most capable (but slowest) syscall - * return path, which is able to restore modified SS, CS and certain - * EFLAGS values that other (fast) syscall return instructions - * are not able to restore properly. 
- */ -#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) - -extern void arch_task_cache_init(void); -extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -extern void arch_release_task_struct(struct task_struct *tsk); extern void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index cef818b16045..f360104ed172 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -6,7 +6,8 @@ #include <asm/mc146818rtc.h> extern void hpet_time_init(void); -extern void time_init(void); +extern bool pit_timer_init(void); +extern bool tsc_clocksource_watchdog_disabled(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 7365dd4acffb..23baf8c9b34c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -6,8 +6,6 @@ #include <linux/interrupt.h> #include <linux/math64.h> -#define TICK_SIZE (tick_nsec / 1000) - unsigned long long native_sched_clock(void); extern void recalibrate_cpu_khz(void); diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c..956e4145311b 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include <asm/processor.h> #include <asm/tsc.h> +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 404b8b1d44f5..866ea78ba156 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -2,13 +2,13 @@ #ifndef _ASM_X86_TLB_H #define _ASM_X86_TLB_H -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - +#define tlb_flush tlb_flush static inline void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> +#include <linux/kernel.h> +#include <vdso/bits.h> +#include <vdso/page.h> static inline void tlb_flush(struct mmu_gather *tlb) { @@ -23,18 +23,144 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables); } +static inline void invlpg(unsigned long addr) +{ + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); +} + +enum addr_stride { + PTE_STRIDE = 0, + PMD_STRIDE = 1 +}; + +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: + * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address + * - FLAG_PCID: invalidate all TLB entries matching the PCID + * + * The first is used to invalidate (kernel) mappings at a particular + * address across all processes. + * + * The latter invalidates all TLB entries matching a PCID. 
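+ *
+ * The two can also be combined: FLAG_VA | FLAG_PCID invalidates one
+ * address in one PCID, which is the combination used further below for
+ * flushing ranges of user addresses.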
+ */
+#define INVLPGB_FLAG_VA			BIT(0)
+#define INVLPGB_FLAG_PCID		BIT(1)
+#define INVLPGB_FLAG_ASID		BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL	BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY		BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED	BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS	0UL
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
 /*
- * While x86 architecture in general requires an IPI to perform TLB
- * shootdown, enablement code for several hypervisors overrides
- * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
- * a hypercall. To keep software pagetable walkers safe in this case we
- * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment
- * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h
- * for more details.
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond the
+ * first page, while __invlpgb gets the more human-readable number of pages to
+ * invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If none of these bits are set,
+ * *any* address in the specified range matches.
+ *
+ * Since it is desired to only flush TLB entries for the ASID that is executing
+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should
+ * always be set. On a host/hypervisor, the hardware will use the ASID value
+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will
+ * use the actual ASID value of the guest.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
 */
-static inline void __tlb_remove_table(void *table)
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     enum addr_stride stride, u8 flags)
 {
-	free_page_and_swap_cache(table);
+	u64 rax = addr | flags | INVLPGB_FLAG_ASID;
+	u32 ecx = (stride << 31) | (nr_pages - 1);
+	u32 edx = (pcid << 16) | asid;
+
+	/* The low bits in rax are for flags. Verify addr is clean. */
+	VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+	/* INVLPGB; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+	__invlpgb(asid, pcid, 0, 1, 0, flags);
 }

+static inline void __tlbsync(void)
+{
+	/*
+	 * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+	 * to have completed. Print a warning if the task has been migrated,
+	 * and might not be waiting on all the INVLPGBs issued during this TLB
+	 * invalidation sequence.
+	 */
+	cant_migrate();
+
+	/* TLBSYNC; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr, bool stride)
+{
+	enum addr_stride str = stride ?
PMD_STRIDE : PTE_STRIDE; + u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA; + + __invlpgb(0, pcid, addr, nr, str, flags); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ + __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invlpgb_flush_all(void) +{ + /* + * TLBSYNC at the end needs to make sure all flushes done on the + * current CPU have been executed system-wide. Therefore, make + * sure nothing gets migrated in-between but disable preemption + * as it is cheaper. + */ + guard(preempt)(); + __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL); + __tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ +static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ + __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invlpgb_flush_all_nonglobals(void) +{ + guard(preempt)(); + __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS); + __tlbsync(); +} #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h index 1ad56eb3e8a8..80aaf64ff25f 100644 --- a/arch/x86/include/asm/tlbbatch.h +++ b/arch/x86/include/asm/tlbbatch.h @@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch { * the PFNs being flushed.. */ struct cpumask cpumask; + /* + * Set if pages were unmapped from any MM, even one that does not + * have active CPUs in its cpumask. + */ + bool unmapped_pages; }; #endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f4204bf377fc..00daedfefc1b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -2,9 +2,11 @@ #ifndef _ASM_X86_TLBFLUSH_H #define _ASM_X86_TLBFLUSH_H -#include <linux/mm.h> +#include <linux/mm_types.h> +#include <linux/mmu_notifier.h> #include <linux/sched.h> +#include <asm/barrier.h> #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> @@ -12,141 +14,56 @@ #include <asm/invpcid.h> #include <asm/pti.h> #include <asm/processor-flags.h> +#include <asm/pgtable.h> -/* - * The x86 feature is called PCID (Process Context IDentifier). It is similar - * to what is traditionally called ASID on the RISC processors. - * - * We don't use the traditional ASID implementation, where each process/mm gets - * its own ASID and flush/restart when we run out of ASID space. - * - * Instead we have a small per-cpu array of ASIDs and cache the last few mm's - * that came by on this CPU, allowing cheaper switch_mm between processes on - * this CPU. - * - * We end up with different spaces for different things. To avoid confusion we - * use different names for each of them: - * - * ASID - [0, TLB_NR_DYN_ASIDS-1] - * the canonical identifier for an mm - * - * kPCID - [1, TLB_NR_DYN_ASIDS] - * the value we write into the PCID part of CR3; corresponds to the - * ASID+1, because PCID 0 is special. - * - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] - * for KPTI each mm has two address spaces and thus needs two - * PCID values, but we can still do with a single ASID denomination - * for each mm. Corresponds to kPCID + 2048. 
- * - */ +DECLARE_PER_CPU(u64, tlbstate_untag_mask); -/* There are 12 bits of space for ASIDS in CR3 */ -#define CR3_HW_ASID_BITS 12 +void __flush_tlb_all(void); -/* - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for - * user/kernel switches - */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define PTI_CONSUMED_PCID_BITS 1 -#else -# define PTI_CONSUMED_PCID_BITS 0 -#endif +#define TLB_FLUSH_ALL -1UL +#define TLB_GENERATION_INVALID 0 -#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) +void cr4_update_irqsoff(unsigned long set, unsigned long clear); +unsigned long cr4_read_shadow(void); -/* - * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because PCID 0 is reserved for - * use by non-PCID-aware users. - */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) - -/* - * 6 because 6 should be plenty and struct tlb_state will fit in two cache - * lines. - */ -#define TLB_NR_DYN_ASIDS 6 - -/* - * Given @asid, compute kPCID - */ -static inline u16 kern_pcid(u16 asid) +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits_irqsoff(unsigned long mask) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Make sure that the dynamic ASID space does not confict with the - * bit we are using to switch between user and kernel ASIDs. - */ - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); - - /* - * The ASID being passed in here should have respected the - * MAX_ASID_AVAILABLE and thus never have the switch bit set. - */ - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); -#endif - /* - * The dynamically-assigned ASIDs that get passed in are small - * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set, - * so do not bother to clear it. - * - * If PCID is on, ASID-aware code paths put the ASID+1 into the - * PCID bits. This serves two purposes. It prevents a nasty - * situation in which PCID-unaware code saves CR3, loads some other - * value (with PCID == 0), and then restores CR3, thus corrupting - * the TLB for ASID 0 if the saved ASID was nonzero. It also means - * that any bugs involving loading a PCID-enabled CR3 with - * CR4.PCIDE off will trigger deterministically. - */ - return asid + 1; + cr4_update_irqsoff(mask, 0); } -/* - * Given @asid, compute uPCID - */ -static inline u16 user_pcid(u16 asid) +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits_irqsoff(unsigned long mask) { - u16 ret = kern_pcid(asid); -#ifdef CONFIG_PAGE_TABLE_ISOLATION - ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; -#endif - return ret; + cr4_update_irqsoff(0, mask); } -struct pgd_t; -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits(unsigned long mask) { - if (static_cpu_has(X86_FEATURE_PCID)) { - return __sme_pa(pgd) | kern_pcid(asid); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(pgd); - } + unsigned long flags; + + local_irq_save(flags); + cr4_set_bits_irqsoff(mask); + local_irq_restore(flags); } -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits(unsigned long mask) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - /* - * Use boot_cpu_has() instead of this_cpu_has() as this function - * might be called during early boot. 
This should work even after - * boot because all CPU's the have same capabilities: - */ - VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; + unsigned long flags; + + local_irq_save(flags); + cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); } -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define __flush_tlb() __native_flush_tlb() -#define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) -#endif +#ifndef MODULE +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 struct tlb_context { u64 ctx_id; @@ -167,35 +84,18 @@ struct tlb_state { */ struct mm_struct *loaded_mm; -#define LOADED_MM_SWITCHING ((struct mm_struct *)1) +#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL) /* Last user mm for optimizing IBPB */ union { struct mm_struct *last_user_mm; - unsigned long last_user_mm_ibpb; + unsigned long last_user_mm_spec; }; u16 loaded_mm_asid; u16 next_asid; /* - * We can be in one of several states: - * - * - Actively using an mm. Our CPU's bit will be set in - * mm_cpumask(loaded_mm) and is_lazy == false; - * - * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit - * will not be set in mm_cpumask(&init_mm) and is_lazy == false. - * - * - Lazily using a real mm. loaded_mm != &init_mm, our bit - * is set in mm_cpumask(loaded_mm), but is_lazy == true. - * We're heuristically guessing that the CR3 load we - * skipped more than makes up for the overhead added by - * lazy mode. - */ - bool is_lazy; - - /* * If set we changed the page tables in such a way that we * needed an invalidation of all contexts (aka. PCIDs / ASIDs). * This tells us to go invalidate all the non-loaded ctxs[] @@ -206,6 +106,16 @@ struct tlb_state { */ bool invalidate_other; +#ifdef CONFIG_ADDRESS_MASKING + /* + * Active LAM mode. + * + * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM + * disabled. + */ + u8 lam; +#endif + /* * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate * the corresponding user PCID needs a flush next time we @@ -240,39 +150,30 @@ struct tlb_state { */ struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; }; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); - -/* - * Blindly accessing user memory from NMI context can be dangerous - * if we're in the middle of switching the current user task or - * switching the loaded mm. It can also be dangerous if we - * interrupted some kernel code that was temporarily using a - * different mm. - */ -static inline bool nmi_uaccess_okay(void) -{ - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - struct mm_struct *current_mm = current->mm; - - VM_WARN_ON_ONCE(!loaded_mm); +DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate); +struct tlb_state_shared { /* - * The condition we want to check is - * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, - * if we're running in a VM with shadow paging, and nmi_uaccess_okay() - * is supposed to be reasonably fast. + * We can be in one of several states: + * + * - Actively using an mm. Our CPU's bit will be set in + * mm_cpumask(loaded_mm) and is_lazy == false; * - * Instead, we check the almost equivalent but somewhat conservative - * condition below, and we rely on the fact that switch_mm_irqs_off() - * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. + * - Not using a real mm. loaded_mm == &init_mm. 
Our CPU's bit + * will not be set in mm_cpumask(&init_mm) and is_lazy == false. + * + * - Lazily using a real mm. loaded_mm != &init_mm, our bit + * is set in mm_cpumask(loaded_mm), but is_lazy == true. + * We're heuristically guessing that the CR3 load we + * skipped more than makes up for the overhead added by + * lazy mode. */ - if (loaded_mm != current_mm) - return false; + bool is_lazy; +}; +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); - VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); - - return true; -} +bool nmi_uaccess_okay(void); +#define nmi_uaccess_okay nmi_uaccess_okay /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) @@ -280,234 +181,15 @@ static inline void cr4_init_shadow(void) this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } -static inline void __cr4_set(unsigned long cr4) -{ - lockdep_assert_irqs_disabled(); - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); -} - -/* Set in this cpu's CR4. */ -static inline void cr4_set_bits(unsigned long mask) -{ - unsigned long cr4, flags; - - local_irq_save(flags); - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) - __cr4_set(cr4 | mask); - local_irq_restore(flags); -} - -/* Clear in this cpu's CR4. */ -static inline void cr4_clear_bits(unsigned long mask) -{ - unsigned long cr4, flags; - - local_irq_save(flags); - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) - __cr4_set(cr4 & ~mask); - local_irq_restore(flags); -} - -static inline void cr4_toggle_bits_irqsoff(unsigned long mask) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - __cr4_set(cr4 ^ mask); -} - -/* Read the CR4 shadow. */ -static inline unsigned long cr4_read_shadow(void) -{ - return this_cpu_read(cpu_tlbstate.cr4); -} - -/* - * Mark all other ASIDs as invalid, preserves the current. - */ -static inline void invalidate_other_asid(void) -{ - this_cpu_write(cpu_tlbstate.invalidate_other, true); -} - -/* - * Save some of cr4 feature set we're using (e.g. Pentium 4MB - * enable and PPro Global page enable), so that any CPU's that boot - * up after us can get the correct flags. This should only be used - * during boot on the boot cpu. - */ extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; -static inline void cr4_set_bits_and_update_boot(unsigned long mask) -{ - mmu_cr4_features |= mask; - if (trampoline_cr4_features) - *trampoline_cr4_features = mmu_cr4_features; - cr4_set_bits(mask); -} +/* How many pages can be invalidated with one INVLPGB. */ +extern u16 invlpgb_count_max; extern void initialize_tlbstate_and_flush(void); /* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * - * See SWITCH_TO_USER_CR3. - */ -static inline void invalidate_user_asid(u16 asid) -{ - /* There is no user ASID if address space separation is off */ - if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) - return; - - /* - * We only have a single ASID if PCID is off and the CR3 - * write will have flushed it. 
- */ - if (!cpu_feature_enabled(X86_FEATURE_PCID)) - return; - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - __set_bit(kern_pcid(asid), - (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); -} - -/* - * flush the entire current user mapping - */ -static inline void __native_flush_tlb(void) -{ - /* - * Preemption or interrupts must be disabled to protect the access - * to the per CPU variable and to prevent being preempted between - * read_cr3() and write_cr3(). - */ - WARN_ON_ONCE(preemptible()); - - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); - - /* If current->mm == NULL then the read_cr3() "borrows" an mm */ - native_write_cr3(__native_read_cr3()); -} - -/* - * flush everything - */ -static inline void __native_flush_tlb_global(void) -{ - unsigned long cr4, flags; - - if (static_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. - * - * Note, this works with CR4.PCIDE=0 or 1. - */ - invpcid_flush_all(); - return; - } - - /* - * Read-modify-write to CR4 - protect it from preemption and - * from interrupts. (Use the raw variant because this code can - * be called from deep inside debugging code.) - */ - raw_local_irq_save(flags); - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* toggle PGE */ - native_write_cr4(cr4 ^ X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); - - raw_local_irq_restore(flags); -} - -/* - * flush one page in the user mapping - */ -static inline void __native_flush_tlb_one_user(unsigned long addr) -{ - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. - * Just use invalidate_user_asid() in case we are called early. - */ - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) - invalidate_user_asid(loaded_mm_asid); - else - invpcid_flush_one(user_pcid(loaded_mm_asid), addr); -} - -/* - * flush everything - */ -static inline void __flush_tlb_all(void) -{ - /* - * This is to catch users with enabled preemption and the PGE feature - * and don't trigger the warning in __native_flush_tlb(). - */ - VM_WARN_ON_ONCE(preemptible()); - - if (boot_cpu_has(X86_FEATURE_PGE)) { - __flush_tlb_global(); - } else { - /* - * !PGE -> !PCID (setup_pcid()), thus every flush is total. - */ - __flush_tlb(); - } -} - -/* - * flush one page in the kernel mapping - */ -static inline void __flush_tlb_one_kernel(unsigned long addr) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - - /* - * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its - * paravirt equivalent. Even with PCID, this is sufficient: we only - * use PCID if we also use global PTEs for the kernel mapping, and - * INVLPG flushes global translations across all address spaces. - * - * If PTI is on, then the kernel is mapped with non-global PTEs, and - * __flush_tlb_one_user() will flush the given address for the current - * kernel address space and for its usermode counterpart, but it does - * not flush it for other address spaces. - */ - __flush_tlb_one_user(addr); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * See above. We need to propagate the flush to all other address - * spaces. In principle, we only need to propagate it to kernelmode - * address spaces, but the extra bookkeeping we would need is not - * worth it. 
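
The CR4.PGE dance above works because of an architectural rule: any write to
CR4 that changes the PGE bit invalidates the entire TLB, global entries
included. A minimal sketch of how the consolidated helper defined at the end
of this header is meant to be called (the caller name is illustrative, and
interrupts must already be off so the cr4 shadow cannot change underneath):

	static void flush_everything_sketch(void)
	{
		unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);

		/* Two CR4 writes, each flipping PGE: full TLB flush. */
		__native_tlb_flush_global(cr4);
	}
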
- */ - invalidate_other_asid(); -} - -#define TLB_FLUSH_ALL -1UL - -/* * TLB flushing: * * - flush_tlb_all() flushes all processes TLBs @@ -515,7 +197,7 @@ static inline void __flush_tlb_one_kernel(unsigned long addr) * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus + * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. @@ -541,11 +223,86 @@ struct flush_tlb_info { unsigned long start; unsigned long end; u64 new_tlb_gen; - unsigned int stride_shift; - bool freed_tables; + unsigned int initiating_cpu; + u8 stride_shift; + u8 freed_tables; + u8 trim_cpumask; }; -#define local_flush_tlb() __flush_tlb() +void flush_tlb_local(void); +void flush_tlb_one_user(unsigned long addr); +void flush_tlb_one_kernel(unsigned long addr); +void flush_tlb_multi(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +static inline bool is_dyn_asid(u16 asid) +{ + return asid < TLB_NR_DYN_ASIDS; +} + +static inline bool is_global_asid(u16 asid) +{ + return !is_dyn_asid(asid); +} + +#ifdef CONFIG_BROADCAST_TLB_FLUSH +static inline u16 mm_global_asid(struct mm_struct *mm) +{ + u16 asid; + + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return 0; + + asid = smp_load_acquire(&mm->context.global_asid); + + /* mm->context.global_asid is either 0, or a global ASID */ + VM_WARN_ON_ONCE(asid && is_dyn_asid(asid)); + + return asid; +} + +static inline void mm_init_global_asid(struct mm_struct *mm) +{ + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + mm->context.global_asid = 0; + mm->context.asid_transition = false; + } +} + +static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) +{ + /* + * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() -> + * finish_asid_transition() needs to observe asid_transition = true + * once it observes global_asid. + */ + mm->context.asid_transition = true; + smp_store_release(&mm->context.global_asid, asid); +} + +static inline void mm_clear_asid_transition(struct mm_struct *mm) +{ + WRITE_ONCE(mm->context.asid_transition, false); +} + +static inline bool mm_in_asid_transition(struct mm_struct *mm) +{ + if (!cpu_feature_enabled(X86_FEATURE_INVLPGB)) + return false; + + return mm && READ_ONCE(mm->context.asid_transition); +} +#else +static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; } +static inline void mm_init_global_asid(struct mm_struct *mm) { } +static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { } +static inline void mm_clear_asid_transition(struct mm_struct *mm) { } +static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; } +#endif /* CONFIG_BROADCAST_TLB_FLUSH */ + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) @@ -554,7 +311,7 @@ struct flush_tlb_info { flush_tlb_mm_range((vma)->vm_mm, start, end, \ ((vma)->vm_flags & VM_HUGETLB) \ ? 
huge_page_shift(hstate_vma(vma)) \
-			: PAGE_SHIFT, false)
+			: PAGE_SHIFT, true)
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -567,8 +324,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
 }
 
-void native_flush_tlb_others(const struct cpumask *cpumask,
-			     const struct flush_tlb_info *info);
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+	bool should_defer = false;
+
+	/* If remote CPUs need to be flushed then defer the flush as a batch */
+	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+		should_defer = true;
+	put_cpu();
+
+	return should_defer;
+}
 
 static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 {
@@ -581,21 +347,145 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 	return atomic64_inc_return(&mm->context.tlb_gen);
 }
 
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-					struct mm_struct *mm)
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+		struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	inc_mm_tlb_gen(mm);
 	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+	batch->unmapped_pages = true;
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
 }
 
 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 
-#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, info)	\
-	native_flush_tlb_others(mask, info)
+static inline bool pte_flags_need_flush(unsigned long oldflags,
+					unsigned long newflags,
+					bool ignore_access)
+{
+	/*
+	 * Flags that require a flush when cleared but not when they are set.
+	 * Only include flags that would not trigger spurious page-faults.
+	 * Non-present entries are not cached. Hardware would set the
+	 * dirty/access bit if needed without a fault.
+	 */
+	const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
+					_PAGE_ACCESSED;
+	const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
+					_PAGE_SOFTW3 | _PAGE_SOFTW4 |
+					_PAGE_SAVED_DIRTY;
+	const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
+			  _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
+			  _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
+			  _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
+	unsigned long diff = oldflags ^ newflags;
+
+	BUILD_BUG_ON(flush_on_clear & software_flags);
+	BUILD_BUG_ON(flush_on_clear & flush_on_change);
+	BUILD_BUG_ON(flush_on_change & software_flags);
+
+	/* Ignore software flags */
+	diff &= ~software_flags;
+
+	if (ignore_access)
+		diff &= ~_PAGE_ACCESSED;
+
+	/*
+	 * Were any of the 'flush_on_clear' flags cleared going from
+	 * 'oldflags' to 'newflags'?
+	 */
+	if (diff & oldflags & flush_on_clear)
+		return true;
+
+	/* Flush on modified flags. */
+	if (diff & flush_on_change)
+		return true;
+
+	/* Ensure there are no flags that were left behind */
+	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+	    (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
+		VM_WARN_ON_ONCE(1);
+		return true;
+	}
-#define paravirt_tlb_remove_table(tlb, page) \
-	tlb_remove_page(tlb, (void *)(page))
+	return false;
+}
+
+/*
+ * pte_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace PTEs.
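+ *
+ * For example, given the tables in pte_flags_need_flush(): clearing
+ * _PAGE_RW (write-protecting a present PTE) flips a 'flush_on_change'
+ * bit and therefore needs a flush, while setting _PAGE_ACCESSED changes
+ * no flush-relevant bit once ignore_access strips it, so no flush is
+ * needed.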
+ */ +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pte_flags(oldpte) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pte_pfn(oldpte) != pte_pfn(newpte)) + return true; + + /* + * check PTE flags; ignore access-bit; see comment in + * ptep_clear_flush_young(). + */ + return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), + true); +} +#define pte_needs_flush pte_needs_flush + +/* + * huge_pmd_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace huge PMDs. + */ +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) + return true; + + /* + * check PMD flags; do not ignore access-bit; see + * pmdp_clear_flush_young(). + */ + return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), + false); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + +#ifdef CONFIG_ADDRESS_MASKING +static inline u64 tlbstate_lam_cr3_mask(void) +{ + u64 lam = this_cpu_read(cpu_tlbstate.lam); + + return lam << X86_CR3_LAM_U57_BIT; +} + +static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask) +{ + this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT); + this_cpu_write(tlbstate_untag_mask, untag_mask); +} + +#else + +static inline u64 tlbstate_lam_cr3_mask(void) +{ + return 0; +} + +static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask) +{ +} #endif +#endif /* !MODULE */ +static inline void __native_tlb_flush_global(unsigned long cr4) +{ + native_write_cr4(cr4 ^ X86_CR4_PGE); + native_write_cr4(cr4); +} #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 453cf38a1c33..1fadf0cf520c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -31,9 +31,9 @@ * CONFIG_NUMA. */ #include <linux/numa.h> +#include <linux/cpumask.h> #ifdef CONFIG_NUMA -#include <linux/cpumask.h> #include <asm/mpspec.h> #include <asm/percpu.h> @@ -102,18 +102,105 @@ static inline void setup_node_to_cpumask_map(void) { } #include <asm-generic/topology.h> +/* Topology information */ +enum x86_topology_domains { + TOPO_SMT_DOMAIN, + TOPO_CORE_DOMAIN, + TOPO_MODULE_DOMAIN, + TOPO_TILE_DOMAIN, + TOPO_DIE_DOMAIN, + TOPO_DIEGRP_DOMAIN, + TOPO_PKG_DOMAIN, + TOPO_MAX_DOMAIN, +}; + +enum x86_topology_cpu_type { + TOPO_CPU_TYPE_PERFORMANCE, + TOPO_CPU_TYPE_EFFICIENCY, + TOPO_CPU_TYPE_UNKNOWN, +}; + +struct x86_topology_system { + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_size[TOPO_MAX_DOMAIN]; +}; + +extern struct x86_topology_system x86_topo_system; + +static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom) +{ + return x86_topo_system.dom_size[dom]; +} + +static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom) +{ + return dom == TOPO_SMT_DOMAIN ? 
0 : x86_topo_system.dom_shifts[dom - 1]; +} + extern const struct cpumask *cpu_coregroup_mask(int cpu); +extern const struct cpumask *cpu_clustergroup_mask(int cpu); + +#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id) +#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id) +#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id) +#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id) +#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) +#define topology_ppin(cpu) (cpu_data(cpu).ppin) -#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) -#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) -#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) + +extern unsigned int __max_dies_per_package; +extern unsigned int __max_logical_packages; +extern unsigned int __max_threads_per_core; +extern unsigned int __num_threads_per_package; +extern unsigned int __num_cores_per_package; + +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); +enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); + +static inline unsigned int topology_max_packages(void) +{ + return __max_logical_packages; +} + +static inline unsigned int topology_max_dies_per_package(void) +{ + return __max_dies_per_package; +} + +static inline unsigned int topology_num_cores_per_package(void) +{ + return __num_cores_per_package; +} + +static inline unsigned int topology_num_threads_per_package(void) +{ + return __num_threads_per_package; +} + +#ifdef CONFIG_X86_LOCAL_APIC +int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); +#else +static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) +{ + return 0; +} +#endif #ifdef CONFIG_SMP +#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) +#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) +#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -extern unsigned int __max_logical_packages; -#define topology_max_packages() (__max_logical_packages) + +static inline int topology_phys_to_logical_pkg(unsigned int pkg) +{ + return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN], + TOPO_PKG_DOMAIN); +} extern int __max_smt_threads; @@ -122,19 +209,43 @@ static inline int topology_max_smt_threads(void) return __max_smt_threads; } -int topology_update_package_map(unsigned int apicid, unsigned int cpu); -int topology_phys_to_logical_pkg(unsigned int pkg); -bool topology_is_primary_thread(unsigned int cpu); -bool topology_smt_supported(void); -#else -#define topology_max_packages() (1) -static inline int -topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +#include <linux/cpu_smt.h> + +extern unsigned int __amd_nodes_per_pkg; + +static inline unsigned int topology_amd_nodes_per_pkg(void) +{ + return __amd_nodes_per_pkg; +} + +#else /* CONFIG_SMP */ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } -static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } -static inline bool topology_smt_supported(void) { return false; } -#endif +static 
inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } +#endif /* !CONFIG_SMP */ + +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) + +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_primary_thread_mask); +} +#define topology_is_primary_thread topology_is_primary_thread + +int topology_get_primary_thread(unsigned int cpu); + +static inline bool topology_is_core_online(unsigned int cpu) +{ + int pcpu = topology_get_primary_thread(cpu); + + return pcpu >= 0 ? cpu_online(pcpu) : false; +} +#define topology_is_core_online topology_is_core_online static inline void arch_fix_phys_package_id(int num, u32 slot) { @@ -150,7 +261,7 @@ extern bool x86_topology_update; #include <asm/percpu.h> DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); -extern unsigned int __read_mostly sysctl_sched_itmt_enabled; +extern bool __read_mostly sysctl_sched_itmt_enabled; /* Interface to set priority of a cpu */ void sched_set_itmt_core_prio(int prio, int core_cpu); @@ -163,7 +274,7 @@ void sched_clear_itmt_support(void); #else /* CONFIG_SCHED_MC_PRIO */ -#define sysctl_sched_itmt_enabled 0 +#define sysctl_sched_itmt_enabled false static inline void sched_set_itmt_core_prio(int prio, int core_cpu) { } @@ -176,4 +287,44 @@ static inline void sched_clear_itmt_support(void) } #endif /* CONFIG_SCHED_MC_PRIO */ +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64) +#include <asm/cpufeature.h> + +DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key); + +#define arch_scale_freq_invariant() static_branch_likely(&arch_scale_freq_key) + +DECLARE_PER_CPU(unsigned long, arch_freq_scale); + +static inline long arch_scale_freq_capacity(int cpu) +{ + return per_cpu(arch_freq_scale, cpu); +} +#define arch_scale_freq_capacity arch_scale_freq_capacity + +bool arch_enable_hybrid_capacity_scale(void); +void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, + unsigned long cap_freq, unsigned long base_freq); + +unsigned long arch_scale_cpu_capacity(int cpu); +#define arch_scale_cpu_capacity arch_scale_cpu_capacity + +extern void arch_set_max_freq_ratio(bool turbo_disabled); +extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled); +#else +static inline bool arch_enable_hybrid_capacity_scale(void) { return false; } +static inline void arch_set_cpu_capacity(int cpu, unsigned long cap, + unsigned long max_cap, + unsigned long cap_freq, + unsigned long base_freq) { } + +static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } +static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { } +#endif + +extern void arch_scale_freq_tick(void); +#define arch_scale_freq_tick arch_scale_freq_tick + +extern int arch_sched_node_distance(int from, int to); + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h deleted file mode 100644 index 57c8da027d99..000000000000 --- a/arch/x86/include/asm/trace/common.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _ASM_TRACE_COMMON_H -#define _ASM_TRACE_COMMON_H - -#ifdef CONFIG_TRACING -DECLARE_STATIC_KEY_FALSE(trace_pagefault_key); -#define trace_pagefault_enabled() \ - static_branch_unlikely(&trace_pagefault_key) -DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key); -#define trace_resched_ipi_enabled() \ - 
static_branch_unlikely(&trace_resched_ipi_key) -#else -static inline bool trace_pagefault_enabled(void) { return false; } -static inline bool trace_resched_ipi_enabled(void) { return false; } -#endif - -#endif diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h deleted file mode 100644 index e0e6d7f21399..000000000000 --- a/arch/x86/include/asm/trace/exceptions.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM exceptions - -#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PAGE_FAULT_H - -#include <linux/tracepoint.h> -#include <asm/trace/common.h> - -extern int trace_pagefault_reg(void); -extern void trace_pagefault_unreg(void); - -DECLARE_EVENT_CLASS(x86_exceptions, - - TP_PROTO(unsigned long address, struct pt_regs *regs, - unsigned long error_code), - - TP_ARGS(address, regs, error_code), - - TP_STRUCT__entry( - __field( unsigned long, address ) - __field( unsigned long, ip ) - __field( unsigned long, error_code ) - ), - - TP_fast_assign( - __entry->address = address; - __entry->ip = regs->ip; - __entry->error_code = error_code; - ), - - TP_printk("address=%pf ip=%pf error_code=0x%lx", - (void *)__entry->address, (void *)__entry->ip, - __entry->error_code) ); - -#define DEFINE_PAGE_FAULT_EVENT(name) \ -DEFINE_EVENT_FN(x86_exceptions, name, \ - TP_PROTO(unsigned long address, struct pt_regs *regs, \ - unsigned long error_code), \ - TP_ARGS(address, regs, error_code), \ - trace_pagefault_reg, trace_pagefault_unreg); - -DEFINE_PAGE_FAULT_EVENT(page_fault_user); -DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE exceptions -#endif /* _TRACE_PAGE_FAULT_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 069c04be1507..721b408d9a67 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, initialized) + __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->initialized = fpu->initialized; + __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { - __entry->xfeatures = fpu->state.xsave.header.xfeatures; - __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->initialized, + __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv ) @@ -44,16 +44,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_after_save, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - -DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) @@ -64,26 +54,11 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - -DEFINE_EVENT(x86_fpu, 
x86_fpu_init_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_dropped, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index ace464f09681..a8e5a7a2b460 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -8,7 +8,7 @@ #if IS_ENABLED(CONFIG_HYPERV) -TRACE_EVENT(hyperv_mmu_flush_tlb_others, +TRACE_EVENT(hyperv_mmu_flush_tlb_multi, TP_PROTO(const struct cpumask *cpus, const struct flush_tlb_info *info), TP_ARGS(cpus, info), @@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask, __entry->ncpus, __entry->vector) ); +TRACE_EVENT(hyperv_send_ipi_one, + TP_PROTO(int cpu, + int vector), + TP_ARGS(cpu, vector), + TP_STRUCT__entry( + __field(int, cpu) + __field(int, vector) + ), + TP_fast_assign(__entry->cpu = cpu; + __entry->vector = vector; + ), + TP_printk("cpu %d vector %x", + __entry->cpu, __entry->vector) + ); + #endif /* CONFIG_HYPERV */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 33b9d0f0aafe..7408bebdfde0 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -6,13 +6,9 @@ #define _TRACE_IRQ_VECTORS_H #include <linux/tracepoint.h> -#include <asm/trace/common.h> #ifdef CONFIG_X86_LOCAL_APIC -extern int trace_resched_ipi_reg(void); -extern void trace_resched_ipi_unreg(void); - DECLARE_EVENT_CLASS(x86_irq_vector, TP_PROTO(int vector), @@ -37,18 +33,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ TP_PROTO(int vector), \ TP_ARGS(vector), NULL, NULL); -#define DEFINE_RESCHED_IPI_EVENT(name) \ -DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); \ -DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); - /* * local_timer - called when entering/exiting a local timer interrupt * vector handler @@ -99,7 +83,7 @@ TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? 
-EPERM : 0); /* * reschedule - called when entering/exiting a reschedule vector handler */ -DEFINE_RESCHED_IPI_EVENT(reschedule); +DEFINE_IRQ_VECTOR_EVENT(reschedule); /* * call_function - called when entering/exiting a call function interrupt diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h deleted file mode 100644 index 54133017267c..000000000000 --- a/arch/x86/include/asm/trace/mpx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mpx - -#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MPX_H - -#include <linux/tracepoint.h> - -#ifdef CONFIG_X86_INTEL_MPX - -TRACE_EVENT(mpx_bounds_register_exception, - - TP_PROTO(void __user *addr_referenced, - const struct mpx_bndreg *bndreg), - TP_ARGS(addr_referenced, bndreg), - - TP_STRUCT__entry( - __field(void __user *, addr_referenced) - __field(u64, lower_bound) - __field(u64, upper_bound) - ), - - TP_fast_assign( - __entry->addr_referenced = addr_referenced; - __entry->lower_bound = bndreg->lower_bound; - __entry->upper_bound = bndreg->upper_bound; - ), - /* - * Note that we are printing out the '~' of the upper - * bounds register here. It is actually stored in its - * one's complement form so that its 'init' state - * corresponds to all 0's. But, that looks like - * gibberish when printed out, so print out the 1's - * complement instead of the actual value here. Note - * though that you still need to specify filters for the - * actual value, not the displayed one. - */ - TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", - __entry->addr_referenced, - __entry->lower_bound, - ~__entry->upper_bound - ) -); - -TRACE_EVENT(bounds_exception_mpx, - - TP_PROTO(const struct mpx_bndcsr *bndcsr), - TP_ARGS(bndcsr), - - TP_STRUCT__entry( - __field(u64, bndcfgu) - __field(u64, bndstatus) - ), - - TP_fast_assign( - /* need to get rid of the 'const' on bndcsr */ - __entry->bndcfgu = (u64)bndcsr->bndcfgu; - __entry->bndstatus = (u64)bndcsr->bndstatus; - ), - - TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", - __entry->bndcfgu, - __entry->bndstatus) -); - -DECLARE_EVENT_CLASS(mpx_range_trace, - - TP_PROTO(unsigned long start, - unsigned long end), - TP_ARGS(start, end), - - TP_STRUCT__entry( - __field(unsigned long, start) - __field(unsigned long, end) - ), - - TP_fast_assign( - __entry->start = start; - __entry->end = end; - ), - - TP_printk("[0x%p:0x%p]", - (void *)__entry->start, - (void *)__entry->end - ) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -TRACE_EVENT(mpx_new_bounds_table, - - TP_PROTO(unsigned long table_vaddr), - TP_ARGS(table_vaddr), - - TP_STRUCT__entry( - __field(unsigned long, table_vaddr) - ), - - TP_fast_assign( - __entry->table_vaddr = table_vaddr; - ), - - TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) -); - -#else - -/* - * This gets used outside of MPX-specific code, so we need a stub. 
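
Each TRACE_EVENT()/DEFINE_EVENT() above expands to a trace_<name>() call
site for the kernel to fire. A sketch of how the two events touched in this
diff are invoked (the call sites shown are illustrative; the real ones live
in the Hyper-V APIC code and the reschedule IPI handler):

	/* hyperv_send_ipi_one: one target CPU, one vector */
	trace_hyperv_send_ipi_one(cpu, vector);

	/* from DEFINE_IRQ_VECTOR_EVENT(reschedule) */
	trace_reschedule_entry(RESCHEDULE_VECTOR);
	/* ... handle the IPI ... */
	trace_reschedule_exit(RESCHEDULE_VECTOR);
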
- */ -static inline -void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) -{ -} - -#endif /* CONFIG_X86_INTEL_MPX */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH asm/trace/ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mpx -#endif /* _TRACE_MPX_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h new file mode 100644 index 000000000000..a23a7b707b64 --- /dev/null +++ b/arch/x86/include/asm/trap_pf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAP_PF_H +#define _ASM_X86_TRAP_PF_H + +#include <linux/bits.h> + +/* + * Page fault error code bits: + * + * bit 0 == 0: no page found 1: protection fault + * bit 1 == 0: read access 1: write access + * bit 2 == 0: kernel-mode access 1: user-mode access + * bit 3 == 1: use of reserved bit detected + * bit 4 == 1: fault was an instruction fetch + * bit 5 == 1: protection keys block access + * bit 6 == 1: shadow stack access fault + * bit 15 == 1: SGX MMU page-fault + * bit 31 == 1: fault was due to RMP violation + */ +enum x86_pf_error_code { + X86_PF_PROT = BIT(0), + X86_PF_WRITE = BIT(1), + X86_PF_USER = BIT(2), + X86_PF_RSVD = BIT(3), + X86_PF_INSTR = BIT(4), + X86_PF_PK = BIT(5), + X86_PF_SHSTK = BIT(6), + X86_PF_SGX = BIT(15), + X86_PF_RMP = BIT(31), +}; + +#endif /* _ASM_X86_TRAP_PF_H */ diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h new file mode 100644 index 000000000000..8d1154cdf787 --- /dev/null +++ b/arch/x86/include/asm/trapnr.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAPNR_H +#define _ASM_X86_TRAPNR_H + +/* + * Event type codes used by FRED, Intel VT-x and AMD SVM + */ +#define EVENT_TYPE_EXTINT 0 // External interrupt +#define EVENT_TYPE_RESERVED 1 +#define EVENT_TYPE_NMI 2 // NMI +#define EVENT_TYPE_HWEXC 3 // Hardware originated traps, exceptions +#define EVENT_TYPE_SWINT 4 // INT n +#define EVENT_TYPE_PRIV_SWEXC 5 // INT1 +#define EVENT_TYPE_SWEXC 6 // INTO, INT3 +#define EVENT_TYPE_OTHER 7 // FRED SYSCALL/SYSENTER, VT-x MTF + +/* Interrupts/Exceptions */ + +#define X86_TRAP_DE 0 /* Divide-by-zero */ +#define X86_TRAP_DB 1 /* Debug */ +#define X86_TRAP_NMI 2 /* Non-maskable Interrupt */ +#define X86_TRAP_BP 3 /* Breakpoint */ +#define X86_TRAP_OF 4 /* Overflow */ +#define X86_TRAP_BR 5 /* Bound Range Exceeded */ +#define X86_TRAP_UD 6 /* Invalid Opcode */ +#define X86_TRAP_NM 7 /* Device Not Available */ +#define X86_TRAP_DF 8 /* Double Fault */ +#define X86_TRAP_OLD_MF 9 /* Coprocessor Segment Overrun */ +#define X86_TRAP_TS 10 /* Invalid TSS */ +#define X86_TRAP_NP 11 /* Segment Not Present */ +#define X86_TRAP_SS 12 /* Stack Segment Fault */ +#define X86_TRAP_GP 13 /* General Protection Fault */ +#define X86_TRAP_PF 14 /* Page Fault */ +#define X86_TRAP_SPURIOUS 15 /* Spurious Interrupt */ +#define X86_TRAP_MF 16 /* x87 Floating-Point Exception */ +#define X86_TRAP_AC 17 /* Alignment Check */ +#define X86_TRAP_MC 18 /* Machine Check */ +#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */ +#define X86_TRAP_VE 20 /* Virtualization Exception */ +#define X86_TRAP_CP 21 /* Control Protection Exception */ +#define X86_TRAP_VC 29 /* VMM Communication Exception */ +#define X86_TRAP_IRET 32 /* IRET Exception */ + +#endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 7d6f3f3fad78..869b88061801 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h 
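
The X86_PF_* bits above combine to describe one fault. A worked decode as a
small sketch (the helper name is made up): error code 0x7 is
X86_PF_PROT | X86_PF_WRITE | X86_PF_USER, that is, a user-mode write that
hit a present page whose protections forbid it, the classic COW signature.

	static bool is_user_write_to_present_page(unsigned long error_code)
	{
		const unsigned long cow = X86_PF_PROT | X86_PF_WRITE | X86_PF_USER;

		return (error_code & cow) == cow;
	}
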
@@ -6,93 +6,24 @@ #include <linux/kprobes.h> #include <asm/debugreg.h> +#include <asm/idtentry.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... */ +#include <asm/trap_pf.h> -#define dotraplinkage __visible - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void nmi(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); #ifdef CONFIG_X86_64 -asmlinkage void double_fault(void); -#endif -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void async_page_fault(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void coprocessor_error(void); -asmlinkage void alignment_check(void); -#ifdef CONFIG_X86_MCE -asmlinkage void machine_check(void); -#endif /* CONFIG_X86_MCE */ -asmlinkage void simd_coprocessor_error(void); - -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_divide_error(void); -asmlinkage void xen_xennmi(void); -asmlinkage void xen_xendebug(void); -asmlinkage void xen_xenint3(void); -asmlinkage void xen_overflow(void); -asmlinkage void xen_bounds(void); -asmlinkage void xen_invalid_op(void); -asmlinkage void xen_device_not_available(void); -asmlinkage void xen_double_fault(void); -asmlinkage void xen_coprocessor_segment_overrun(void); -asmlinkage void xen_invalid_TSS(void); -asmlinkage void xen_segment_not_present(void); -asmlinkage void xen_stack_segment(void); -asmlinkage void xen_general_protection(void); -asmlinkage void xen_page_fault(void); -asmlinkage void xen_spurious_interrupt_bug(void); -asmlinkage void xen_coprocessor_error(void); -asmlinkage void xen_alignment_check(void); -#ifdef CONFIG_X86_MCE -asmlinkage void xen_machine_check(void); -#endif /* CONFIG_X86_MCE */ -asmlinkage void xen_simd_coprocessor_error(void); -#endif - -dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code); -dotraplinkage void do_debug(struct pt_regs *regs, long error_code); -dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); -dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); -dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); -dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); -dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); -dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_64 -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code); asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); -void __init trap_init(void); -#endif -dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); -dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); -dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); -dotraplinkage 
void do_coprocessor_error(struct pt_regs *regs, long error_code); -dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_MCE -dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code); +struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); +asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif -dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); + +extern int ibt_selftest(void); +extern int ibt_selftest_noendbr(void); + +#ifdef CONFIG_X86_F00F_BUG +/* For handling the FOOF bug */ +void handle_invalid_op(struct pt_regs *regs); #endif -dotraplinkage void do_mce(struct pt_regs *regs, long error_code); static inline int get_si_code(unsigned long condition) { @@ -104,72 +35,26 @@ static inline int get_si_code(unsigned long condition) return TRAP_BRKPT; } -extern int panic_on_unrecovered_nmi; - void math_emulate(struct math_emu_info *); -#ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); -asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); -asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); -#endif -void smp_apic_timer_interrupt(struct pt_regs *regs); -void smp_spurious_interrupt(struct pt_regs *regs); -void smp_error_interrupt(struct pt_regs *regs); -asmlinkage void smp_irq_move_cleanup_interrupt(void); - -extern void ist_enter(struct pt_regs *regs); -extern void ist_exit(struct pt_regs *regs); -extern void ist_begin_non_atomic(struct pt_regs *regs); -extern void ist_end_non_atomic(void); +bool fault_in_kernel_space(unsigned long address); #ifdef CONFIG_VMAP_STACK -void __noreturn handle_stack_overflow(const char *message, - struct pt_regs *regs, - unsigned long fault_address); +void __noreturn handle_stack_overflow(struct pt_regs *regs, + unsigned long fault_address, + struct stack_info *info); #endif -/* Interrupts/Exceptions */ -enum { - X86_TRAP_DE = 0, /* 0, Divide-by-zero */ - X86_TRAP_DB, /* 1, Debug */ - X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ - X86_TRAP_BP, /* 3, Breakpoint */ - X86_TRAP_OF, /* 4, Overflow */ - X86_TRAP_BR, /* 5, Bound Range Exceeded */ - X86_TRAP_UD, /* 6, Invalid Opcode */ - X86_TRAP_NM, /* 7, Device Not Available */ - X86_TRAP_DF, /* 8, Double Fault */ - X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ - X86_TRAP_TS, /* 10, Invalid TSS */ - X86_TRAP_NP, /* 11, Segment Not Present */ - X86_TRAP_SS, /* 12, Stack Segment Fault */ - X86_TRAP_GP, /* 13, General Protection Fault */ - X86_TRAP_PF, /* 14, Page Fault */ - X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ - X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ - X86_TRAP_AC, /* 17, Alignment Check */ - X86_TRAP_MC, /* 18, Machine Check */ - X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ - X86_TRAP_IRET = 32, /* 32, IRET Exception */ -}; +static inline void cond_local_irq_enable(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void cond_local_irq_disable(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} -/* - * Page fault error code bits: - * - * bit 0 == 0: no page found 1: protection fault - * bit 1 == 0: read access 1: write access - * bit 2 == 0: kernel-mode access 1: user-mode access - * bit 3 == 1: use of reserved bit detected - * bit 4 == 1: fault was an instruction fetch - * bit 5 == 1: protection keys block 
access - */ -enum x86_pf_error_code { - X86_PF_PROT = 1 << 0, - X86_PF_WRITE = 1 << 1, - X86_PF_USER = 1 << 2, - X86_PF_RSVD = 1 << 3, - X86_PF_INSTR = 1 << 4, - X86_PF_PK = 1 << 5, -}; #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8a0c25c6bf09..4f7f09f50552 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -5,10 +5,64 @@ #ifndef _ASM_X86_TSC_H #define _ASM_X86_TSC_H +#include <asm/asm.h> +#include <asm/cpufeature.h> #include <asm/processor.h> +#include <asm/msr.h> -#define NS_SCALE 10 /* 2^10, carefully chosen */ -#define US_SCALE 32 /* 2^32, arbitralrily chosen */ +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. + */ +static __always_inline u64 rdtsc(void) +{ + EAX_EDX_DECLARE_ARGS(val, low, high); + + asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); + + return EAX_EDX_VAL(val, low, high); +} + +/** + * rdtsc_ordered() - read the current TSC in program order + * + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. + * It is ordered like a load to a global in-memory counter. It should + * be impossible to observe non-monotonic rdtsc_unordered() behavior + * across multiple CPUs as long as the TSC is synced. + */ +static __always_inline u64 rdtsc_ordered(void) +{ + EAX_EDX_DECLARE_ARGS(val, low, high); + + /* + * The RDTSC instruction is not ordered relative to memory + * access. The Intel SDM and the AMD APM are both vague on this + * point, but empirically an RDTSC instruction can be + * speculatively executed before prior loads. An RDTSC + * immediately after an appropriate barrier appears to be + * ordered as a normal load, that is, it provides the same + * ordering guarantees as reading from a global memory location + * that some other imaginary CPU is updating continuously with a + * time stamp. + * + * Thus, use the preferred barrier on the respective CPU, aiming for + * RDTSCP as the default. + */ + asm volatile(ALTERNATIVE_2("rdtsc", + "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, + "rdtscp", X86_FEATURE_RDTSCP) + : EAX_EDX_RET(val, low, high) + /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ + :: "ecx"); + + return EAX_EDX_VAL(val, low, high); +} /* * Standard way to access the cycle counter. 
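
A short sketch of how the two readers above divide the work (the function
name is illustrative): rdtsc() for cheap, unordered samples; rdtsc_ordered()
when the reads must not drift across the region being measured:

	static u64 cycles_for(void (*fn)(void))
	{
		u64 t0, t1;

		t0 = rdtsc_ordered();	/* ordered like a load: no hoisting */
		fn();
		t1 = rdtsc_ordered();	/* cannot be speculated before fn() */

		return t1 - t0;
	}
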
@@ -22,20 +76,15 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } - -extern struct system_counterval_t convert_art_to_tsc(u64 art); -extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); +#define get_cycles get_cycles extern void tsc_early_init(void); extern void tsc_init(void); -extern unsigned long calibrate_delay_is_known(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); @@ -58,12 +107,10 @@ extern bool tsc_async_resets; #ifdef CONFIG_X86_TSC extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); extern void tsc_verify_tsc_adjust(bool resume); -extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); #else static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } static inline void tsc_verify_tsc_adjust(bool resume) { } -static inline void check_tsc_sync_source(int cpu) { } static inline void check_tsc_sync_target(void) { } #endif diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 780f2b42c8ef..367297b188c3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -5,118 +5,33 @@ * User space memory access functions */ #include <linux/compiler.h> +#include <linux/instrumented.h> #include <linux/kasan-checks.h> +#include <linux/mm_types.h> #include <linux/string.h> +#include <linux/mmap_lock.h> #include <asm/asm.h> #include <asm/page.h> #include <asm/smap.h> #include <asm/extable.h> +#include <asm/tlbflush.h> -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(-1UL) -#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current->thread.addr_limit) -static inline void set_fs(mm_segment_t fs) -{ - current->thread.addr_limit = fs; - /* On user-mode return, check fs is correct */ - set_thread_flag(TIF_FSCHECK); -} - -#define segment_eq(a, b) ((a).seg == (b).seg) - -#define user_addr_max() (current->thread.addr_limit.seg) -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < user_addr_max()) - -/* - * Test whether a block of memory is a valid user space address. - * Returns 0 if the range is valid, nonzero otherwise. - */ -static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit) -{ - /* - * If we have used "sizeof()" for the size, - * we know it won't overflow the limit (but - * it might overflow the 'addr', so it's - * important to subtract the size from the - * limit, not add it to the address). - */ - if (__builtin_constant_p(size)) - return unlikely(addr > limit - size); - - /* Arbitrary sizes? 
Be careful about overflow */ - addr += size; - if (unlikely(addr < size)) - return true; - return unlikely(addr > limit); -} - -#define __range_not_ok(addr, size, limit) \ -({ \ - __chk_user_ptr(addr); \ - __chk_range_not_ok((unsigned long __force)(addr), size, limit); \ -}) - -#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) +#ifdef CONFIG_X86_32 +# include <asm/uaccess_32.h> #else -# define WARN_ON_IN_IRQ() +# include <asm/uaccess_64.h> #endif -/** - * access_ok: - Checks if a user space pointer is valid - * @addr: User space pointer to start of block to check - * @size: Size of block to check - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Checks if a pointer to a block of memory in user space is valid. - * - * Returns true (nonzero) if the memory block may be valid, false (zero) - * if it is definitely invalid. - * - * Note that, depending on architecture, this function probably just - * checks that the pointer is in the user space range - after calling - * this function, memory access functions may still return -EFAULT. - */ -#define access_ok(addr, size) \ -({ \ - WARN_ON_IN_IRQ(); \ - likely(!__range_not_ok(addr, size, user_addr_max())); \ -}) - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. Thus we have a few rather ugly macros here, - * and hide all the ugliness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). - */ +#include <asm-generic/access_ok.h> extern int __get_user_1(void); extern int __get_user_2(void); extern int __get_user_4(void); extern int __get_user_8(void); +extern int __get_user_nocheck_1(void); +extern int __get_user_nocheck_2(void); +extern int __get_user_nocheck_4(void); +extern int __get_user_nocheck_8(void); extern int __get_user_bad(void); #define __uaccess_begin() stac() @@ -128,31 +43,24 @@ extern int __get_user_bad(void); }) /* - * This is a type: either unsigned long, if the argument fits into - * that type, or otherwise unsigned long long. + * This is the smallest unsigned integer type that can fit a value + * (up to 'long long') */ -#define __inttype(x) \ -__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) +#define __inttype(x) __typeof__( \ + __typefits(x,char, \ + __typefits(x,short, \ + __typefits(x,int, \ + __typefits(x,long,0ULL))))) + +#define __typefits(x,type,not) \ + __builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not) -/** - * get_user: - Get a simple variable from user space. - * @x: Variable to store result. - * @ptr: Source address, in user space. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * This macro copies a single simple variable from user space to kernel - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and the result of - * dereferencing @ptr must be assignable to @x without a cast. - * - * Returns zero on success, or -EFAULT on error. 
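
A worked example of what the new __inttype()/__typefits() pair above
selects, spelled out as a comment:

	/*
	 * __inttype(*(u8  __user *)p)  -> unsigned char
	 * __inttype(*(u16 __user *)p)  -> unsigned short
	 * __inttype(*(u32 __user *)p)  -> unsigned int
	 * __inttype(*(u64 __user *)p)  -> unsigned long on 64-bit,
	 *                                 unsigned long long on 32-bit
	 *
	 * i.e. always the smallest unsigned type that holds the value,
	 * where the old definition could only pick unsigned long or
	 * unsigned long long.
	 */
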
- * On error, the variable @x is set to zero. - */ /* + * This is used for both get_user() and __get_user() to expand to + * the proper special function call that has odd calling conventions + * due to returning both a value and an error, and that depends on + * the size of the pointer passed in. + * * Careful: we have to cast the result to the type of the pointer * for sign reasons. * @@ -165,29 +73,67 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) * Clang/LLVM cares about the size of the register, but still wants * the base register for something that ends up being a pair. */ -#define get_user(x, ptr) \ +#define do_get_user_call(fn,x,ptr) \ ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ - might_fault(); \ - asm volatile("call __get_user_%P4" \ + asm volatile("call __" #fn "_%c[size]" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ - : "0" (ptr), "i" (sizeof(*(ptr)))); \ + : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ }) -#define __put_user_x(size, x, ptr, __ret_pu) \ - asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ - : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") +/** + * get_user - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Return: zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); }) +/** + * __get_user - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Return: zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. 
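+ *
+ * A typical guarded use (illustrative):
+ *
+ *	int val;
+ *
+ *	if (!access_ok(uptr, sizeof(*uptr)) || __get_user(val, uptr))
+ *		return -EFAULT;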
+ */ +#define __get_user(x,ptr) do_get_user_call(get_user_nocheck,x,ptr) #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -195,39 +141,55 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : : "A" (x), "r" (addr) \ : : label) -#define __put_user_asm_ex_u64(x, addr) \ - asm volatile("\n" \ - "1: movl %%eax,0(%1)\n" \ - "2: movl %%edx,4(%1)\n" \ - "3:" \ - _ASM_EXTABLE_EX(1b, 2b) \ - _ASM_EXTABLE_EX(2b, 3b) \ - : : "A" (x), "r" (addr)) - -#define __put_user_x8(x, ptr, __ret_pu) \ - asm volatile("call __put_user_8" : "=a" (__ret_pu) \ - : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else #define __put_user_goto_u64(x, ptr, label) \ - __put_user_goto(x, ptr, "q", "", "er", label) -#define __put_user_asm_ex_u64(x, addr) \ - __put_user_asm_ex(x, addr, "q", "", "er") -#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) + __put_user_goto(x, ptr, "q", "er", label) #endif extern void __put_user_bad(void); /* * Strange magic calling convention: pointer in %ecx, - * value in %eax(:%edx), return value in %eax. clobbers %rbx + * value in %eax(:%edx), return value in %ecx. clobbers %rbx */ extern void __put_user_1(void); extern void __put_user_2(void); extern void __put_user_4(void); extern void __put_user_8(void); +extern void __put_user_nocheck_1(void); +extern void __put_user_nocheck_2(void); +extern void __put_user_nocheck_4(void); +extern void __put_user_nocheck_8(void); + +/* + * ptr must be evaluated and assigned to the temporary __ptr_pu before + * the assignment of x to __val_pu, to avoid any function calls + * involved in the ptr expression (possibly implicitly generated due + * to KASAN) from clobbering %ax. + */ +#define do_put_user_call(fn,x,ptr) \ +({ \ + int __ret_pu; \ + void __user *__ptr_pu; \ + register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ + asm volatile("call __" #fn "_%c[size]" \ + : "=c" (__ret_pu), \ + ASM_CALL_CONSTRAINT \ + : "0" (__ptr_pu), \ + "r" (__val_pu), \ + [size] "i" (sizeof(*(ptr))) \ + :"ebx"); \ + instrument_put_user(__x, __ptr, sizeof(*(ptr))); \ + __builtin_expect(__ret_pu, 0); \ +}) /** - * put_user: - Write a simple value into user space. + * put_user - Write a simple value into user space. * @x: Value to copy to user space. * @ptr: Destination address, in user space. * @@ -241,214 +203,268 @@ extern void __put_user_8(void); * @ptr must have pointer-to-simple-variable type, and @x must be assignable * to the result of dereferencing @ptr. * - * Returns zero on success, or -EFAULT on error. + * Return: zero on success, or -EFAULT on error. 
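+ *
+ * A typical use (illustrative):
+ *
+ *	if (put_user(val, uptr))
+ *		return -EFAULT;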
*/ -#define put_user(x, ptr) \ -({ \ - int __ret_pu; \ - __typeof__(*(ptr)) __pu_val; \ - __chk_user_ptr(ptr); \ - might_fault(); \ - __pu_val = x; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __put_user_x(1, __pu_val, ptr, __ret_pu); \ - break; \ - case 2: \ - __put_user_x(2, __pu_val, ptr, __ret_pu); \ - break; \ - case 4: \ - __put_user_x(4, __pu_val, ptr, __ret_pu); \ - break; \ - case 8: \ - __put_user_x8(__pu_val, ptr, __ret_pu); \ - break; \ - default: \ - __put_user_x(X, __pu_val, ptr, __ret_pu); \ - break; \ - } \ - __builtin_expect(__ret_pu, 0); \ -}) +#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); }) + +/** + * __put_user - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Return: zero on success, or -EFAULT on error. + */ +#define __put_user(x, ptr) do_put_user_call(put_user_nocheck,x,ptr) #define __put_user_size(x, ptr, size, label) \ do { \ - __chk_user_ptr(ptr); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ switch (size) { \ case 1: \ - __put_user_goto(x, ptr, "b", "b", "iq", label); \ + __put_user_goto(__x, __ptr, "b", "iq", label); \ break; \ case 2: \ - __put_user_goto(x, ptr, "w", "w", "ir", label); \ + __put_user_goto(__x, __ptr, "w", "ir", label); \ break; \ case 4: \ - __put_user_goto(x, ptr, "l", "k", "ir", label); \ + __put_user_goto(__x, __ptr, "l", "ir", label); \ break; \ case 8: \ - __put_user_goto_u64((__typeof__(*ptr))(x), ptr, label); \ + __put_user_goto_u64(__x, __ptr, label); \ break; \ default: \ __put_user_bad(); \ } \ + instrument_put_user(__x, __ptr, size); \ } while (0) -/* - * This doesn't do __uaccess_begin/end - the exception handling - * around it must do that. 
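
The label-based __put_user_size() above lets callers handle faults with a
goto rather than a return value. A sketch of the usual pattern layered on
top of it, assuming the standard user_access_begin()/unsafe_put_user()/
user_access_end() API (the function name is illustrative):

	static long write_flag_sketch(int __user *uptr, int val)
	{
		if (!user_access_begin(uptr, sizeof(*uptr)))
			return -EFAULT;
		unsafe_put_user(val, uptr, efault);	/* jumps to efault on fault */
		user_access_end();
		return 0;
	efault:
		user_access_end();
		return -EFAULT;
	}
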
- */ -#define __put_user_size_ex(x, ptr, size) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_X86_32 +#define __get_user_asm_u64(x, ptr, label) do { \ + unsigned int __gu_low, __gu_high; \ + const unsigned int __user *__gu_ptr; \ + __gu_ptr = (const void __user *)(ptr); \ + __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \ + __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \ + (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \ +} while (0) +#else +#define __get_user_asm_u64(x, ptr, label) \ + __get_user_asm(x, ptr, "q", "=r", label) +#endif + +#define __get_user_size(x, ptr, size, label) \ do { \ __chk_user_ptr(ptr); \ switch (size) { \ - case 1: \ - __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ + case 1: { \ + unsigned char x_u8__; \ + __get_user_asm(x_u8__, ptr, "b", "=q", label); \ + (x) = x_u8__; \ break; \ + } \ case 2: \ - __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ + __get_user_asm(x, ptr, "w", "=r", label); \ break; \ case 4: \ - __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ + __get_user_asm(x, ptr, "l", "=r", label); \ break; \ case 8: \ - __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ + __get_user_asm_u64(x, ptr, label); \ break; \ default: \ - __put_user_bad(); \ + (x) = __get_user_bad(); \ } \ + instrument_get_user(x); \ } while (0) +#define __get_user_asm(x, addr, itype, ltype, label) \ + asm_goto_output("\n" \ + "1: mov"itype" %[umem],%[output]\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ + : [output] ltype(x) \ + : [umem] "m" (__m(addr)) \ + : : label) + +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT + #ifdef CONFIG_X86_32 -#define __get_user_asm_u64(x, ptr, retval, errret) \ +#define __get_user_asm_u64(x, ptr, retval) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - asm volatile("\n" \ - "1: movl %2,%%eax\n" \ - "2: movl %3,%%edx\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: mov %4,%0\n" \ - " xorl %%eax,%%eax\n" \ - " xorl %%edx,%%edx\n" \ - " jmp 3b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 4b) \ - _ASM_EXTABLE_UA(2b, 4b) \ - : "=r" (retval), "=&A"(x) \ - : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ - "i" (errret), "0" (retval)); \ + asm volatile("\n" \ + "1: movl %[lowbits],%%eax\n" \ + "2: movl %[highbits],%%edx\n" \ + "3:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX_DX, \ + %[errout]) \ + : [errout] "=r" (retval), \ + [output] "=&A"(x) \ + : [lowbits] "m" (__m(__ptr)), \ + [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \ + "0" (retval)); \ }) -#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else -#define __get_user_asm_u64(x, ptr, retval, errret) \ - __get_user_asm(x, ptr, retval, "q", "", "=r", errret) -#define __get_user_asm_ex_u64(x, ptr) \ - __get_user_asm_ex(x, ptr, "q", "", "=r") +#define __get_user_asm_u64(x, ptr, retval) \ + __get_user_asm(x, ptr, retval, "q") #endif -#define __get_user_size(x, ptr, size, retval, errret) \ +#define __get_user_size(x, ptr, size, retval) \ do { \ + unsigned char x_u8__; \ + \ retval = 0; \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ + __get_user_asm(x_u8__, ptr, retval, "b"); \ + (x) = x_u8__; \ break; \ case 2: \ - __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ + __get_user_asm(x, ptr, retval, "w"); \ break; \ case 4: \ - __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ + __get_user_asm(x, ptr, retval, "l"); \ break; \ case 8: \ - __get_user_asm_u64(x, ptr, retval, 
errret); \ + __get_user_asm_u64(x, ptr, retval); \ break; \ default: \ (x) = __get_user_bad(); \ } \ } while (0) -#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ +#define __get_user_asm(x, addr, err, itype) \ asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ + "1: mov"itype" %[umem],%[output]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - -#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \ + EX_FLAG_CLEAR_AX, \ + %[errout]) \ + : [errout] "=r" (err), \ + [output] "=a" (x) \ + : [umem] "m" (__m(addr)), \ + "0" (err)) + +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_goto_output("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : "=@ccz" (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_goto_output("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : "=@ccz" (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : "=@ccz" (success), \ + [errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#ifdef CONFIG_X86_32 /* - * This doesn't do __uaccess_begin/end - the exception handling - * around it must do that. + * Unlike the normal CMPXCHG, use output GPR for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. 
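+ *
+ * (Editor's gloss of the fallback below: the single output GPR encodes
+ * three outcomes.)
+ *
+ *	__result < 0   - faulted; branch to the caller's error label
+ *	__result == 0  - compare failed; *_old is updated to the live value
+ *	__result == 1  - cmpxchg succeeded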
*/ -#define __get_user_size_ex(x, ptr, size) \ -do { \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ - break; \ - case 2: \ - __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ - break; \ - case 4: \ - __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ - break; \ - case 8: \ - __get_user_asm_ex_u64(x, ptr); \ - break; \ - default: \ - (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %1,%"rtype"0\n" \ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %[result]\n\t" \ + "setz %b[result]\n" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:xor"itype" %"rtype"0,%"rtype"0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_EX(1b, 3b) \ - : ltype(x) : "m" (__m(addr))) - -#define __put_user_nocheck(x, ptr, size) \ -({ \ - __label__ __pu_label; \ - int __pu_err = -EFAULT; \ - __uaccess_begin(); \ - __put_user_size((x), (ptr), (size), __pu_label); \ - __pu_err = 0; \ -__pu_label: \ - __uaccess_end(); \ - __builtin_expect(__pu_err, 0); \ -}) - -#define __get_user_nocheck(x, ptr, size) \ -({ \ - int __gu_err; \ - __inttype(*(ptr)) __gu_val; \ - __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ - __uaccess_end(); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __builtin_expect(__gu_err, 0); \ -}) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[result]) \ + : [result] "=q" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; @@ -459,118 +475,13 @@ struct __large_struct { unsigned long buf[100]; }; * we do not write to any memory gcc knows about, so there are no * aliasing issues. */ -#define __put_user_goto(x, addr, itype, rtype, ltype, label) \ - asm_volatile_goto("\n" \ - "1: mov"itype" %"rtype"0,%1\n" \ - _ASM_EXTABLE_UA(1b, %l2) \ +#define __put_user_goto(x, addr, itype, ltype, label) \ + asm goto("\n" \ + "1: mov"itype" %0,%1\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ : : label) -#define __put_user_failed(x, addr, itype, rtype, ltype, errret) \ - ({ __label__ __puflab; \ - int __pufret = errret; \ - __put_user_goto(x,addr,itype,rtype,ltype,__puflab); \ - __pufret = 0; \ - __puflab: __pufret; }) - -#define __put_user_asm(x, addr, retval, itype, rtype, ltype, errret) do { \ - retval = __put_user_failed(x, addr, itype, rtype, ltype, errret); \ -} while (0) - -#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %"rtype"0,%1\n" \ - "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ - : : ltype(x), "m" (__m(addr))) - -/* - * uaccess_try and catch - */ -#define uaccess_try do { \ - current->thread.uaccess_err = 0; \ - __uaccess_begin(); \ - barrier(); - -#define uaccess_try_nospec do { \ - current->thread.uaccess_err = 0; \ - __uaccess_begin_nospec(); \ - -#define uaccess_catch(err) \ - __uaccess_end(); \ - (err) |= (current->thread.uaccess_err ? 
-EFAULT : 0); \ -} while (0) - -/** - * __get_user: - Get a simple variable from user space, with less checking. - * @x: Variable to store result. - * @ptr: Source address, in user space. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * This macro copies a single simple variable from user space to kernel - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and the result of - * dereferencing @ptr must be assignable to @x without a cast. - * - * Caller must check the pointer with access_ok() before calling this - * function. - * - * Returns zero on success, or -EFAULT on error. - * On error, the variable @x is set to zero. - */ - -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) - -/** - * __put_user: - Write a simple value into user space, with less checking. - * @x: Value to copy to user space. - * @ptr: Destination address, in user space. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * This macro copies a single simple value from kernel space to user - * space. It supports simple types like char and int, but not larger - * data types like structures or arrays. - * - * @ptr must have pointer-to-simple-variable type, and @x must be assignable - * to the result of dereferencing @ptr. - * - * Caller must check the pointer with access_ok() before calling this - * function. - * - * Returns zero on success, or -EFAULT on error. - */ - -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -/* - * {get|put}_user_try and catch - * - * get_user_try { - * get_user_ex(...); - * } get_user_catch(err) - */ -#define get_user_try uaccess_try_nospec -#define get_user_catch(err) uaccess_catch(err) - -#define get_user_ex(x, ptr) do { \ - unsigned long __gue_val; \ - __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ - (x) = (__force __typeof__(*(ptr)))__gue_val; \ -} while (0) - -#define put_user_try uaccess_try -#define put_user_catch(err) uaccess_catch(err) - -#define put_user_ex(x, ptr) \ - __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long @@ -578,102 +489,14 @@ strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); -unsigned long __must_check clear_user(void __user *mem, unsigned long len); -unsigned long __must_check __clear_user(void __user *mem, unsigned long len); - -extern void __cmpxchg_wrong_size(void) - __compiletime_error("Bad argument size for cmpxchg"); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned len); +#define copy_mc_to_kernel copy_mc_to_kernel -#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \ -({ \ - int __ret = 0; \ - __typeof__(ptr) __uval = (uval); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin_nospec(); \ - switch (size) { \ - case 1: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "q" (__new), "1" (__old) \ - : 
"memory" \ - ); \ - break; \ - } \ - case 2: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 4: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 8: \ - { \ - if (!IS_ENABLED(CONFIG_X86_64)) \ - __cmpxchg_wrong_size(); \ - \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - default: \ - __cmpxchg_wrong_size(); \ - } \ - __uaccess_end(); \ - *__uval = __old; \ - __ret; \ -}) - -#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \ -({ \ - access_ok((ptr), sizeof(*(ptr))) ? \ - __user_atomic_cmpxchg_inatomic((uval), (ptr), \ - (old), (new), sizeof(*(ptr))) : \ - -EFAULT; \ -}) +unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned len); +#endif /* * movsl can be slow when source and dest are not both 8-byte aligned @@ -686,28 +509,13 @@ extern struct movsl_mask { #define ARCH_HAS_NOCACHE_UACCESS 1 -#ifdef CONFIG_X86_32 -# include <asm/uaccess_32.h> -#else -# include <asm/uaccess_64.h> -#endif - -/* - * We rely on the nested NMI work to allow atomic faults from the NMI path; the - * nested NMI paths are careful to preserve CR2. - * - * Caller must use pagefault_enable/disable, or run in interrupt context, - * and also do a uaccess_ok() check - */ -#define __copy_from_user_nmi __copy_from_user_inatomic - /* * The "unsafe" user accesses aren't really "unsafe", but the naming * is a big fat warning: you have to not only do the access_ok() * checking before using them, but you have to surround them with the * user_access_begin/end() pair. 
*/ -static __must_check inline bool user_access_begin(const void __user *ptr, size_t len) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr,len))) return 0; @@ -717,17 +525,117 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() __uaccess_end() -#define unsafe_put_user(x, ptr, label) \ +#define user_access_save() smap_save() +#define user_access_restore(x) smap_restore(x) + +#define arch_unsafe_put_user(x, ptr, label) \ __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) -#define unsafe_get_user(x, ptr, err_label) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_unsafe_get_user(x, ptr, err_label) \ +do { \ + __inttype(*(ptr)) __gu_val; \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ +} while (0) +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u<size> to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. 
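+ *
+ * (Editor's illustration: a 15-byte unsafe_copy_to_user() below decays
+ * into one u64, one u32, one u16 and one u8 unsafe_put_user(), each
+ * branching to the same error label on a fault.)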
+ */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_get_kernel_nofault(dst, src, type, err_label) \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), err_label) +#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define arch_get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __kr_err; \ + \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define arch_put_kernel_nofault(dst, src, type, err_label) \ + __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ + sizeof(type), err_label) #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index ba2dc1930630..40379a1adbb8 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (__builtin_constant_p(n)) { - unsigned long ret; - - switch (n) { - case 1: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)to, from, ret, - "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)to, from, ret, - "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)to, from, ret, - "l", "k", "=r", 4); - __uaccess_end(); - return ret; - } - } return __copy_user_ll(to, (__force const void *)from, n); } @@ -60,4 +33,7 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, return __copy_from_user_ll_nocache_nozero(to, from, n); } +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + #endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a9d637bc301d..915124011c27 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -11,193 +11,155 @@ #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> +#include <asm/percpu.h> +#ifdef MODULE + #define runtime_const_ptr(sym) (sym) +#else + #include <asm/runtime-const.h> +#endif +extern unsigned long USER_PTR_MAX; + +#ifdef CONFIG_ADDRESS_MASKING /* - * Copy To/From Userspace + * Mask out tag bits from the address. 
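+ * (Editor's note: with LAM_U57, for instance, the hardware ignores bits
+ * 62:57 of a user pointer; tlbstate_untag_mask clears exactly those bits,
+ * so untagged_addr(ptr | tag << 57) yields the plain ptr again.)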
 */
+static inline unsigned long __untagged_addr(unsigned long addr)
+{
+	asm_inline (ALTERNATIVE("", "and " __percpu_arg([mask]) ", %[addr]",
+				X86_FEATURE_LAM)
+		    : [addr] "+r" (addr)
+		    : [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
 
-/* Handles exceptions in both to and from, but doesn't do access_ok */
-__must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+	return addr;
+}
 
-static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+#define untagged_addr(addr) ({ \
+	unsigned long __addr = (__force unsigned long)(addr); \
+	(__force __typeof__(addr))__untagged_addr(__addr); \
+})
+
+static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
+						   unsigned long addr)
 {
-	unsigned ret;
+	mmap_assert_locked(mm);
+	return addr & (mm)->context.untag_mask;
+}
 
-	/*
-	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
-	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
-	 * Otherwise, use copy_user_generic_unrolled.
-	 */
-	alternative_call_2(copy_user_generic_unrolled,
-			 copy_user_generic_string,
-			 X86_FEATURE_REP_GOOD,
-			 copy_user_enhanced_fast_string,
-			 X86_FEATURE_ERMS,
-			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
-				     "=d" (len)),
-			 "1" (to), "2" (from), "3" (len)
-			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+#define untagged_addr_remote(mm, addr) ({ \
+	unsigned long __addr = (__force unsigned long)(addr); \
+	(__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
+})
+
+#endif
+
+#define valid_user_address(x) \
+	likely((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
+
+/*
+ * Masking the user address is an alternative to a conditional
+ * user_access_begin that can avoid the fencing. This only works
+ * for dense accesses starting at the address.
+ */
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+	void __user *ret;
+	asm("cmp %1,%0\n\t"
+	    "cmova %1,%0"
+		:"=r" (ret)
+		:"r" (runtime_const_ptr(USER_PTR_MAX)),
+		 "0" (ptr));
 	return ret;
 }
+#define masked_user_access_begin(x) ({ \
+	auto __masked_ptr = (x); \
+	__masked_ptr = mask_user_address(__masked_ptr); \
+	__uaccess_begin(); __masked_ptr; })
 
-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+/*
+ * User pointers can have tag bits on x86-64. This scheme tolerates
+ * arbitrary values in those bits rather than masking them off.
+ *
+ * Enforce two rules:
+ * 1. 'ptr' must be in the user part of the address space
+ * 2. 'ptr+size' must not overflow into kernel addresses
+ *
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
+ *
+ * In fact, we could probably remove the size check entirely, since
+ * any kernel accesses will be in increasing address order starting
+ * at 'ptr'.
+ *
+ * That's a separate optimization, for now just handle the small
+ * constant case.
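+ *
+ * (Editor's illustration: for a compile-time size <= PAGE_SIZE only 'ptr'
+ * itself is range-checked and the guard page absorbs the tail; for larger
+ * or non-constant sizes 'ptr + size' is checked together with an explicit
+ * overflow test, as __access_ok() below does.)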
+ */ +static inline bool __access_ok(const void __user *ptr, unsigned long size) { - unsigned long ret; + if (__builtin_constant_p(size <= PAGE_SIZE) && size <= PAGE_SIZE) { + return valid_user_address(ptr); + } else { + unsigned long sum = size + (__force unsigned long)ptr; + + return valid_user_address(sum) && sum >= (__force unsigned long)ptr; + } +} +#define __access_ok __access_ok - __uaccess_begin(); +/* + * Copy To/From Userspace + */ + +/* Handles exceptions in both to and from, but doesn't do access_ok */ +__must_check unsigned long +rep_movs_alternative(void *to, const void *from, unsigned len); + +static __always_inline __must_check unsigned long +copy_user_generic(void *to, const void *from, unsigned long len) +{ + stac(); /* - * Note, __memcpy_mcsafe() is explicitly used since it can - * handle exceptions / faults. memcpy_mcsafe() may fall back to - * memcpy() which lacks this handling. + * If CPU has FSRM feature, use 'rep movs'. + * Otherwise, use rep_movs_alternative. */ - ret = __memcpy_mcsafe(to, from, len); - __uaccess_end(); - return ret; + asm volatile( + "1:\n\t" + ALTERNATIVE("rep movsb", + "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT + : : "memory", "rax"); + clac(); + return len; } static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); - switch (size) { - case 1: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, - ret, "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, - ret, "l", "k", "=r", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 10); - if (likely(!ret)) - __get_user_asm_nozero(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 16: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); - if (likely(!ret)) - __get_user_asm_nozero(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - default: - return copy_user_generic(dst, (__force void *)src, size); - } + return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); - switch (size) { - case 1: - __uaccess_begin(); - __put_user_asm(*(u8 *)src, (u8 __user *)dst, - ret, "b", "b", "iq", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin(); - __put_user_asm(*(u16 *)src, (u16 __user *)dst, - ret, "w", "w", "ir", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin(); - __put_user_asm(*(u32 *)src, (u32 __user *)dst, - ret, "l", "k", 
"ir", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 10); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); - } - __uaccess_end(); - return ret; - case 16: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 16); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); - } - __uaccess_end(); - return ret; - default: - return copy_user_generic((__force void *)dst, src, size); - } -} - -static __always_inline __must_check -unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size) -{ - return copy_user_generic((__force void *)dst, - (__force void *)src, size); + return copy_user_generic((__force void *)dst, src, size); } -extern long __copy_user_nocache(void *dst, const void __user *src, - unsigned size, int zerorest); - +extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size); extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); -extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, - size_t len); static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { + long ret; kasan_check_write(dst, size); - return __copy_user_nocache(dst, src, size, 0); + stac(); + ret = __copy_user_nocache(dst, src, size); + clac(); + return ret; } static inline int @@ -207,10 +169,40 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) return __copy_user_flushcache(dst, src, size); } -unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len); +/* + * Zero Userspace. + */ -unsigned long -mcsafe_handle_tail(char *to, char *from, unsigned len); +__must_check unsigned long +rep_stos_alternative(void __user *addr, unsigned long len); + +static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) +{ + might_fault(); + stac(); + /* + * No memory constraint because it doesn't change any memory gcc + * knows about. 
+ */ + asm volatile( + "1:\n\t" + ALTERNATIVE("rep stosb", + "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS)) + "2:\n" + _ASM_EXTABLE_UA(1b, 2b) + : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT + : "a" (0)); + + clac(); + + return size; +} + +static __always_inline unsigned long clear_user(void __user *to, unsigned long n) +{ + if (__access_ok(to, n)) + return __clear_user(to, n); + return n; +} #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h index db43f2a0d92c..aeed98c3c9e1 100644 --- a/arch/x86/include/asm/umip.h +++ b/arch/x86/include/asm/umip.h @@ -4,9 +4,9 @@ #include <linux/types.h> #include <asm/ptrace.h> -#ifdef CONFIG_X86_INTEL_UMIP +#ifdef CONFIG_X86_UMIP bool fixup_umip_exception(struct pt_regs *regs); #else static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; } -#endif /* CONFIG_X86_INTEL_UMIP */ +#endif /* CONFIG_X86_UMIP */ #endif /* _ASM_X86_UMIP_H */ diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h new file mode 100644 index 000000000000..f5937e9866ac --- /dev/null +++ b/arch/x86/include/asm/unaccepted_memory.h @@ -0,0 +1,27 @@ +#ifndef _ASM_X86_UNACCEPTED_MEMORY_H +#define _ASM_X86_UNACCEPTED_MEMORY_H + +#include <linux/efi.h> +#include <asm/tdx.h> +#include <asm/sev.h> + +static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* Platform-specific memory-acceptance call goes here */ + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { + if (!tdx_accept_memory(start, end)) + panic("TDX: Failed to accept memory\n"); + } else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + snp_accept_memory(start, end); + } else { + panic("Cannot accept memory: unknown platform\n"); + } +} + +static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void) +{ + if (efi.unaccepted == EFI_INVALID_TABLE_ADDR) + return NULL; + return __va(efi.unaccepted); +} +#endif diff --git a/arch/x86/include/asm/unaligned.h b/arch/x86/include/asm/unaligned.h deleted file mode 100644 index 9c754a7447aa..000000000000 --- a/arch/x86/include/asm/unaligned.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_UNALIGNED_H -#define _ASM_X86_UNALIGNED_H - -/* - * The x86 can do unaligned accesses itself. 
- */ - -#include <linux/unaligned/access_ok.h> -#include <linux/unaligned/generic.h> - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_X86_UNALIGNED_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index dc4ed8bc2382..6c9e5bdd3916 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -5,12 +5,6 @@ #include <uapi/asm/unistd.h> -# ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) -# else -# define __SYSCALL_MASK (~0) -# endif - # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> @@ -19,19 +13,27 @@ # define __ARCH_WANT_SYS_OLD_MMAP # define __ARCH_WANT_SYS_OLD_SELECT +# define IA32_NR_syscalls (__NR_syscalls) + # else # include <asm/unistd_64.h> # include <asm/unistd_64_x32.h> -# define __ARCH_WANT_COMPAT_SYS_TIME -# define __ARCH_WANT_SYS_UTIME32 +# include <asm/unistd_32_ia32.h> +# define __ARCH_WANT_SYS_TIME +# define __ARCH_WANT_SYS_UTIME +# define __ARCH_WANT_COMPAT_STAT # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # define __ARCH_WANT_COMPAT_SYS_PREADV64V2 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +# define X32_NR_syscalls (__NR_x32_syscalls) +# define IA32_NR_syscalls (__NR_ia32_syscalls) # endif +# define NR_syscalls (__NR_syscalls) + # define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_OLD_READDIR # define __ARCH_WANT_OLD_STAT @@ -48,8 +50,8 @@ # define __ARCH_WANT_SYS_SIGPENDING # define __ARCH_WANT_SYS_SIGPROCMASK # define __ARCH_WANT_SYS_SOCKETCALL -# define __ARCH_WANT_SYS_TIME -# define __ARCH_WANT_SYS_UTIME +# define __ARCH_WANT_SYS_TIME32 +# define __ARCH_WANT_SYS_UTIME32 # define __ARCH_WANT_SYS_WAITPID # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 1f86e1b0a5cd..7cede4dc21f0 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/ftrace.h> +#include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -15,14 +16,23 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; +#if defined(CONFIG_RETHOOK) + struct llist_node *kr_cur; +#endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; - struct pt_regs *regs; + struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; + /* + * If non-NULL: The current frame is incomplete and doesn't contain a + * valid BP. When looking for the next frame, use this instead of the + * non-existent saved BP. + */ + unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; @@ -93,6 +103,30 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif +static inline +unsigned long unwind_recover_rethook(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ +#ifdef CONFIG_RETHOOK + if (is_rethook_trampoline(addr)) + return rethook_find_ret_addr(state->task, (unsigned long)addr_p, + &state->kr_cur); +#endif + return addr; +} + +/* Recover the return address modified by rethook and ftrace_graph. 
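+ * (Editor's gloss: ftrace_graph_ret_addr() first undoes any graph-tracer
+ * patching of the on-stack return address, then unwind_recover_rethook()
+ * maps a rethook trampoline address back to the real return site.)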
*/ +static inline +unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + unsigned long ret; + + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, addr_p); + return unwind_recover_rethook(state, ret, addr_p); +} + /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 0bcdb1279361..8f4579c5a6f8 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -1,51 +1,26 @@ #ifndef _ASM_X86_UNWIND_HINTS_H #define _ASM_X86_UNWIND_HINTS_H +#include <linux/objtool.h> + #include "orc_types.h" -#ifdef __ASSEMBLY__ - -/* - * In asm, there are two kinds of code: normal C-type callable functions and - * the rest. The normal callable functions can be called by other code, and - * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this - * category. In this case, no special debugging annotations are needed because - * objtool can automatically generate the ORC data for the ORC unwinder to read - * at runtime. - * - * Anything which doesn't fall into the above category, such as syscall and - * interrupt handlers, tends to not be called directly by other functions, and - * often does unusual non-C-function-type things with the stack pointer. Such - * code needs to be annotated such that objtool can understand it. The - * following CFI hint macros are for this type of code. - * - * These macros provide hints to objtool about the state of the stack at each - * instruction. Objtool starts from the hints and follows the code flow, - * making automatic CFI adjustments when it sees pushes and pops, filling out - * the debuginfo as necessary. It will also warn if it sees any - * inconsistencies. - */ -.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0 -#ifdef CONFIG_STACK_VALIDATION -.Lunwind_hint_ip_\@: - .pushsection .discard.unwind_hints - /* struct unwind_hint */ - .long .Lunwind_hint_ip_\@ - . 
- .short \sp_offset - .byte \sp_reg - .byte \type - .byte \end - .balign 4 - .popsection -#endif +#ifdef __ASSEMBLER__ + +.macro UNWIND_HINT_END_OF_STACK + UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK +.endm + +.macro UNWIND_HINT_UNDEFINED + UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED .endm -.macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1 +.macro UNWIND_HINT_ENTRY + VALIDATE_UNRET_BEGIN + UNWIND_HINT_END_OF_STACK .endm -.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 +.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1 .if \base == %rsp .if \indirect .set sp_reg, ORC_REG_SP_INDIRECT @@ -66,44 +41,53 @@ .set sp_offset, \offset - .if \iret - .set type, ORC_TYPE_REGS_IRET + .if \partial + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL .elseif \extra == 0 - .set type, ORC_TYPE_REGS_IRET + .set type, UNWIND_HINT_TYPE_REGS_PARTIAL .set sp_offset, \offset + (16*8) .else - .set type, ORC_TYPE_REGS + .set type, UNWIND_HINT_TYPE_REGS .endif - UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type + UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type signal=\signal +.endm + +.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 signal=1 + UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal .endm -.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 - UNWIND_HINT_REGS base=\base offset=\offset iret=1 +.macro UNWIND_HINT_IRET_ENTRY base=%rsp offset=0 signal=1 + VALIDATE_UNRET_BEGIN + UNWIND_HINT_IRET_REGS base=\base offset=\offset signal=\signal .endm -.macro UNWIND_HINT_FUNC sp_offset=8 - UNWIND_HINT sp_offset=\sp_offset +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm -#else /* !__ASSEMBLY__ */ +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + +#else + +#define UNWIND_HINT_UNDEFINED \ + UNWIND_HINT(UNWIND_HINT_TYPE_UNDEFINED, 0, 0, 0) -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "987: \n\t" \ - ".pushsection .discard.unwind_hints\n\t" \ - /* struct unwind_hint */ \ - ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ - ".byte " __stringify(sp_reg) "\n\t" \ - ".byte " __stringify(type) "\n\t" \ - ".byte " __stringify(end) "\n\t" \ - ".balign 4 \n\t" \ - ".popsection\n\t" +#define UNWIND_HINT_FUNC \ + UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0) -#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) +#define UNWIND_HINT_SAVE \ + UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) -#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) +#define UNWIND_HINT_RESTORE \ + UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h new file mode 100644 index 000000000000..12064284bc4e --- /dev/null +++ b/arch/x86/include/asm/unwind_user.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNWIND_USER_H +#define _ASM_X86_UNWIND_USER_H + +#ifdef CONFIG_HAVE_UNWIND_USER_FP + +#include <asm/ptrace.h> +#include <asm/uprobes.h> + +#define ARCH_INIT_USER_FP_FRAME(ws) \ + .cfa_off = 2*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = -2*(ws), \ + .use_fp = true, + +#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \ + .cfa_off = 1*(ws), \ + .ra_off = -1*(ws), \ + .fp_off = 0, \ + .use_fp = false, + +static inline int unwind_user_word_size(struct pt_regs 
*regs) +{ + /* We can't unwind VM86 stacks */ + if (regs->flags & X86_VM_MASK) + return 0; +#ifdef CONFIG_X86_64 + if (!user_64bit_mode(regs)) + return sizeof(int); +#endif + return sizeof(long); +} + +static inline bool unwind_user_at_function_start(struct pt_regs *regs) +{ + return is_uprobe_at_func_entry(regs); +} + +#endif /* CONFIG_HAVE_UNWIND_USER_FP */ + +#endif /* _ASM_X86_UNWIND_USER_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index d8bfa98fca98..362210c79998 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -1,22 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_UPROBES_H #define _ASM_UPROBES_H /* * User-space Probes (UProbes) for x86 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2008-2011 * Authors: * Srikar Dronamraju @@ -33,6 +20,11 @@ typedef u8 uprobe_opcode_t; #define UPROBE_SWBP_INSN 0xcc #define UPROBE_SWBP_INSN_SIZE 1 +enum { + ARCH_UPROBE_FLAG_CAN_OPTIMIZE = 0, + ARCH_UPROBE_FLAG_OPTIMIZE_FAIL = 1, +}; + struct uprobe_xol_ops; struct arch_uprobe { @@ -58,6 +50,8 @@ struct arch_uprobe { u8 ilen; } push; }; + + unsigned long flags; }; struct arch_uprobe_task { @@ -68,4 +62,13 @@ struct arch_uprobe_task { unsigned int saved_tf; }; +#ifdef CONFIG_UPROBES +extern bool is_uprobe_at_func_entry(struct pt_regs *regs); +#else +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h index d72c3d66e94f..8963915e533f 100644 --- a/arch/x86/include/asm/user_32.h +++ b/arch/x86/include/asm/user_32.h @@ -124,9 +124,5 @@ struct user{ char u_comm[32]; /* User command that was responsible */ int u_debugreg[8]; }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_X86_USER_32_H */ diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h index db909923611c..1dd10f07ccd6 100644 --- a/arch/x86/include/asm/user_64.h +++ b/arch/x86/include/asm/user_64.h @@ -130,9 +130,5 @@ struct user { unsigned long error_code; /* CPU error code or 0 */ unsigned long fault_address; /* CR3 or 0 */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_X86_USER_64_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index e652a7cc6186..6989b824fd32 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -1,27 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_UV_BIOS_H #define _ASM_X86_UV_BIOS_H /* * UV BIOS layer definitions. 
* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson <rja@sgi.com> + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) Russ Anderson <rja@sgi.com> */ +#include <linux/efi.h> #include <linux/rtc.h> /* @@ -40,6 +29,20 @@ enum uv_bios_cmd { UV_BIOS_SET_LEGACY_VGA_TARGET }; +#define UV_BIOS_EXTRA 0x10000 +#define UV_BIOS_GET_PCI_TOPOLOGY 0x10001 +#define UV_BIOS_GET_GEOINFO 0x10003 + +#define UV_BIOS_EXTRA_OP_MEM_COPYIN 0x1000 +#define UV_BIOS_EXTRA_OP_MEM_COPYOUT 0x2000 +#define UV_BIOS_EXTRA_OP_MASK 0x0fff +#define UV_BIOS_EXTRA_GET_HEAPSIZE 1 +#define UV_BIOS_EXTRA_INSTALL_HEAP 2 +#define UV_BIOS_EXTRA_MASTER_NASID 3 +#define UV_BIOS_EXTRA_OBJECT_COUNT (10|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_OBJECTS (12|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_PORTS (13|UV_BIOS_EXTRA_OP_MEM_COPYOUT) + /* * Status values returned from a BIOS call. */ @@ -48,7 +51,8 @@ enum { BIOS_STATUS_SUCCESS = 0, BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, BIOS_STATUS_EINVAL = -EINVAL, - BIOS_STATUS_UNAVAIL = -EBUSY + BIOS_STATUS_UNAVAIL = -EBUSY, + BIOS_STATUS_ABORT = -EINTR, }; /* Address map parameters */ @@ -83,18 +87,27 @@ struct uv_gam_range_entry { u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */ }; +#define UV_AT_SIZE 8 /* 7 character arch type + NULL char */ +struct uv_arch_type_entry { + char archtype[UV_AT_SIZE]; +}; + #define UV_SYSTAB_SIG "UVST" -#define UV_SYSTAB_VERSION_1 1 /* UV1/2/3 BIOS version */ +#define UV_SYSTAB_VERSION_1 1 /* UV2/3 BIOS version */ #define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */ #define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */ #define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */ #define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */ #define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3 +#define UV_SYSTAB_VERSION_UV5 0x500 /* UV5 GAM base version */ +#define UV_SYSTAB_VERSION_UV5_LATEST UV_SYSTAB_VERSION_UV5 + #define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */ #define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */ #define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */ -#define UV_SYSTAB_TYPE_MAX 3 +#define UV_SYSTAB_TYPE_ARCH_TYPE 3 /* UV arch type */ +#define UV_SYSTAB_TYPE_MAX 4 /* * The UV system table describes specific firmware @@ -103,7 +116,8 @@ struct uv_gam_range_entry { struct uv_systab { char signature[4]; /* must be UV_SYSTAB_SIG */ u32 revision; /* distinguish different firmware revs */ - u64 function; /* BIOS runtime callback function ptr */ + u64 (__efiapi *function)(enum uv_bios_cmd, ...); + /* BIOS runtime callback function ptr */ u32 size; /* systab size (starting with _VERSION_UV4) */ 
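+	/* (Editor's note: 'entry' below is an old-style variable-length
+	 * trailer; entries follow until one of type UV_SYSTAB_TYPE_UNUSED,
+	 * i.e. offset == 0, and 'size' bounds the whole table.) */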
struct { u32 type:8; /* type of entry */ @@ -111,6 +125,32 @@ struct uv_systab { } entry[1]; /* additional entries follow */ }; extern struct uv_systab *uv_systab; + +#define UV_BIOS_MAXSTRING 128 +struct uv_bios_hub_info { + unsigned int id; + union { + struct { + unsigned long long this_part:1; + unsigned long long is_shared:1; + unsigned long long is_disabled:1; + } fields; + struct { + unsigned long long flags; + unsigned long long reserved; + } b; + } f; + char name[UV_BIOS_MAXSTRING]; + char location[UV_BIOS_MAXSTRING]; + unsigned int ports; +}; + +struct uv_bios_port_info { + unsigned int port; + unsigned int conn_id; + unsigned int conn_port; +}; + /* (... end of definitions from UV BIOS ...) */ enum { @@ -135,13 +175,6 @@ enum uv_memprotect { UV_MEMPROT_ALLOW_RW }; -/* - * bios calls have 6 parameters - */ -extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); - extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, @@ -151,11 +184,17 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); -#ifdef CONFIG_EFI -extern void uv_bios_init(void); -#else -void uv_bios_init(void) { } -#endif +extern s64 uv_bios_get_master_nasid(u64 sz, u64 *nasid); +extern s64 uv_bios_get_heapsize(u64 nasid, u64 sz, u64 *heap_sz); +extern s64 uv_bios_install_heap(u64 nasid, u64 sz, u64 *heap); +extern s64 uv_bios_obj_count(u64 nasid, u64 sz, u64 *objcnt); +extern s64 uv_bios_enum_objs(u64 nasid, u64 sz, u64 *objbuf); +extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 sz, u64 *portbuf); +extern s64 uv_bios_get_geoinfo(u64 nasid, u64 sz, u64 *geo); +extern s64 uv_bios_get_pci_topology(u64 sz, u64 *buf); + +extern int uv_bios_init(void); +extern unsigned long get_uv_systab_phys(bool msg); extern unsigned long sn_rtc_cycles_per_second; extern int uv_type; @@ -163,8 +202,14 @@ extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; extern long system_serial_number; -#define uv_partition_coherence_id() (sn_coherency_id) +extern ssize_t uv_get_archtype(char *buf, int len); +extern int uv_get_hubless_system(void); extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ +/* + * EFI runtime lock; cf. 
firmware/efi/runtime-wrappers.c for details + */ +extern struct semaphore __efi_uv_runtime_lock; + #endif /* _ASM_X86_UV_BIOS_H */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index e60c45fd3679..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,41 +2,42 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> - -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; - -struct cpumask; -struct mm_struct; +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; #ifdef CONFIG_X86_UV #include <linux/efi.h> +#define UV_PROC_NODE "sgi_uv" + +static inline int uv(int uvtype) +{ + /* uv(0) is "any" */ + if (uvtype >= 0 && uvtype <= 30) + return 1 << uvtype; + return 1; +} + +extern unsigned long uv_systab_phys; + extern enum uv_system_type get_uv_system_type(void); static inline bool is_early_uv_system(void) { - return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); + return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR; } extern int is_uv_system(void); -extern int is_uv_hubless(void); +extern int is_uv_hubbed(int uvtype); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); -extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info); -#else /* X86_UV */ +#else /* !X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } -static inline int is_uv_hubless(void) { return 0; } +static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h deleted file mode 100644 index 7803114aa140..000000000000 --- a/arch/x86/include/asm/uv/uv_bau.h +++ /dev/null @@ -1,861 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV Broadcast Assist Unit definitions - * - * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved. - */ - -#ifndef _ASM_X86_UV_UV_BAU_H -#define _ASM_X86_UV_UV_BAU_H - -#include <linux/bitmap.h> -#define BITSPERBYTE 8 - -/* - * Broadcast Assist Unit messaging structures - * - * Selective Broadcast activations are induced by software action - * specifying a particular 8-descriptor "set" via a 6-bit index written - * to an MMR. - * Thus there are 64 unique 512-byte sets of SB descriptors - one set for - * each 6-bit index value. These descriptor sets are mapped in sequence - * starting with set 0 located at the address specified in the - * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, - * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. - * - * We will use one set for sending BAU messages from each of the - * cpu's on the uvhub. - * - * TLB shootdown will use the first of the 8 descriptors of each set. - * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). - */ - -#define MAX_CPUS_PER_UVHUB 128 -#define MAX_CPUS_PER_SOCKET 64 -#define ADP_SZ 64 /* hardware-provided max. 
*/ -#define UV_CPUS_PER_AS 32 /* hardware-provided max. */ -#define ITEMS_PER_DESC 8 -/* the 'throttle' to prevent the hardware stay-busy bug */ -#define MAX_BAU_CONCURRENT 3 -#define UV_ACT_STATUS_MASK 0x3 -#define UV_ACT_STATUS_SIZE 2 -#define UV_DISTRIBUTION_SIZE 256 -#define UV_SW_ACK_NPENDING 8 -#define UV1_NET_ENDPOINT_INTD 0x38 -#define UV2_NET_ENDPOINT_INTD 0x28 -#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ - UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) -#define UV_PAYLOADQ_GNODE_SHIFT 49 -#define UV_PTC_BASENAME "sgi_uv/ptc_statistics" -#define UV_BAU_BASENAME "sgi_uv/bau_tunables" -#define UV_BAU_TUNABLES_DIR "sgi_uv" -#define UV_BAU_TUNABLES_FILE "bau_tunables" -#define WHITESPACE " \t\n" -#define cpubit_isset(cpu, bau_local_cpumask) \ - test_bit((cpu), (bau_local_cpumask).bits) - -/* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ -/* - * UV2: Bit 19 selects between - * (0): 10 microsecond timebase and - * (1): 80 microseconds - * we're using 560us, similar to UV1: 65 units of 10us - */ -#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) -#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL) - -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \ - UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ - UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) -/* assuming UV3 is the same */ - -#define BAU_MISC_CONTROL_MULT_MASK 3 - -#define UVH_AGING_PRESCALE_SEL 0x000000b000UL -/* [30:28] URGENCY_7 an index into a table of times */ -#define BAU_URGENCY_7_SHIFT 28 -#define BAU_URGENCY_7_MASK 7 - -#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL -/* [45:40] BAU - BAU transaction timeout select - a multiplier */ -#define BAU_TRANS_SHIFT 40 -#define BAU_TRANS_MASK 0x3f - -/* - * shorten some awkward names - */ -#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT -#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT -#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT -#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD -#define PREFETCH_HINT_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT -#define SB_STATUS_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT -#define write_gmmr uv_write_global_mmr64 -#define write_lmmr uv_write_local_mmr -#define read_lmmr uv_read_local_mmr -#define read_gmmr uv_read_global_mmr64 - -/* - * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 - */ -#define DS_IDLE 0 -#define DS_ACTIVE 1 -#define DS_DESTINATION_TIMEOUT 2 -#define DS_SOURCE_TIMEOUT 3 -/* - * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 - * values 1 and 3 will not occur - * Decoded meaning ERROR BUSY AUX ERR - * ------------------------------- ---- ----- ------- - * IDLE 0 0 0 - * BUSY (active) 0 1 0 - * SW Ack Timeout (destination) 1 0 0 - * SW Ack INTD rejected (strong NACK) 1 0 1 - * Source Side Time Out Detected 1 1 0 - * Destination Side PUT Failed 1 1 1 - */ -#define UV2H_DESC_IDLE 0 -#define UV2H_DESC_BUSY 2 -#define UV2H_DESC_DEST_TIMEOUT 4 -#define UV2H_DESC_DEST_STRONG_NACK 5 -#define UV2H_DESC_SOURCE_TIMEOUT 6 -#define UV2H_DESC_DEST_PUT_ERR 7 - -/* - * delay for 'plugged' timeout retries, in microseconds - */ -#define PLUGGED_DELAY 10 - -/* - * threshholds at which to use IPI to free resources - */ -/* after this # consecutive 'plugged' timeouts, use IPI to release resources */ -#define PLUGSB4RESET 100 -/* after this many consecutive timeouts, use IPI to release resources */ -#define TIMEOUTSB4RESET 1 -/* at this number uses of IPI to release resources, giveup the request */ 
-#define IPI_RESET_LIMIT 1 -/* after this # consecutive successes, bump up the throttle if it was lowered */ -#define COMPLETE_THRESHOLD 5 -/* after this # of giveups (fall back to kernel IPI's) disable the use of - the BAU for a period of time */ -#define GIVEUP_LIMIT 100 - -#define UV_LB_SUBNODEID 0x10 - -/* these two are the same for UV1 and UV2: */ -#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT -#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK -/* 4 bits of software ack period */ -#define UV2_ACK_MASK 0x7UL -#define UV2_ACK_UNITS_SHFT 3 -#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT - -/* - * number of entries in the destination side payload queue - */ -#define DEST_Q_SIZE 20 -/* - * number of destination side software ack resources - */ -#define DEST_NUM_RESOURCES 8 -/* - * completion statuses for sending a TLB flush message - */ -#define FLUSH_RETRY_PLUGGED 1 -#define FLUSH_RETRY_TIMEOUT 2 -#define FLUSH_GIVEUP 3 -#define FLUSH_COMPLETE 4 - -/* - * tuning the action when the numalink network is extremely delayed - */ -#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in - microseconds */ -#define CONGESTED_REPS 10 /* long delays averaged over - this many broadcasts */ -#define DISABLED_PERIOD 10 /* time for the bau to be - disabled, in seconds */ -/* see msg_type: */ -#define MSG_NOOP 0 -#define MSG_REGULAR 1 -#define MSG_RETRY 2 - -#define BAU_DESC_QUALIFIER 0x534749 - -enum uv_bau_version { - UV_BAU_V1 = 1, - UV_BAU_V2, - UV_BAU_V3, - UV_BAU_V4, -}; - -/* - * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) - * If the 'multilevel' flag in the header portion of the descriptor - * has been set to 0, then endpoint multi-unicast mode is selected. - * The distribution specification (32 bytes) is interpreted as a 256-bit - * distribution vector. Adjacent bits correspond to consecutive even numbered - * nodeIDs. The result of adding the index of a given bit to the 15-bit - * 'base_dest_nasid' field of the header corresponds to the - * destination nodeID associated with that specified bit. - */ -struct pnmask { - unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; -}; - -/* - * mask of cpu's on a uvhub - * (during initialization we need to check that unsigned long has - * enough bits for max. cpu's per uvhub) - */ -struct bau_local_cpumask { - unsigned long bits; -}; - -/* - * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor) - * only 12 bytes (96 bits) of the payload area are usable. - * An additional 3 bytes (bits 27:4) of the header address are carried - * to the next bytes of the destination payload queue. - * And an additional 2 bytes of the header Suppl_A field are also - * carried to the destination payload queue. - * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte) - * of the destination payload queue, which is written by the hardware - * with the s/w ack resource bit vector. 
- * [ effective message contents (16 bytes (128 bits) maximum), not counting - * the s/w ack bit vector ] - */ - -/** - * struct uv1_2_3_bau_msg_payload - defines payload for INTD transactions - * @address: Signifies a page or all TLB's of the cpu - * @sending_cpu: CPU from which the message originates - * @acknowledge_count: CPUs on the destination Hub that received the interrupt - */ -struct uv1_2_3_bau_msg_payload { - u64 address; - u16 sending_cpu; - u16 acknowledge_count; -}; - -/** - * struct uv4_bau_msg_payload - defines payload for INTD transactions - * @address: Signifies a page or all TLB's of the cpu - * @sending_cpu: CPU from which the message originates - * @acknowledge_count: CPUs on the destination Hub that received the interrupt - * @qualifier: Set by source to verify origin of INTD broadcast - */ -struct uv4_bau_msg_payload { - u64 address; - u16 sending_cpu; - u16 acknowledge_count; - u32 reserved:8; - u32 qualifier:24; -}; - -/* - * UV1 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) - * see table 4.2.3.0.1 in broacast_assist spec. - */ -struct uv1_bau_msg_header { - unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ - /* bits 5:0 */ - unsigned int base_dest_nasid:15; /* nasid of the first bit */ - /* bits 20:6 */ /* in uvhub map */ - unsigned int command:8; /* message type */ - /* bits 28:21 */ - /* 0x38: SN3net EndPoint Message */ - unsigned int rsvd_1:3; /* must be zero */ - /* bits 31:29 */ - /* int will align on 32 bits */ - unsigned int rsvd_2:9; /* must be zero */ - /* bits 40:32 */ - /* Suppl_A is 56-41 */ - unsigned int sequence:16; /* message sequence number */ - /* bits 56:41 */ /* becomes bytes 16-17 of msg */ - /* Address field (96:57) is - never used as an address - (these are address bits - 42:3) */ - - unsigned int rsvd_3:1; /* must be zero */ - /* bit 57 */ - /* address bits 27:4 are payload */ - /* these next 24 (58-81) bits become bytes 12-14 of msg */ - /* bits 65:58 land in byte 12 */ - unsigned int replied_to:1; /* sent as 0 by the source to - byte 12 */ - /* bit 58 */ - unsigned int msg_type:3; /* software type of the - message */ - /* bits 61:59 */ - unsigned int canceled:1; /* message canceled, resource - is to be freed*/ - /* bit 62 */ - unsigned int payload_1a:1; /* not currently used */ - /* bit 63 */ - unsigned int payload_1b:2; /* not currently used */ - /* bits 65:64 */ - - /* bits 73:66 land in byte 13 */ - unsigned int payload_1ca:6; /* not currently used */ - /* bits 71:66 */ - unsigned int payload_1c:2; /* not currently used */ - /* bits 73:72 */ - - /* bits 81:74 land in byte 14 */ - unsigned int payload_1d:6; /* not currently used */ - /* bits 79:74 */ - unsigned int payload_1e:2; /* not currently used */ - /* bits 81:80 */ - - unsigned int rsvd_4:7; /* must be zero */ - /* bits 88:82 */ - unsigned int swack_flag:1; /* software acknowledge flag */ - /* bit 89 */ - /* INTD trasactions at - destination are to wait for - software acknowledge */ - unsigned int rsvd_5:6; /* must be zero */ - /* bits 95:90 */ - unsigned int rsvd_6:5; /* must be zero */ - /* bits 100:96 */ - unsigned int int_both:1; /* if 1, interrupt both sockets - on the uvhub */ - /* bit 101*/ - unsigned int fairness:3; /* usually zero */ - /* bits 104:102 */ - unsigned int multilevel:1; /* multi-level multicast - format */ - /* bit 105 */ - /* 0 for TLB: endpoint multi-unicast messages */ - unsigned int chaining:1; /* next descriptor is part of - this activation*/ - /* bit 106 */ - unsigned int rsvd_7:21; /* must be zero */ - /* bits 
127:107 */ -}; - -/* - * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) - * see figure 9-2 of harp_sys.pdf - * assuming UV3 is the same - */ -struct uv2_3_bau_msg_header { - unsigned int base_dest_nasid:15; /* nasid of the first bit */ - /* bits 14:0 */ /* in uvhub map */ - unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */ - /* bits 19:15 */ - unsigned int rsvd_1:1; /* must be zero */ - /* bit 20 */ - /* Address bits 59:21 */ - /* bits 25:2 of address (44:21) are payload */ - /* these next 24 bits become bytes 12-14 of msg */ - /* bits 28:21 land in byte 12 */ - unsigned int replied_to:1; /* sent as 0 by the source to - byte 12 */ - /* bit 21 */ - unsigned int msg_type:3; /* software type of the - message */ - /* bits 24:22 */ - unsigned int canceled:1; /* message canceled, resource - is to be freed*/ - /* bit 25 */ - unsigned int payload_1:3; /* not currently used */ - /* bits 28:26 */ - - /* bits 36:29 land in byte 13 */ - unsigned int payload_2a:3; /* not currently used */ - unsigned int payload_2b:5; /* not currently used */ - /* bits 36:29 */ - - /* bits 44:37 land in byte 14 */ - unsigned int payload_3:8; /* not currently used */ - /* bits 44:37 */ - - unsigned int rsvd_2:7; /* reserved */ - /* bits 51:45 */ - unsigned int swack_flag:1; /* software acknowledge flag */ - /* bit 52 */ - unsigned int rsvd_3a:3; /* must be zero */ - unsigned int rsvd_3b:8; /* must be zero */ - unsigned int rsvd_3c:8; /* must be zero */ - unsigned int rsvd_3d:3; /* must be zero */ - /* bits 74:53 */ - unsigned int fairness:3; /* usually zero */ - /* bits 77:75 */ - - unsigned int sequence:16; /* message sequence number */ - /* bits 93:78 Suppl_A */ - unsigned int chaining:1; /* next descriptor is part of - this activation*/ - /* bit 94 */ - unsigned int multilevel:1; /* multi-level multicast - format */ - /* bit 95 */ - unsigned int rsvd_4:24; /* ordered / source node / - source subnode / aging - must be zero */ - /* bits 119:96 */ - unsigned int command:8; /* message type */ - /* bits 127:120 */ -}; - -/* - * The activation descriptor: - * The format of the message to send, plus all accompanying control - * Should be 64 bytes - */ -struct bau_desc { - struct pnmask distribution; - /* - * message template, consisting of header and payload: - */ - union bau_msg_header { - struct uv1_bau_msg_header uv1_hdr; - struct uv2_3_bau_msg_header uv2_3_hdr; - } header; - - union bau_payload_header { - struct uv1_2_3_bau_msg_payload uv1_2_3; - struct uv4_bau_msg_payload uv4; - } payload; -}; -/* UV1: - * -payload-- ---------header------ - * bytes 0-11 bits 41-56 bits 58-81 - * A B (2) C (3) - * - * A/B/C are moved to: - * A C B - * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) - * ------------payload queue----------- - */ -/* UV2: - * -payload-- ---------header------ - * bytes 0-11 bits 70-78 bits 21-44 - * A B (2) C (3) - * - * A/B/C are moved to: - * A C B - * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) - * ------------payload queue----------- - */ - -/* - * The payload queue on the destination side is an array of these. 
- * With BAU_MISC_CONTROL set for software acknowledge mode, the messages - * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 - * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) - * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from - * swack_vec and payload_2) - * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software - * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload - * operation." - */ -struct bau_pq_entry { - unsigned long address; /* signifies a page or all TLB's - of the cpu */ - /* 64 bits, bytes 0-7 */ - unsigned short sending_cpu; /* cpu that sent the message */ - /* 16 bits, bytes 8-9 */ - unsigned short acknowledge_count; /* filled in by destination */ - /* 16 bits, bytes 10-11 */ - /* these next 3 bytes come from bits 58-81 of the message header */ - unsigned short replied_to:1; /* sent as 0 by the source */ - unsigned short msg_type:3; /* software message type */ - unsigned short canceled:1; /* sent as 0 by the source */ - unsigned short unused1:3; /* not currently using */ - /* byte 12 */ - unsigned char unused2a; /* not currently using */ - /* byte 13 */ - unsigned char unused2; /* not currently using */ - /* byte 14 */ - unsigned char swack_vec; /* filled in by the hardware */ - /* byte 15 (bits 127:120) */ - unsigned short sequence; /* message sequence number */ - /* bytes 16-17 */ - unsigned char unused4[2]; /* not currently using bytes 18-19 */ - /* bytes 18-19 */ - int number_of_cpus; /* filled in at destination */ - /* 32 bits, bytes 20-23 (aligned) */ - unsigned char unused5[8]; /* not using */ - /* bytes 24-31 */ -}; - -struct msg_desc { - struct bau_pq_entry *msg; - int msg_slot; - struct bau_pq_entry *queue_first; - struct bau_pq_entry *queue_last; -}; - -struct reset_args { - int sender; -}; - -/* - * This structure is allocated per_cpu for UV TLB shootdown statistics. 
- */ -struct ptc_stats { - /* sender statistics */ - unsigned long s_giveup; /* number of fall backs to - IPI-style flushes */ - unsigned long s_requestor; /* number of shootdown - requests */ - unsigned long s_stimeout; /* source side timeouts */ - unsigned long s_dtimeout; /* destination side timeouts */ - unsigned long s_strongnacks; /* number of strong nack's */ - unsigned long s_time; /* time spent in sending side */ - unsigned long s_retriesok; /* successful retries */ - unsigned long s_ntargcpu; /* total number of cpu's - targeted */ - unsigned long s_ntargself; /* times the sending cpu was - targeted */ - unsigned long s_ntarglocals; /* targets of cpus on the local - blade */ - unsigned long s_ntargremotes; /* targets of cpus on remote - blades */ - unsigned long s_ntarglocaluvhub; /* targets of the local hub */ - unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ - unsigned long s_ntarguvhub; /* total number of uvhubs - targeted */ - unsigned long s_ntarguvhub16; /* number of times target - hubs >= 16*/ - unsigned long s_ntarguvhub8; /* number of times target - hubs >= 8 */ - unsigned long s_ntarguvhub4; /* number of times target - hubs >= 4 */ - unsigned long s_ntarguvhub2; /* number of times target - hubs >= 2 */ - unsigned long s_ntarguvhub1; /* number of times target - hubs == 1 */ - unsigned long s_resets_plug; /* ipi-style resets from plug - state */ - unsigned long s_resets_timeout; /* ipi-style resets from - timeouts */ - unsigned long s_busy; /* status stayed busy past - s/w timer */ - unsigned long s_throttles; /* waits in throttle */ - unsigned long s_retry_messages; /* retry broadcasts */ - unsigned long s_bau_reenabled; /* for bau enable/disable */ - unsigned long s_bau_disabled; /* for bau enable/disable */ - unsigned long s_uv2_wars; /* uv2 workaround, perm. 
busy */ - unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ - unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ - unsigned long s_overipilimit; /* over the ipi reset limit */ - unsigned long s_giveuplimit; /* disables, over giveup limit*/ - unsigned long s_enters; /* entries to the driver */ - unsigned long s_ipifordisabled; /* fall back to IPI; disabled */ - unsigned long s_plugged; /* plugged by h/w bug*/ - unsigned long s_congested; /* giveup on long wait */ - /* destination statistics */ - unsigned long d_alltlb; /* times all tlb's on this - cpu were flushed */ - unsigned long d_onetlb; /* times just one tlb on this - cpu was flushed */ - unsigned long d_multmsg; /* interrupts with multiple - messages */ - unsigned long d_nomsg; /* interrupts with no message */ - unsigned long d_time; /* time spent on destination - side */ - unsigned long d_requestee; /* number of messages - processed */ - unsigned long d_retries; /* number of retry messages - processed */ - unsigned long d_canceled; /* number of messages canceled - by retries */ - unsigned long d_nocanceled; /* retries that found nothing - to cancel */ - unsigned long d_resets; /* number of ipi-style requests - processed */ - unsigned long d_rcanceled; /* number of messages canceled - by resets */ -}; - -struct tunables { - int *tunp; - int deflt; -}; - -struct hub_and_pnode { - short uvhub; - short pnode; -}; - -struct socket_desc { - short num_cpus; - short cpu_number[MAX_CPUS_PER_SOCKET]; -}; - -struct uvhub_desc { - unsigned short socket_mask; - short num_cpus; - short uvhub; - short pnode; - struct socket_desc socket[2]; -}; - -/** - * struct bau_control - * @status_mmr: location of status mmr, determined by uvhub_cpu - * @status_index: index of ERR|BUSY bits in status mmr, determined by uvhub_cpu - * - * Per-cpu control struct containing CPU topology information and BAU tuneables. 
- */ -struct bau_control { - struct bau_desc *descriptor_base; - struct bau_pq_entry *queue_first; - struct bau_pq_entry *queue_last; - struct bau_pq_entry *bau_msg_head; - struct bau_control *uvhub_master; - struct bau_control *socket_master; - struct ptc_stats *statp; - cpumask_t *cpumask; - unsigned long timeout_interval; - unsigned long set_bau_on_time; - atomic_t active_descriptor_count; - int plugged_tries; - int timeout_tries; - int ipi_attempts; - int conseccompletes; - u64 status_mmr; - int status_index; - bool nobau; - short baudisabled; - short cpu; - short osnode; - short uvhub_cpu; - short uvhub; - short uvhub_version; - short cpus_in_socket; - short cpus_in_uvhub; - short partition_base_pnode; - short busy; /* all were busy (war) */ - unsigned short message_number; - unsigned short uvhub_quiesce; - short socket_acknowledge_count[DEST_Q_SIZE]; - cycles_t send_message; - cycles_t period_end; - cycles_t period_time; - spinlock_t uvhub_lock; - spinlock_t queue_lock; - spinlock_t disable_lock; - /* tunables */ - int max_concurr; - int max_concurr_const; - int plugged_delay; - int plugsb4reset; - int timeoutsb4reset; - int ipi_reset_limit; - int complete_threshold; - int cong_response_us; - int cong_reps; - cycles_t disabled_period; - int period_giveups; - int giveup_limit; - long period_requests; - struct hub_and_pnode *thp; -}; - -/* Abstracted BAU functions */ -struct bau_operations { - unsigned long (*read_l_sw_ack)(void); - unsigned long (*read_g_sw_ack)(int pnode); - unsigned long (*bau_gpa_to_offset)(unsigned long vaddr); - void (*write_l_sw_ack)(unsigned long mmr); - void (*write_g_sw_ack)(int pnode, unsigned long mmr); - void (*write_payload_first)(int pnode, unsigned long mmr); - void (*write_payload_last)(int pnode, unsigned long mmr); - int (*wait_completion)(struct bau_desc*, - struct bau_control*, long try); -}; - -static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); -} - -static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); -} - -static inline void write_mmr_activation(unsigned long index) -{ - write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); -} - -static inline void write_gmmr_activation(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); -} - -static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image); -} - -static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image); -} - -static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); -} - -static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); -} - -static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); -} - -static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image) -{ - write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); -} - -static inline unsigned long read_mmr_misc_control(int pnode) -{ - return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); -} - -static inline void write_mmr_sw_ack(unsigned long mr) -{ - 
uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); -} - -static inline void write_gmmr_sw_ack(int pnode, unsigned long mr) -{ - write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); -} - -static inline unsigned long read_mmr_sw_ack(void) -{ - return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); -} - -static inline unsigned long read_gmmr_sw_ack(int pnode) -{ - return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); -} - -static inline void write_mmr_proc_sw_ack(unsigned long mr) -{ - uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr); -} - -static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr) -{ - write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr); -} - -static inline unsigned long read_mmr_proc_sw_ack(void) -{ - return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING); -} - -static inline unsigned long read_gmmr_proc_sw_ack(int pnode) -{ - return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING); -} - -static inline void write_mmr_data_config(int pnode, unsigned long mr) -{ - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); -} - -static inline int bau_uvhub_isset(int uvhub, struct pnmask *dstp) -{ - return constant_test_bit(uvhub, &dstp->bits[0]); -} -static inline void bau_uvhub_set(int pnode, struct pnmask *dstp) -{ - __set_bit(pnode, &dstp->bits[0]); -} -static inline void bau_uvhubs_clear(struct pnmask *dstp, - int nbits) -{ - bitmap_zero(&dstp->bits[0], nbits); -} -static inline int bau_uvhub_weight(struct pnmask *dstp) -{ - return bitmap_weight((unsigned long *)&dstp->bits[0], - UV_DISTRIBUTION_SIZE); -} - -static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) -{ - bitmap_zero(&dstp->bits, nbits); -} - -extern void uv_bau_message_intr1(void); -#ifdef CONFIG_TRACING -#define trace_uv_bau_message_intr1 uv_bau_message_intr1 -#endif -extern void uv_bau_timeout_intr1(void); - -struct atomic_short { - short counter; -}; - -/* - * atomic_read_short - read a short atomic variable - * @v: pointer of type atomic_short - * - * Atomically reads the value of @v. - */ -static inline int atomic_read_short(const struct atomic_short *v) -{ - return v->counter; -} - -/* - * atom_asr - add and return a short int - * @i: short value to add - * @v: pointer of type atomic_short - * - * Atomically adds @i to @v and returns @i + @v - */ -static inline int atom_asr(short i, struct atomic_short *v) -{ - short __i = i; - asm volatile(LOCK_PREFIX "xaddw %0, %1" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; -} - -/* - * conditionally add 1 to *v, unless *v is >= u - * return 0 if we cannot add 1 to *v because it is >= u - * return 1 if we can add 1 to *v because it is < u - * the add is atomic - * - * This is close to atomic_add_unless(), but this allows the 'u' value - * to be lowered below the current 'v'. atomic_add_unless can only stop - * on equal. - */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) -{ - spin_lock(lock); - if (atomic_read(v) >= u) { - spin_unlock(lock); - return 0; - } - atomic_inc(v); - spin_unlock(lock); - return 1; -} - -#endif /* _ASM_X86_UV_UV_BAU_H */ diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h new file mode 100644 index 000000000000..027a9258dbca --- /dev/null +++ b/arch/x86/include/asm/uv/uv_geo.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Hewlett Packard Enterprise Development LP. All rights reserved. + */ + +#ifndef _ASM_UV_GEO_H +#define _ASM_UV_GEO_H + +/* Type declarations */ + +/* Size of a geoid_s structure (must be before decl. of geoid_u) */ +#define GEOID_SIZE 8 + +/* Fields common to all substructures */ +struct geo_common_s { + unsigned char type; /* What type of h/w is named by this geoid_s */ + unsigned char blade; + unsigned char slot; /* slot is IRU */ + unsigned char upos; + unsigned char rack; +}; + +/* Additional fields for particular types of hardware */ +struct geo_node_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_rtr_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_iocntl_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_pcicard_s { + struct geo_iocntl_s common; + char bus; /* Bus/widget number */ + char slot; /* PCI slot number */ +}; + +/* Subcomponents of a node */ +struct geo_cpu_s { + struct geo_node_s node; + unsigned char socket:4, /* Which CPU on the node */ + thread:4; + unsigned char core; +}; + +struct geo_mem_s { + struct geo_node_s node; + char membus; /* The memory bus on the node */ + char memslot; /* The memory slot on the bus */ +}; + +union geoid_u { + struct geo_common_s common; + struct geo_node_s node; + struct geo_iocntl_s iocntl; + struct geo_pcicard_s pcicard; + struct geo_rtr_s rtr; + struct geo_cpu_s cpu; + struct geo_mem_s mem; + char padsize[GEOID_SIZE]; +}; + +/* Defined constants */ + +#define GEO_MAX_LEN 48 + +#define GEO_TYPE_INVALID 0 +#define GEO_TYPE_MODULE 1 +#define GEO_TYPE_NODE 2 +#define GEO_TYPE_RTR 3 +#define GEO_TYPE_IOCNTL 4 +#define GEO_TYPE_IOCARD 5 +#define GEO_TYPE_CPU 6 +#define GEO_TYPE_MEM 7 +#define GEO_TYPE_MAX (GEO_TYPE_MEM+1) + +static inline int geo_rack(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.rack; +} + +static inline int geo_slot(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.upos; +} + +static inline int geo_blade(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.blade * 2 + g.common.slot; +} + +#endif /* _ASM_UV_GEO_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 44cf6d6deb7a..ea877fd83114 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -5,6 +5,7 @@ * * SGI UV architectural definitions * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. */ @@ -19,6 +20,7 @@ #include <linux/topology.h> #include <asm/types.h> #include <asm/percpu.h> +#include <asm/uv/uv.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/bios.h> #include <asm/irq_vectors.h> @@ -128,17 +130,6 @@ */ #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) -/* System Controller Interface Reg info */ -struct uv_scir_s { - struct timer_list timer; - unsigned long offset; - unsigned long last; - unsigned long idle_on; - unsigned long idle_off; - unsigned char state; - unsigned char enabled; -}; - /* GAM (globally addressed memory) range table */ struct uv_gam_range_s { u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */ @@ -154,6 +145,8 @@ struct uv_gam_range_s { * available in the L3 cache on the cpu socket for the node. 
*/ struct uv_hub_info_s { + unsigned int hub_type; + unsigned char hub_revision; unsigned long global_mmr_base; unsigned long global_mmr_shift; unsigned long gpa_mask; @@ -166,9 +159,9 @@ struct uv_hub_info_s { unsigned char m_val; unsigned char n_val; unsigned char gr_table_len; - unsigned char hub_revision; unsigned char apic_pnode_shift; unsigned char gpa_shift; + unsigned char nasid_shift; unsigned char m_shift; unsigned char n_lshift; unsigned int gnode_extra; @@ -184,22 +177,20 @@ struct uv_hub_info_s { unsigned short nr_possible_cpus; unsigned short nr_online_cpus; short memory_nid; + unsigned short *node_to_socket; }; /* CPU specific info with a pointer to the hub common info struct */ struct uv_cpu_info_s { void *p_uv_hub_info; unsigned char blade_cpu_id; - struct uv_scir_s scir; + void *reserved; }; DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info); #define uv_cpu_info this_cpu_ptr(&__uv_cpu_info) #define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu)) -#define uv_scir_info (&uv_cpu_info->scir) -#define uv_cpu_scir_info(cpu) (&uv_cpu_info_per(cpu)->scir) - /* Node specific hub common info struct */ extern void **__uv_hub_info_list; static inline struct uv_hub_info_s *uv_hub_info_list(int node) @@ -218,109 +209,51 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu) return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info; } -#define UV_HUB_INFO_VERSION 0x7150 -extern int uv_hub_info_version(void); -static inline int uv_hub_info_check(int version) +static inline int uv_hub_type(void) { - if (uv_hub_info_version() == version) - return 0; - - pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n", - uv_hub_info_version(), version); + return uv_hub_info->hub_type; +} - BUG(); /* Catastrophic - cannot continue on unknown UV system */ +static inline __init void uv_hub_type_set(int uvmask) +{ + uv_hub_info->hub_type = uvmask; } -#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION) + /* * HUB revision ranges for each UV HUB architecture. * This is a software convention - NOT the hardware revision numbers in * the hub chip. 
 */
-#define UV1_HUB_REVISION_BASE	1
 #define UV2_HUB_REVISION_BASE	3
 #define UV3_HUB_REVISION_BASE	5
 #define UV4_HUB_REVISION_BASE	7
 #define UV4A_HUB_REVISION_BASE	8	/* UV4 (fixed) rev 2 */
+#define UV5_HUB_REVISION_BASE	9

-#ifdef UV1_HUB_IS_SUPPORTED
-static inline int is_uv1_hub(void)
-{
-	return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
-}
-#else
-static inline int is_uv1_hub(void)
-{
-	return 0;
-}
-#endif
+static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
+static inline int is_uv1_hub(void) { return 0; }
+static inline int is_uv2_hub(void) { return is_uv(UV2); }
+static inline int is_uv3_hub(void) { return is_uv(UV3); }
+static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
+static inline int is_uv4_hub(void) { return is_uv(UV4); }
+static inline int is_uv5_hub(void) { return is_uv(UV5); }

-#ifdef UV2_HUB_IS_SUPPORTED
-static inline int is_uv2_hub(void)
-{
-	return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
-		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
-}
-#else
-static inline int is_uv2_hub(void)
-{
-	return 0;
-}
-#endif
-
-#ifdef UV3_HUB_IS_SUPPORTED
-static inline int is_uv3_hub(void)
-{
-	return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
-		(uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
-}
-#else
-static inline int is_uv3_hub(void)
-{
-	return 0;
-}
-#endif
-
-/* First test "is UV4A", then "is UV4" */
-#ifdef UV4A_HUB_IS_SUPPORTED
-static inline int is_uv4a_hub(void)
-{
-	return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
-}
-#else
-static inline int is_uv4a_hub(void)
-{
-	return 0;
-}
-#endif
+/*
+ * UV4A is a revision of UV4. So on UV4A, both is_uv4_hub() and
+ * is_uv4a_hub() return true, while on UV4, only is_uv4_hub()
+ * returns true. So to get correct results, test for UV4A first,
+ * and only then for UV4.
+ */

-#ifdef UV4_HUB_IS_SUPPORTED
-static inline int is_uv4_hub(void)
-{
-	return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
-}
-#else
-static inline int is_uv4_hub(void)
-{
-	return 0;
-}
-#endif
+/* UVX class: UV2,3,4 */
+static inline int is_uvx_hub(void) { return is_uv(UVX); }

-static inline int is_uvx_hub(void)
-{
-	if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
-		return uv_hub_info->hub_revision;
-	return 0;
-}
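As a quick illustration of the test-ordering rule in the comment above (a standalone sketch, not part of the patch; the bit values mirror the UV4/UV4A masks defined in uv_mmrs.h below, everything else is invented):

	#include <stdio.h>

	#define UV4	(1 << 2)
	#define UV4A	(1 << 3)

	static int hub_type;	/* set once at boot, cf. uv_hub_type_set() */

	static int is_uv(int uvmask) { return hub_type & uvmask; }

	int main(void)
	{
		hub_type = UV4 | UV4A;	/* a UV4A hub answers to both masks */

		/* Wrong order: the UV4 test matches first, UV4A is never seen. */
		printf("wrong:   %s\n", is_uv(UV4) ? "UV4" : is_uv(UV4A) ? "UV4A" : "?");
		/* Correct order: test the more specific UV4A mask first. */
		printf("correct: %s\n", is_uv(UV4A) ? "UV4A" : is_uv(UV4) ? "UV4" : "?");
		return 0;
	}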
+/* UVY class: UV5,..? */
+static inline int is_uvy_hub(void) { return is_uv(UVY); }
-
-static inline int is_uv_hub(void)
-{
-#ifdef UV1_HUB_IS_SUPPORTED
-	return uv_hub_info->hub_revision;
-#endif
-	return is_uvx_hub();
-}
+/* Any UV Hubbed System */
+static inline int is_uv_hub(void) { return is_uv(UV_ANY); }

 union uvh_apicid {
	unsigned long	v;
@@ -342,14 +275,11 @@ union uvh_apicid {
 * g - GNODE (full 15-bit global nasid, right shifted 1)
 * p - PNODE (local part of nsids, right shifted 1)
 */
-#define UV_NASID_TO_PNODE(n)	(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_NASID_TO_PNODE(n) \
+	(((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask)
 #define UV_PNODE_TO_GNODE(p)	((p) |uv_hub_info->gnode_extra)
-#define UV_PNODE_TO_NASID(p)	(UV_PNODE_TO_GNODE(p) << 1)
-
-#define UV1_LOCAL_MMR_BASE	0xf4000000UL
-#define UV1_GLOBAL_MMR32_BASE	0xf8000000UL
-#define UV1_LOCAL_MMR_SIZE	(64UL * 1024 * 1024)
-#define UV1_GLOBAL_MMR32_SIZE	(64UL * 1024 * 1024)
+#define UV_PNODE_TO_NASID(p) \
+	(UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift)

 #define UV2_LOCAL_MMR_BASE	0xfa000000UL
 #define UV2_GLOBAL_MMR32_BASE	0xfc000000UL
@@ -362,33 +292,42 @@ union uvh_apicid {
 #define UV3_GLOBAL_MMR32_SIZE	(32UL * 1024 * 1024)

 #define UV4_LOCAL_MMR_BASE	0xfa000000UL
-#define UV4_GLOBAL_MMR32_BASE	0xfc000000UL
+#define UV4_GLOBAL_MMR32_BASE	0
 #define UV4_LOCAL_MMR_SIZE	(32UL * 1024 * 1024)
-#define UV4_GLOBAL_MMR32_SIZE	(16UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE	0
+
+#define UV5_LOCAL_MMR_BASE	0xfa000000UL
+#define UV5_GLOBAL_MMR32_BASE	0
+#define UV5_LOCAL_MMR_SIZE	(32UL * 1024 * 1024)
+#define UV5_GLOBAL_MMR32_SIZE	0

 #define UV_LOCAL_MMR_BASE	( \
-	is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
-	is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
-	is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
-	/*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+	is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \
+	is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \
+	is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \
+	is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \
+	0)

 #define UV_GLOBAL_MMR32_BASE	( \
-	is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE : \
-	is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
-	is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
-	/*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+	is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \
+	is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \
+	is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \
+	is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \
+	0)

 #define UV_LOCAL_MMR_SIZE	( \
-	is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
-	is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
-	is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
-	/*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+	is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \
+	is_uv(UV3) ? UV3_LOCAL_MMR_SIZE : \
+	is_uv(UV4) ? UV4_LOCAL_MMR_SIZE : \
+	is_uv(UV5) ? UV5_LOCAL_MMR_SIZE : \
+	0)

 #define UV_GLOBAL_MMR32_SIZE	( \
-	is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE : \
-	is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
-	is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
-	/*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+	is_uv(UV2) ? UV2_GLOBAL_MMR32_SIZE : \
+	is_uv(UV3) ? UV3_GLOBAL_MMR32_SIZE : \
+	is_uv(UV4) ? UV4_GLOBAL_MMR32_SIZE : \
+	is_uv(UV5) ? UV5_GLOBAL_MMR32_SIZE : \
+	0)

 #define UV_GLOBAL_MMR64_BASE	(uv_hub_info->global_mmr_base)
@@ -406,8 +345,6 @@ union uvh_apicid {
 #define UVH_APICID		0x002D0E00L
 #define UV_APIC_PNODE_SHIFT	6

-#define UV_APICID_HIBIT_MASK	0xffff0000
-
 /* Local Bus from cpu's perspective */
 #define LOCAL_BUS_BASE		0x1c00000
 #define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
@@ -417,7 +354,7 @@ union uvh_apicid {
 *
 * Note there are NO leds on a UV system. This register is only
 * used by the system controller to monitor system-wide operation.
- * There are 64 regs per node. With Nahelem cpus (2 cores per node,
+ * There are 64 regs per node. With Nehalem cpus (2 cores per node,
 * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
 * a node.
 *
@@ -583,25 +520,30 @@ static inline int uv_socket_to_node(int socket)
	return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
 }

+static inline int uv_pnode_to_socket(int pnode)
+{
+	unsigned short *p2s = uv_hub_info->pnode_to_socket;
+
+	return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+}
+
 /* pnode, offset --> socket virtual */
 static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
 {
	unsigned int m_val = uv_hub_info->m_val;
	unsigned long base;
-	unsigned short sockid, node, *p2s;
+	unsigned short sockid;

	if (m_val)
		return __va(((unsigned long)pnode << m_val) | offset);

-	p2s = uv_hub_info->pnode_to_socket;
-	sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
-	node = uv_socket_to_node(sockid);
+	sockid = uv_pnode_to_socket(pnode);

	/* limit address of previous socket is our base, except node 0 is 0 */
-	if (!node)
+	if (sockid == 0)
		return __va((unsigned long)offset);

-	base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+	base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
	return __va(base << UV_GAM_RANGE_SHFT | offset);
 }

@@ -614,15 +556,6 @@ static inline int uv_apicid_to_pnode(int apicid)
	return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode;
 }

-/* Convert an apicid to the socket number on the blade */
-static inline int uv_apicid_to_socket(int apicid)
-{
-	if (is_uv1_hub())
-		return (apicid >> (uv_hub_info->apic_pnode_shift - 1)) & 1;
-	else
-		return 0;
-}
-
 /*
 * Access global MMRs using the low memory MMR32 space. This region supports
 * faster MMR access but not all MMRs are accessible in this space.
@@ -713,15 +646,14 @@ static inline int uv_cpu_blade_processor_id(int cpu)
 {
	return uv_cpu_info_per(cpu)->blade_cpu_id;
 }
-#define _uv_cpu_blade_processor_id 1	/* indicate function available */

-/* Blade number to Node number (UV1..UV4 is 1:1) */
+/* Blade number to Node number (UV2..UV4 is 1:1) */
 static inline int uv_blade_to_node(int blade)
 {
-	return blade;
+	return uv_socket_to_node(blade);
 }

-/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
+/* Blade number of current cpu. Numbered 0 .. <#blades -1> */
 static inline int uv_numa_blade_id(void)
 {
	return uv_hub_info->numa_blade_id;
@@ -729,24 +661,28 @@ static inline int uv_numa_blade_id(void)

 /*
 * Convert linux node number to the UV blade number.
- * .. Currently for UV1 thru UV4 the node and the blade are identical.
- * .. If this changes then you MUST check references to this function!
+ * .. Currently for UV2 thru UV4 the node and the blade are identical.
+ * .. UV5 needs conversion when sub-numa clustering is enabled.
 */
 static inline int uv_node_to_blade_id(int nid)
 {
-	return nid;
+	unsigned short *n2s = uv_hub_info->node_to_socket;
+
+	return n2s ? n2s[nid] : nid;
 }

-/* Convert a cpu number to the the UV blade number */
+/* Convert a CPU number to the UV blade number */
 static inline int uv_cpu_to_blade_id(int cpu)
 {
-	return uv_node_to_blade_id(cpu_to_node(cpu));
+	return uv_cpu_hub_info(cpu)->numa_blade_id;
 }

 /* Convert a blade id to the PNODE of the blade */
 static inline int uv_blade_to_pnode(int bid)
 {
-	return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
+	unsigned short *s2p = uv_hub_info->socket_to_pnode;
+
+	return s2p ? s2p[bid] : bid;
 }
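In the reworked uv_pnode_offset_to_vaddr() above, a socket's base address is the GAM range limit of the previous socket (socket 0 starts at 0), with limits held in 2^26-byte units per the gr_table comment earlier in this header. A standalone sketch of that lookup (the table values here are invented for illustration):

	#include <stdio.h>

	#define UV_GAM_RANGE_SHFT 26	/* gr_table limits are PA bits 56:26 */

	/* Base of socket N is where socket N-1's range ends; socket 0 is 0. */
	static unsigned long socket_base(const unsigned int *limit, int sockid)
	{
		if (sockid == 0)
			return 0;
		return (unsigned long)limit[sockid - 1] << UV_GAM_RANGE_SHFT;
	}

	int main(void)
	{
		unsigned int gr_limit[3] = { 4, 8, 12 };	/* invented limits */
		int s;

		for (s = 0; s < 3; s++)
			printf("socket %d base = %#lx\n", s, socket_base(gr_limit, s));
		return 0;
	}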
 /* Nid of memory node on blade. -1 if no blade-local memory */
@@ -801,7 +737,7 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_TSC_SYNC_SHIFT_UV2K	16	/* UV2/3k have different bits */
 #define UVH_TSC_SYNC_MASK	3	/* 0011 */
 #define UVH_TSC_SYNC_VALID	3	/* 0011 */
-#define UVH_TSC_SYNC_INVALID	2	/* 0010 */
+#define UVH_TSC_SYNC_UNKNOWN	0	/* 0000 */

 /* BMC sets a bit this MMR non-zero before sending an NMI */
 #define UVH_NMI_MMR		UVH_BIOS_KERNEL_MMR
@@ -809,19 +745,6 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_NMI_MMR_SHIFT	63
 #define UVH_NMI_MMR_TYPE	"SCRATCH5"

-/* Newer SMM NMI handler, not present in all systems */
-#define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0
-#define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT	UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
-#define UVH_NMI_MMRX_TYPE	"EXTIO_INT0"
-
-/* Non-zero indicates newer SMM NMI handler present */
-#define UVH_NMI_MMRX_SUPPORTED	UVH_EXTIO_INT0_BROADCAST
-
-/* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ	UVH_BIOS_KERNEL_MMR_ALIAS_2
-#define UVH_NMI_MMRX_REQ_SHIFT	62
-
 struct uv_hub_nmi_s {
	raw_spinlock_t	nmi_lock;
	atomic_t	in_nmi;		/* flag this node in UV NMI IRQ */
@@ -853,51 +776,6 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 #define UV_NMI_STATE_DUMP		2
 #define UV_NMI_STATE_DUMP_DONE		3

-/* Update SCIR state */
-static inline void uv_set_scir_bits(unsigned char value)
-{
-	if (uv_scir_info->state != value) {
-		uv_scir_info->state = value;
-		uv_write_local_mmr8(uv_scir_info->offset, value);
-	}
-}
-
-static inline unsigned long uv_scir_offset(int apicid)
-{
-	return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
-}
-
-static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
-{
-	if (uv_cpu_scir_info(cpu)->state != value) {
-		uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
-				uv_cpu_scir_info(cpu)->offset, value);
-		uv_cpu_scir_info(cpu)->state = value;
-	}
-}
-
-extern unsigned int uv_apicid_hibits;
-static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
-{
-	apicid |= uv_apicid_hibits;
-	return (1UL << UVH_IPI_INT_SEND_SHFT) |
-			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
-			(mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
-			(vector << UVH_IPI_INT_VECTOR_SHFT);
-}
-
-static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
-{
-	unsigned long val;
-	unsigned long dmode = dest_Fixed;
-
-	if (vector == NMI_VECTOR)
-		dmode = dest_NMI;
-
-	val = uv_hub_ipi_value(apicid, vector, dmode);
-	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-}
-
 /*
 * Get the minimum revision number of the hub chips within the partition.
 * (See UVx_HUB_REVISION_BASE above for specific values.)
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c760622..1876b5edd142 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,7 +31,6 @@ enum {
	UV_AFFINITY_CPU
 };

-extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);

diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 62c79e26a59a..bb45812889dd 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3,8 +3,9 @@
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
- * SGI UV MMR definitions
+ * HPE UV MMR definitions
 *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
 * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
*/ @@ -18,58 +19,43 @@ * grouped by architecture types. * * UVH - definitions common to all UV hub types. - * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4). - * UV1H - definitions specific to UV type 1 hub. - * UV2H - definitions specific to UV type 2 hub. - * UV3H - definitions specific to UV type 3 hub. + * UVXH - definitions common to UVX class (2, 3, 4). + * UVYH - definitions common to UVY class (5). + * UV5H - definitions specific to UV type 5 hub. + * UV4AH - definitions specific to UV type 4A hub. * UV4H - definitions specific to UV type 4 hub. - * - * So in general, MMR addresses and structures are identical on all hubs types. - * These MMRs are identified as: - * #define UVH_xxx <address> - * union uvh_xxx { - * unsigned long v; - * struct uvh_int_cmpd_s { - * } s; - * }; + * UV3H - definitions specific to UV type 3 hub. + * UV2H - definitions specific to UV type 2 hub. * * If the MMR exists on all hub types but have different addresses, - * use a conditional operator to define the value at runtime. - * #define UV1Hxxx a - * #define UV2Hxxx b - * #define UV3Hxxx c - * #define UV4Hxxx d - * #define UV4AHxxx e - * #define UVHxxx (is_uv1_hub() ? UV1Hxxx : - * (is_uv2_hub() ? UV2Hxxx : - * (is_uv3_hub() ? UV3Hxxx : - * (is_uv4a_hub() ? UV4AHxxx : - * UV4Hxxx)) + * use a conditional operator to define the value at runtime. Any + * that are not defined are blank. + * (UV4A variations only generated if different from uv4) + * #define UVHxxx ( + * is_uv(UV5) ? UV5Hxxx value : + * is_uv(UV4A) ? UV4AHxxx value : + * is_uv(UV4) ? UV4Hxxx value : + * is_uv(UV3) ? UV3Hxxx value : + * is_uv(UV2) ? UV2Hxxx value : + * <ucv> or <undef value>) * - * If the MMR exists on all hub types > 1 but have different addresses, the - * variation using "UVX" as the prefix exists. - * #define UV2Hxxx b - * #define UV3Hxxx c - * #define UV4Hxxx d - * #define UV4AHxxx e - * #define UVHxxx (is_uv2_hub() ? UV2Hxxx : - * (is_uv3_hub() ? UV3Hxxx : - * (is_uv4a_hub() ? UV4AHxxx : - * UV4Hxxx)) + * Class UVX has UVs (2|3|4|4A). + * Class UVY has UVs (5). * * union uvh_xxx { * unsigned long v; * struct uvh_xxx_s { # Common fields only * } s; - * struct uv1h_xxx_s { # Full UV1 definition (*) - * } s1; - * struct uv2h_xxx_s { # Full UV2 definition (*) - * } s2; - * struct uv3h_xxx_s { # Full UV3 definition (*) - * } s3; - * (NOTE: No struct uv4ah_xxx_s members exist) + * struct uv5h_xxx_s { # Full UV5 definition (*) + * } s5; + * struct uv4ah_xxx_s { # Full UV4A definition (*) + * } s4a; * struct uv4h_xxx_s { # Full UV4 definition (*) * } s4; + * struct uv3h_xxx_s { # Full UV3 definition (*) + * } s3; + * struct uv2h_xxx_s { # Full UV2 definition (*) + * } s2; * }; * (* - if present and different than the common struct) * @@ -78,18 +64,22 @@ * if the contents is the same for all hubs, only the "s" structure is * generated. 
* - * If the MMR exists on ONLY 1 type of hub, no generic definition is - * generated: - * #define UVnH_xxx <uvn address> - * union uvnh_xxx { - * unsigned long v; - * struct uvh_int_cmpd_s { - * } sn; - * }; - * - * (GEN Flags: mflags_opt= undefs=function UV234=UVXH) + * (GEN Flags: undefs=function) */ + /* UV bit masks */ +#define UV2 (1 << 0) +#define UV3 (1 << 1) +#define UV4 (1 << 2) +#define UV4A (1 << 3) +#define UV5 (1 << 4) +#define UVX (UV2|UV3|UV4) +#define UVY (UV5) +#define UV_ANY (~0) + + + + #define UV_MMR_ENABLE (1UL << 63) #define UV1_HUB_PART_NUMBER 0x88a5 @@ -98,533 +88,475 @@ #define UV3_HUB_PART_NUMBER 0x9578 #define UV3_HUB_PART_NUMBER_X 0x4321 #define UV4_HUB_PART_NUMBER 0x99a1 - -/* Compat: Indicate which UV Hubs are supported. */ -#define UV1_HUB_IS_SUPPORTED 1 -#define UV2_HUB_IS_SUPPORTED 1 -#define UV3_HUB_IS_SUPPORTED 1 -#define UV4_HUB_IS_SUPPORTED 1 -#define UV4A_HUB_IS_SUPPORTED 1 +#define UV5_HUB_PART_NUMBER 0xa171 /* Error function to catch undefined references */ extern unsigned long uv_undefined(char *str); /* ========================================================================= */ -/* UVH_BAU_DATA_BROADCAST */ -/* ========================================================================= */ -#define UVH_BAU_DATA_BROADCAST 0x61688UL - -#define UV1H_BAU_DATA_BROADCAST_32 0x440 -#define UV2H_BAU_DATA_BROADCAST_32 0x440 -#define UV3H_BAU_DATA_BROADCAST_32 0x440 -#define UV4H_BAU_DATA_BROADCAST_32 0x360 -#define UVH_BAU_DATA_BROADCAST_32 ( \ - is_uv1_hub() ? UV1H_BAU_DATA_BROADCAST_32 : \ - is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 : \ - is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 : \ - /*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32) - -#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 -#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL - - -union uvh_bau_data_broadcast_u { - unsigned long v; - struct uvh_bau_data_broadcast_s { - unsigned long enable:1; /* RW */ - unsigned long rsvd_1_63:63; - } s; -}; - -/* ========================================================================= */ -/* UVH_BAU_DATA_CONFIG */ -/* ========================================================================= */ -#define UVH_BAU_DATA_CONFIG 0x61680UL - -#define UV1H_BAU_DATA_CONFIG_32 0x438 -#define UV2H_BAU_DATA_CONFIG_32 0x438 -#define UV3H_BAU_DATA_CONFIG_32 0x438 -#define UV4H_BAU_DATA_CONFIG_32 0x358 -#define UVH_BAU_DATA_CONFIG_32 ( \ - is_uv1_hub() ? UV1H_BAU_DATA_CONFIG_32 : \ - is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 : \ - is_uv3_hub() ? 
UV3H_BAU_DATA_CONFIG_32 : \ - /*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32) - -#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 -#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UVH_BAU_DATA_CONFIG_P_SHFT 13 -#define UVH_BAU_DATA_CONFIG_T_SHFT 15 -#define UVH_BAU_DATA_CONFIG_M_SHFT 16 -#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - - -union uvh_bau_data_config_u { - unsigned long v; - struct uvh_bau_data_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s; -}; - -/* ========================================================================= */ /* UVH_EVENT_OCCURRED0 */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0 0x70000UL -#define UVH_EVENT_OCCURRED0_32 0x5e8 +/* UVH common defines*/ #define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 #define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL - -#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 -#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 -#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 -#define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4 -#define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5 -#define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6 -#define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 -#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 -#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 -#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 -#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 -#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 -#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 -#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 -#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 -#define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 -#define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 -#define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 -#define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 -#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 -#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 
-#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 -#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 -#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 -#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 -#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 -#define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43 -#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 -#define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 -#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 -#define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 -#define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51 -#define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52 -#define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53 -#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 -#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 -#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 -#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL -#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL -#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL -#define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL -#define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL -#define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL -#define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL -#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL -#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL -#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL -#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL -#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL -#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL -#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL -#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL -#define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL -#define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL -#define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL -#define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL -#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL -#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL -#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL -#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL -#define 
UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL -#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL -#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL -#define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL -#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL -#define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL -#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL -#define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL -#define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL -#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL -#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +/* UVXH common defines */ #define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2 -#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 -#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 -#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 -#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 -#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 -#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 -#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 -#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 -#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 -#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 -#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 -#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 #define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 #define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 #define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 #define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 #define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 #define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 #define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL -#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 -#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 -#define 
UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL -#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 
0x0000004000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL - -#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 -#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 -#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define 
UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL -#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL - +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED0_KT_HCERR_SHFT 1 +#define UVYH_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED0_RH0_HCERR_SHFT 2 +#define UVYH_EVENT_OCCURRED0_RH0_HCERR_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED0_RH1_HCERR_SHFT 3 +#define UVYH_EVENT_OCCURRED0_RH1_HCERR_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED0_LH0_HCERR_SHFT 4 +#define UVYH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED0_LH1_HCERR_SHFT 5 
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED0_LH2_HCERR_SHFT 6 +#define UVYH_EVENT_OCCURRED0_LH2_HCERR_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED0_LH3_HCERR_SHFT 7 +#define UVYH_EVENT_OCCURRED0_LH3_HCERR_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED0_XB_HCERR_SHFT 8 +#define UVYH_EVENT_OCCURRED0_XB_HCERR_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED0_RDM_HCERR_SHFT 9 +#define UVYH_EVENT_OCCURRED0_RDM_HCERR_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED0_NI0_HCERR_SHFT 10 +#define UVYH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED0_NI1_HCERR_SHFT 11 +#define UVYH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED0_LB_AOERR0_SHFT 12 +#define UVYH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED0_KT_AOERR0_SHFT 13 +#define UVYH_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_SHFT 14 +#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_SHFT 15 +#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_MASK 0x0000000000008000UL +#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 16 +#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 17 +#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_SHFT 18 +#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_SHFT 19 +#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED0_XB_AOERR0_SHFT 20 +#define UVYH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_SHFT 21 +#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_SHFT 22 +#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_SHFT 23 +#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 24 +#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 25 +#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED0_LB_AOERR1_SHFT 26 +#define UVYH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED0_KT_AOERR1_SHFT 27 +#define UVYH_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_SHFT 28 +#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_SHFT 29 +#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 30 +#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 31 +#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_SHFT 32 +#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_SHFT 33 +#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_MASK 0x0000000200000000UL +#define UVYH_EVENT_OCCURRED0_XB_AOERR1_SHFT 34 +#define UVYH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000400000000UL +#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_SHFT 35 +#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_MASK 0x0000000800000000UL +#define 
UVYH_EVENT_OCCURRED0_RT0_AOERR1_SHFT 36 +#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_MASK 0x0000001000000000UL +#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_SHFT 37 +#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_MASK 0x0000002000000000UL +#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 38 +#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000004000000000UL +#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 39 +#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000008000000000UL +#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 40 +#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000010000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 41 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000020000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 42 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000040000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 43 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000080000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 44 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000100000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 45 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000200000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 46 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000400000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 47 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000800000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 48 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0001000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 49 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0002000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 50 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0004000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 51 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0008000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 52 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0010000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 53 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0020000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 54 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0040000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 55 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0080000000000000UL +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 56 +#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0100000000000000UL +#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 57 +#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0200000000000000UL +#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 58 +#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0400000000000000UL +#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 59 +#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0800000000000000UL +#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 60 +#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x1000000000000000UL +#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 61 +#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x2000000000000000UL + +/* UV4 unique defines */ #define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1 -#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10 -#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17 -#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18 -#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19 -#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20 -#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21 -#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22 -#define 
UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23 -#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24 -#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25 -#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26 -#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27 -#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28 -#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29 -#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30 -#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31 -#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32 -#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33 -#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34 -#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35 -#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36 -#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52 -#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53 -#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54 -#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55 -#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56 -#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57 -#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58 -#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62 -#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63 #define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10 #define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17 #define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18 #define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19 #define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20 #define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21 #define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22 #define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23 #define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24 #define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25 #define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26 #define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27 #define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28 #define 
UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29 #define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30 #define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31 #define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32 #define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33 #define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34 #define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35 #define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36 #define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37 #define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL +#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54 #define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL +#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55 #define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL +#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56 #define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL +#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57 #define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL +#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58 #define 
UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL +#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59 #define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60 #define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61 #define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62 #define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL +#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63 #define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define 
UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +/* UV2 unique defines */ +#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define 
UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define 
UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK ( \ + is_uv(UV4) ? 0x1000000000000000UL : \ + is_uv(UV3) ? 0x0040000000000000UL : \ + is_uv(UV2) ? 0x0040000000000000UL : \ + 0) #define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \ - is_uv1_hub() ? UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ - is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT) + is_uv(UV4) ? 60 : \ + is_uv(UV3) ? 54 : \ + is_uv(UV2) ? 54 : \ + -1) union uvh_event_occurred0_u { unsigned long v; + + /* UVH common struct */ struct uvh_event_occurred0_s { - unsigned long lb_hcerr:1; /* RW, W1C */ - unsigned long rsvd_1_10:10; - unsigned long rh_aoerr0:1; /* RW, W1C */ - unsigned long rsvd_12_63:52; + unsigned long lb_hcerr:1; /* RW */ + unsigned long rsvd_1_63:63; } s; + + /* UVXH common struct */ struct uvxh_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ unsigned long rsvd_1:1; @@ -645,6 +577,142 @@ union uvh_event_occurred0_u { unsigned long xb_aoerr0:1; /* RW */ unsigned long rsvd_17_63:47; } sx; + + /* UVYH common struct */ + struct uvyh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh0_hcerr:1; /* RW */ + unsigned long rh1_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long lh2_hcerr:1; /* RW */ + unsigned long lh3_hcerr:1; /* RW */ + unsigned long xb_hcerr:1; /* RW */ + unsigned long rdm_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh0_aoerr0:1; /* RW */ + unsigned long rh1_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long lh2_aoerr0:1; /* RW */ + unsigned long lh3_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rdm_aoerr0:1; /* RW */ + unsigned long rt0_aoerr0:1; /* RW */ + unsigned long rt1_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh0_aoerr1:1; /* RW */ + unsigned long rh1_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long lh2_aoerr1:1; /* RW */ + unsigned long lh3_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rdm_aoerr1:1; /* RW */ + unsigned long rt0_aoerr1:1; /* RW */ + unsigned long rt1_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long 
lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long rsvd_62_63:2; + } sy; + + /* UV5 unique struct */ + struct uv5h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long kt_hcerr:1; /* RW */ + unsigned long rh0_hcerr:1; /* RW */ + unsigned long rh1_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long lh2_hcerr:1; /* RW */ + unsigned long lh3_hcerr:1; /* RW */ + unsigned long xb_hcerr:1; /* RW */ + unsigned long rdm_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long kt_aoerr0:1; /* RW */ + unsigned long rh0_aoerr0:1; /* RW */ + unsigned long rh1_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long lh2_aoerr0:1; /* RW */ + unsigned long lh3_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rdm_aoerr0:1; /* RW */ + unsigned long rt0_aoerr0:1; /* RW */ + unsigned long rt1_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long kt_aoerr1:1; /* RW */ + unsigned long rh0_aoerr1:1; /* RW */ + unsigned long rh1_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long lh2_aoerr1:1; /* RW */ + unsigned long lh3_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rdm_aoerr1:1; /* RW */ + unsigned long rt0_aoerr1:1; /* RW */ + unsigned long rt1_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long rsvd_62_63:2; + } s5; + + /* UV4 unique struct */ struct uv4h_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ unsigned long kt_hcerr:1; /* RW */ @@ -711,13 +779,1355 @@ union uvh_event_occurred0_u { unsigned long extio_int2:1; /* RW */ unsigned long extio_int3:1; /* RW */ } s4; + + /* UV3 unique struct */ + struct uv3h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned 
long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } s3; + + /* UV2 unique struct */ + struct uv2h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned 
long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } s2; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0_ALIAS */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL -#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 + + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED1 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED1 0x70080UL + + + +/* UVYH common defines */ +#define UVYH_EVENT_OCCURRED1_IPI_INT_SHFT 0 +#define UVYH_EVENT_OCCURRED1_IPI_INT_MASK 0x0000000000000001UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_SHFT 1 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_MASK 0x0000000000000002UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_SHFT 2 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_MASK 0x0000000000000004UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_SHFT 3 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_MASK 0x0000000000000008UL +#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_SHFT 4 +#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_MASK 0x0000000000000010UL +#define UVYH_EVENT_OCCURRED1_PROFILE_INT_SHFT 5 +#define UVYH_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000020UL +#define UVYH_EVENT_OCCURRED1_BAU_DATA_SHFT 6 +#define UVYH_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000040UL +#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_SHFT 7 +#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000080UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_SHFT 8 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_MASK 0x0000000000000100UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_SHFT 9 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_MASK 0x0000000000000200UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_SHFT 10 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_MASK 0x0000000000000400UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_SHFT 11 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_MASK 0x0000000000000800UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_SHFT 12 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_MASK 0x0000000000001000UL +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_SHFT 13 +#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_MASK 0x0000000000002000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_SHFT 14 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_MASK 0x0000000000004000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_SHFT 15 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_MASK 0x0000000000008000UL +#define 
UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_SHFT 16 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_MASK 0x0000000000010000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_SHFT 17 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_MASK 0x0000000000020000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_SHFT 18 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_MASK 0x0000000000040000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_SHFT 19 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_MASK 0x0000000000080000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_SHFT 20 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_MASK 0x0000000000100000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_SHFT 21 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_MASK 0x0000000000200000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_SHFT 22 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_MASK 0x0000000000400000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_SHFT 23 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_MASK 0x0000000000800000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_SHFT 24 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_MASK 0x0000000001000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_SHFT 25 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_MASK 0x0000000002000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_SHFT 26 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_MASK 0x0000000004000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_SHFT 27 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_MASK 0x0000000008000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_SHFT 28 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_MASK 0x0000000010000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_SHFT 29 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_MASK 0x0000000020000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_SHFT 30 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_MASK 0x0000000040000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_SHFT 31 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_MASK 0x0000000080000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_SHFT 32 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_MASK 0x0000000100000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_SHFT 33 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_MASK 0x0000000200000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_SHFT 34 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_MASK 0x0000000400000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_SHFT 35 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_MASK 0x0000000800000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_SHFT 36 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_MASK 0x0000001000000000UL +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_SHFT 37 +#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_MASK 0x0000002000000000UL + +/* UV4 unique defines */ +#define UV4H_EVENT_OCCURRED1_PROFILE_INT_SHFT 0 +#define UV4H_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000001UL +#define UV4H_EVENT_OCCURRED1_BAU_DATA_SHFT 1 +#define UV4H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000002UL +#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_SHFT 2 +#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000004UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 3 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000000008UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 4 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000000010UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 5 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000000020UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 6 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000000040UL +#define 
UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 7 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000000080UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 8 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000000100UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 9 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000000000200UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 10 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000000000400UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 11 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000000000800UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 12 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000000001000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 13 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000000002000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 14 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000000004000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 15 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000000008000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 16 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000000010000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 17 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000000020000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 18 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000000040000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_SHFT 19 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_MASK 0x0000000000080000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_SHFT 20 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_MASK 0x0000000000100000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_SHFT 21 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_MASK 0x0000000000200000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_SHFT 22 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_MASK 0x0000000000400000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_SHFT 23 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_MASK 0x0000000000800000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_SHFT 24 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_MASK 0x0000000001000000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_SHFT 25 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_MASK 0x0000000002000000UL +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_SHFT 26 +#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_MASK 0x0000000004000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 27 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000008000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 28 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000010000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 29 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000000020000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 30 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000000040000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 31 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000000080000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 32 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000000100000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 33 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000000200000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 34 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000000400000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 35 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000000800000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 36 
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000001000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 37 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000002000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 38 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000004000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 39 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000008000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 40 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000010000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 41 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0000020000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 42 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0000040000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_SHFT 43 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_MASK 0x0000080000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_SHFT 44 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_MASK 0x0000100000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_SHFT 45 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_MASK 0x0000200000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_SHFT 46 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_MASK 0x0000400000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_SHFT 47 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_MASK 0x0000800000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_SHFT 48 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_MASK 0x0001000000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_SHFT 49 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_MASK 0x0002000000000000UL +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_SHFT 50 +#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_MASK 0x0004000000000000UL + +/* UV3 unique defines */ +#define UV3H_EVENT_OCCURRED1_BAU_DATA_SHFT 0 +#define UV3H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1 +#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11 +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12 
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_SHFT 0
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
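/*
 * Illustrative aside, not part of this patch: every _MASK in these
 * tables is 1UL << _SHFT, so a pending event bit is tested by masking
 * the raw EVENT_OCCURRED1 MMR value and shifting it down.  The helper
 * name is hypothetical; only the UV2H_* macros come from this header.
 */
static inline int uv2_msg_accel_int12_pending(unsigned long ev1)
{
	return (int)((ev1 & UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK) >>
		     UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT);
}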
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK ( \
+	is_uv(UV5) ? 0x0000000000000002UL : \
+	0)
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT ( \
+	is_uv(UV5) ? 1 : \
+	-1)
+
+union uvyh_event_occurred1_u {
+	unsigned long v;
+
+	/* UVYH common struct */
+	struct uvyh_event_occurred1_s {
+		unsigned long ipi_int:1; /* RW */
+		unsigned long extio_int0:1; /* RW */
+		unsigned long extio_int1:1; /* RW */
+		unsigned long extio_int2:1; /* RW */
+		unsigned long extio_int3:1; /* RW */
+		unsigned long profile_int:1; /* RW */
+		unsigned long bau_data:1; /* RW */
+		unsigned long proc_general:1; /* RW */
+		unsigned long xh_tlb_int0:1; /* RW */
+		unsigned long xh_tlb_int1:1; /* RW */
+		unsigned long xh_tlb_int2:1; /* RW */
+		unsigned long xh_tlb_int3:1; /* RW */
+		unsigned long xh_tlb_int4:1; /* RW */
+		unsigned long xh_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int0:1; /* RW */
+		unsigned long rdm_tlb_int1:1; /* RW */
+		unsigned long rdm_tlb_int2:1; /* RW */
+		unsigned long rdm_tlb_int3:1; /* RW */
+		unsigned long rdm_tlb_int4:1; /* RW */
+		unsigned long rdm_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int6:1; /* RW */
+		unsigned long rdm_tlb_int7:1; /* RW */
+		unsigned long rdm_tlb_int8:1; /* RW */
+		unsigned long rdm_tlb_int9:1; /* RW */
+		unsigned long rdm_tlb_int10:1; /* RW */
+		unsigned long rdm_tlb_int11:1; /* RW */
+		unsigned long rdm_tlb_int12:1; /* RW */
+		unsigned long rdm_tlb_int13:1; /* RW */
+		unsigned long rdm_tlb_int14:1; /* RW */
+		unsigned long rdm_tlb_int15:1; /* RW */
+		unsigned long rdm_tlb_int16:1; /* RW */
+		unsigned long rdm_tlb_int17:1; /* RW */
+		unsigned long rdm_tlb_int18:1; /* RW */
+		unsigned long rdm_tlb_int19:1; /* RW */
+		unsigned long rdm_tlb_int20:1; /* RW */
+		unsigned long rdm_tlb_int21:1; /* RW */
+		unsigned long rdm_tlb_int22:1; /* RW */
+		unsigned long rdm_tlb_int23:1; /* RW */
+		unsigned long rsvd_38_63:26;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred1_s {
+		unsigned long ipi_int:1; /* RW */
+		unsigned long extio_int0:1; /* RW */
+		unsigned long extio_int1:1; /* RW */
+		unsigned long extio_int2:1; /* RW */
+		unsigned long extio_int3:1; /* RW */
+		unsigned long profile_int:1; /* RW */
+		unsigned long bau_data:1; /* RW */
+		unsigned long proc_general:1; /* RW */
+		unsigned long xh_tlb_int0:1; /* RW */
+		unsigned long xh_tlb_int1:1; /* RW */
+		unsigned long xh_tlb_int2:1; /* RW */
+		unsigned long xh_tlb_int3:1; /* RW */
+		unsigned long xh_tlb_int4:1; /* RW */
+		unsigned long xh_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int0:1; /* RW */
+		unsigned long rdm_tlb_int1:1; /* RW */
+		unsigned long rdm_tlb_int2:1; /* RW */
+		unsigned long rdm_tlb_int3:1; /* RW */
+		unsigned long rdm_tlb_int4:1; /* RW */
+		unsigned long rdm_tlb_int5:1; /* RW */
+		unsigned long rdm_tlb_int6:1; /* RW */
+		unsigned long rdm_tlb_int7:1; /* RW */
+		unsigned long rdm_tlb_int8:1; /* RW */
+		unsigned long rdm_tlb_int9:1; /* RW */
+		unsigned long rdm_tlb_int10:1; /* RW */
+		unsigned long rdm_tlb_int11:1; /* RW */
+		unsigned long rdm_tlb_int12:1; /* RW */
+		unsigned long rdm_tlb_int13:1; /* RW */
+		unsigned long rdm_tlb_int14:1; /* RW */
+		unsigned long rdm_tlb_int15:1; /* RW */
+		unsigned long rdm_tlb_int16:1; /* RW */
+		unsigned long rdm_tlb_int17:1; /* RW */
+		unsigned long rdm_tlb_int18:1; /* RW */
+		unsigned long rdm_tlb_int19:1; /* RW */
+		unsigned long rdm_tlb_int20:1; /* RW */
+		unsigned long rdm_tlb_int21:1; /* RW */
+		unsigned long rdm_tlb_int22:1; /* RW */
+		unsigned long rdm_tlb_int23:1; /* RW */
+		unsigned long rsvd_38_63:26;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred1_s {
+		unsigned long profile_int:1; /* RW */
+		unsigned long bau_data:1; /* RW */
+		unsigned long proc_general:1; /* RW */
+		unsigned long gr0_tlb_int0:1; /* RW */
+		unsigned long gr0_tlb_int1:1; /* RW */
+		unsigned long gr0_tlb_int2:1; /* RW */
+		unsigned long gr0_tlb_int3:1; /* RW */
+		unsigned long gr0_tlb_int4:1; /* RW */
+		unsigned long gr0_tlb_int5:1; /* RW */
+		unsigned long gr0_tlb_int6:1; /* RW */
+		unsigned long gr0_tlb_int7:1; /* RW */
+		unsigned long gr0_tlb_int8:1; /* RW */
+		unsigned long gr0_tlb_int9:1; /* RW */
+		unsigned long gr0_tlb_int10:1; /* RW */
+		unsigned long gr0_tlb_int11:1; /* RW */
+		unsigned long gr0_tlb_int12:1; /* RW */
+		unsigned long gr0_tlb_int13:1; /* RW */
+		unsigned long gr0_tlb_int14:1; /* RW */
+		unsigned long gr0_tlb_int15:1; /* RW */
+		unsigned long gr0_tlb_int16:1; /* RW */
+		unsigned long gr0_tlb_int17:1; /* RW */
+		unsigned long gr0_tlb_int18:1; /* RW */
+		unsigned long gr0_tlb_int19:1; /* RW */
+		unsigned long gr0_tlb_int20:1; /* RW */
+		unsigned long gr0_tlb_int21:1; /* RW */
+		unsigned long gr0_tlb_int22:1; /* RW */
+		unsigned long gr0_tlb_int23:1; /* RW */
+		unsigned long gr1_tlb_int0:1; /* RW */
+		unsigned long gr1_tlb_int1:1; /* RW */
+		unsigned long gr1_tlb_int2:1; /* RW */
+		unsigned long gr1_tlb_int3:1; /* RW */
+		unsigned long gr1_tlb_int4:1; /* RW */
+		unsigned long gr1_tlb_int5:1; /* RW */
+		unsigned long gr1_tlb_int6:1; /* RW */
+		unsigned long gr1_tlb_int7:1; /* RW */
+		unsigned long gr1_tlb_int8:1; /* RW */
+		unsigned long gr1_tlb_int9:1; /* RW */
+		unsigned long gr1_tlb_int10:1; /* RW */
+		unsigned long gr1_tlb_int11:1; /* RW */
+		unsigned long gr1_tlb_int12:1; /* RW */
+		unsigned long gr1_tlb_int13:1; /* RW */
+		unsigned long gr1_tlb_int14:1; /* RW */
+		unsigned long gr1_tlb_int15:1; /* RW */
+		unsigned long gr1_tlb_int16:1; /* RW */
+		unsigned long gr1_tlb_int17:1; /* RW */
+		unsigned long gr1_tlb_int18:1; /* RW */
+		unsigned long gr1_tlb_int19:1; /* RW */
+		unsigned long gr1_tlb_int20:1; /* RW */
+		unsigned long gr1_tlb_int21:1; /* RW */
+		unsigned long gr1_tlb_int22:1; /* RW */
+		unsigned long gr1_tlb_int23:1; /* RW */
+		unsigned long rsvd_51_63:13;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_event_occurred1_s {
+		unsigned long bau_data:1; /* RW */
+		unsigned long power_management_req:1; /* RW */
+		unsigned long message_accelerator_int0:1; /* RW */
+		unsigned long message_accelerator_int1:1; /* RW */
+		unsigned long message_accelerator_int2:1; /* RW */
+		unsigned long message_accelerator_int3:1; /* RW */
+		unsigned long message_accelerator_int4:1; /* RW */
+		unsigned long message_accelerator_int5:1; /* RW */
+		unsigned long message_accelerator_int6:1; /* RW */
+		unsigned long message_accelerator_int7:1; /* RW */
+		unsigned long message_accelerator_int8:1; /* RW */
+		unsigned long message_accelerator_int9:1; /* RW */
+		unsigned long message_accelerator_int10:1; /* RW */
+		unsigned long message_accelerator_int11:1; /* RW */
+		unsigned long message_accelerator_int12:1; /* RW */
+		unsigned long message_accelerator_int13:1; /* RW */
+		unsigned long message_accelerator_int14:1; /* RW */
+		unsigned long message_accelerator_int15:1; /* RW */
+		unsigned long gr0_tlb_int0:1; /* RW */
+		unsigned long gr0_tlb_int1:1; /* RW */
+		unsigned long gr0_tlb_int2:1; /* RW */
+		unsigned long gr0_tlb_int3:1; /* RW */
+		unsigned long gr0_tlb_int4:1; /* RW */
+		unsigned long gr0_tlb_int5:1; /* RW */
+		unsigned long gr0_tlb_int6:1; /* RW */
+		unsigned long gr0_tlb_int7:1; /* RW */
+		unsigned long gr0_tlb_int8:1; /* RW */
+		unsigned long gr0_tlb_int9:1; /* RW */
+		unsigned long gr0_tlb_int10:1; /* RW */
+		unsigned long gr0_tlb_int11:1; /* RW */
+		unsigned long gr0_tlb_int12:1; /* RW */
+		unsigned long gr0_tlb_int13:1; /* RW */
+		unsigned long gr0_tlb_int14:1; /* RW */
+		unsigned long gr0_tlb_int15:1; /* RW */
+		unsigned long gr1_tlb_int0:1; /* RW */
+		unsigned long gr1_tlb_int1:1; /* RW */
+		unsigned long gr1_tlb_int2:1; /* RW */
+		unsigned long gr1_tlb_int3:1; /* RW */
+		unsigned long gr1_tlb_int4:1; /* RW */
+		unsigned long gr1_tlb_int5:1; /* RW */
+		unsigned long gr1_tlb_int6:1; /* RW */
+		unsigned long gr1_tlb_int7:1; /* RW */
+		unsigned long gr1_tlb_int8:1; /* RW */
+		unsigned long gr1_tlb_int9:1; /* RW */
+		unsigned long gr1_tlb_int10:1; /* RW */
+		unsigned long gr1_tlb_int11:1; /* RW */
+		unsigned long gr1_tlb_int12:1; /* RW */
+		unsigned long gr1_tlb_int13:1; /* RW */
+		unsigned long gr1_tlb_int14:1; /* RW */
+		unsigned long gr1_tlb_int15:1; /* RW */
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rsvd_52_63:12;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_event_occurred1_s {
+		unsigned long bau_data:1; /* RW */
+		unsigned long power_management_req:1; /* RW */
+		unsigned long message_accelerator_int0:1; /* RW */
+		unsigned long message_accelerator_int1:1; /* RW */
+		unsigned long message_accelerator_int2:1; /* RW */
+		unsigned long message_accelerator_int3:1; /* RW */
+		unsigned long message_accelerator_int4:1; /* RW */
+		unsigned long message_accelerator_int5:1; /* RW */
+		unsigned long message_accelerator_int6:1; /* RW */
+		unsigned long message_accelerator_int7:1; /* RW */
+		unsigned long message_accelerator_int8:1; /* RW */
+		unsigned long message_accelerator_int9:1; /* RW */
+		unsigned long message_accelerator_int10:1; /* RW */
+		unsigned long message_accelerator_int11:1; /* RW */
+		unsigned long message_accelerator_int12:1; /* RW */
+		unsigned long message_accelerator_int13:1; /* RW */
+		unsigned long message_accelerator_int14:1; /* RW */
+		unsigned long message_accelerator_int15:1; /* RW */
+		unsigned long gr0_tlb_int0:1; /* RW */
+		unsigned long gr0_tlb_int1:1; /* RW */
+		unsigned long gr0_tlb_int2:1; /* RW */
+		unsigned long gr0_tlb_int3:1; /* RW */
+		unsigned long gr0_tlb_int4:1; /* RW */
+		unsigned long gr0_tlb_int5:1; /* RW */
+		unsigned long gr0_tlb_int6:1; /* RW */
+		unsigned long gr0_tlb_int7:1; /* RW */
+		unsigned long gr0_tlb_int8:1; /* RW */
+		unsigned long gr0_tlb_int9:1; /* RW */
+		unsigned long gr0_tlb_int10:1; /* RW */
+		unsigned long gr0_tlb_int11:1; /* RW */
+		unsigned long gr0_tlb_int12:1; /* RW */
+		unsigned long gr0_tlb_int13:1; /* RW */
+		unsigned long gr0_tlb_int14:1; /* RW */
+		unsigned long gr0_tlb_int15:1; /* RW */
+		unsigned long gr1_tlb_int0:1; /* RW */
+		unsigned long gr1_tlb_int1:1; /* RW */
+		unsigned long gr1_tlb_int2:1; /* RW */
+		unsigned long gr1_tlb_int3:1; /* RW */
+		unsigned long gr1_tlb_int4:1; /* RW */
+		unsigned long gr1_tlb_int5:1; /* RW */
+		unsigned long gr1_tlb_int6:1; /* RW */
+		unsigned long gr1_tlb_int7:1; /* RW */
+		unsigned long gr1_tlb_int8:1; /* RW */
+		unsigned long gr1_tlb_int9:1; /* RW */
+		unsigned long gr1_tlb_int10:1; /* RW */
+		unsigned long gr1_tlb_int11:1; /* RW */
+		unsigned long gr1_tlb_int12:1; /* RW */
+		unsigned long gr1_tlb_int13:1; /* RW */
+		unsigned long gr1_tlb_int14:1; /* RW */
+		unsigned long gr1_tlb_int15:1; /* RW */
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rsvd_52_63:12;
+	} s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED1_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED1_ALIAS 0x70088UL
+
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2 */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2 0x70100UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 0
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 1
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED2_RTC_0_SHFT 2
+#define UVYH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED2_RTC_1_SHFT 3
+#define UVYH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED2_RTC_2_SHFT 4
+#define UVYH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED2_RTC_3_SHFT 5
+#define UVYH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED2_RTC_4_SHFT 6
+#define UVYH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED2_RTC_5_SHFT 7
+#define UVYH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED2_RTC_6_SHFT 8
+#define UVYH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED2_RTC_7_SHFT 9
+#define UVYH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED2_RTC_8_SHFT 10
+#define UVYH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED2_RTC_9_SHFT 11
+#define UVYH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED2_RTC_10_SHFT 12
+#define UVYH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED2_RTC_11_SHFT 13
+#define UVYH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED2_RTC_12_SHFT 14
+#define UVYH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED2_RTC_13_SHFT 15
+#define UVYH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED2_RTC_14_SHFT 16
+#define UVYH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED2_RTC_15_SHFT 17
+#define UVYH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED2_RTC_16_SHFT 18
+#define UVYH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED2_RTC_17_SHFT 19
+#define UVYH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED2_RTC_18_SHFT 20
+#define UVYH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED2_RTC_19_SHFT 21
+#define UVYH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED2_RTC_20_SHFT 22
+#define UVYH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED2_RTC_21_SHFT 23
+#define UVYH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED2_RTC_22_SHFT 24
+#define UVYH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_23_SHFT 25
+#define UVYH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_24_SHFT 26
+#define UVYH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_25_SHFT 27
+#define UVYH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_26_SHFT 28
+#define UVYH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_27_SHFT 29
+#define UVYH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_28_SHFT 30
+#define UVYH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_29_SHFT 31
+#define UVYH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_30_SHFT 32
+#define UVYH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_31_SHFT 33
+#define UVYH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000200000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
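/*
 * Illustrative aside, not part of this patch: because the tables follow
 * a strict NAME_SHFT/NAME_MASK naming convention, a generic extractor
 * can be token-pasted once instead of open-coding each shift.
 * UV_EXTRACT() is a hypothetical macro shown only to document the
 * convention; it is not defined by this header.
 */
#define UV_EXTRACT(val, name)	(((val) & name##_MASK) >> name##_SHFT)
/* e.g. UV_EXTRACT(ev2, UV4H_EVENT_OCCURRED2_RTC_25) on a UV4 hub */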
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+#define UVH_EVENT_OCCURRED2_RTC_1_MASK ( \
+	is_uv(UV5) ? 0x0000000000000008UL : \
+	is_uv(UV4) ? 0x0000000000080000UL : \
+	is_uv(UV3) ? 0x0000000000000002UL : \
+	is_uv(UV2) ? 0x0000000000000002UL : \
+	0)
+#define UVH_EVENT_OCCURRED2_RTC_1_SHFT ( \
+	is_uv(UV5) ? 3 : \
+	is_uv(UV4) ? 19 : \
+	is_uv(UV3) ? 1 : \
+	is_uv(UV2) ? 1 : \
+	-1)
+
+union uvyh_event_occurred2_u {
+	unsigned long v;
+
+	/* UVYH common struct */
+	struct uvyh_event_occurred2_s {
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_34_63:30;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred2_s {
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_34_63:30;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred2_s {
+		unsigned long message_accelerator_int0:1; /* RW */
+		unsigned long message_accelerator_int1:1; /* RW */
+		unsigned long message_accelerator_int2:1; /* RW */
+		unsigned long message_accelerator_int3:1; /* RW */
+		unsigned long message_accelerator_int4:1; /* RW */
+		unsigned long message_accelerator_int5:1; /* RW */
+		unsigned long message_accelerator_int6:1; /* RW */
+		unsigned long message_accelerator_int7:1; /* RW */
+		unsigned long message_accelerator_int8:1; /* RW */
+		unsigned long message_accelerator_int9:1; /* RW */
+		unsigned long message_accelerator_int10:1; /* RW */
+		unsigned long message_accelerator_int11:1; /* RW */
+		unsigned long message_accelerator_int12:1; /* RW */
+		unsigned long message_accelerator_int13:1; /* RW */
+		unsigned long message_accelerator_int14:1; /* RW */
+		unsigned long message_accelerator_int15:1; /* RW */
+		unsigned long rtc_interval_int:1; /* RW */
+		unsigned long bau_dashboard_int:1; /* RW */
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_50_63:14;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_event_occurred2_s {
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_32_63:32;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_event_occurred2_s {
+		unsigned long rtc_0:1; /* RW */
+		unsigned long rtc_1:1; /* RW */
+		unsigned long rtc_2:1; /* RW */
+		unsigned long rtc_3:1; /* RW */
+		unsigned long rtc_4:1; /* RW */
+		unsigned long rtc_5:1; /* RW */
+		unsigned long rtc_6:1; /* RW */
+		unsigned long rtc_7:1; /* RW */
+		unsigned long rtc_8:1; /* RW */
+		unsigned long rtc_9:1; /* RW */
+		unsigned long rtc_10:1; /* RW */
+		unsigned long rtc_11:1; /* RW */
+		unsigned long rtc_12:1; /* RW */
+		unsigned long rtc_13:1; /* RW */
+		unsigned long rtc_14:1; /* RW */
+		unsigned long rtc_15:1; /* RW */
+		unsigned long rtc_16:1; /* RW */
+		unsigned long rtc_17:1; /* RW */
+		unsigned long rtc_18:1; /* RW */
+		unsigned long rtc_19:1; /* RW */
+		unsigned long rtc_20:1; /* RW */
+		unsigned long rtc_21:1; /* RW */
+		unsigned long rtc_22:1; /* RW */
+		unsigned long rtc_23:1; /* RW */
+		unsigned long rtc_24:1; /* RW */
+		unsigned long rtc_25:1; /* RW */
+		unsigned long rtc_26:1; /* RW */
+		unsigned long rtc_27:1; /* RW */
+		unsigned long rtc_28:1; /* RW */
+		unsigned long rtc_29:1; /* RW */
+		unsigned long rtc_30:1; /* RW */
+		unsigned long rtc_31:1; /* RW */
+		unsigned long rsvd_32_63:32;
+	} s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2_ALIAS 0x70108UL
 /* ========================================================================= */
@@ -725,53 +2135,148 @@ union uvh_event_occurred0_u {
 /* ========================================================================= */
 #define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-#define UV1H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
-#define UVH_EXTIO_INT0_BROADCAST_32 ( \
-	is_uv1_hub() ? UV1H_EXTIO_INT0_BROADCAST_32 : \
-	is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 : \
-	is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 : \
-	/*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
-
+/* UVH common defines*/
 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0
 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL
 union uvh_extio_int0_broadcast_u {
 	unsigned long v;
+
+	/* UVH common struct */
 	struct uvh_extio_int0_broadcast_s {
 		unsigned long enable:1; /* RW */
 		unsigned long rsvd_1_63:63;
 	} s;
+
+	/* UV5 unique struct */
+	struct uv5h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_extio_int0_broadcast_s {
+		unsigned long enable:1; /* RW */
+		unsigned long rsvd_1_63:63;
+	} s2;
+};
+
+/* ========================================================================= */
+/* UVH_GR0_GAM_GR_CONFIG */
+/* ========================================================================= */
+#define UVH_GR0_GAM_GR_CONFIG ( \
+	is_uv(UV5) ? 0x600028UL : \
+	is_uv(UV4) ? 0x600028UL : \
+	is_uv(UV3) ? 0xc00028UL : \
+	is_uv(UV2) ? 0xc00028UL : \
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV4 unique defines */
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV3 unique defines */
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV2 unique defines */
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_SHFT 0
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_MASK 0x000000000000000fUL
+
+
+union uvyh_gr0_gam_gr_config_u {
+	unsigned long v;
+
+	/* UVYH common struct */
+	struct uvyh_gr0_gam_gr_config_s {
+		unsigned long rsvd_0_9:10;
+		unsigned long subspace:1; /* RW */
+		unsigned long rsvd_11_63:53;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_gr0_gam_gr_config_s {
+		unsigned long rsvd_0_9:10;
+		unsigned long subspace:1; /* RW */
+		unsigned long rsvd_11_63:53;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_gam_gr_config_s {
+		unsigned long rsvd_0_9:10;
+		unsigned long subspace:1; /* RW */
+		unsigned long rsvd_11_63:53;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_gr0_gam_gr_config_s {
+		unsigned long m_skt:6; /* RW */
+		unsigned long undef_6_9:4; /* Undefined */
+		unsigned long subspace:1; /* RW */
+		unsigned long reserved:53;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_gr0_gam_gr_config_s {
+		unsigned long n_gr:4; /* RW */
+		unsigned long reserved:60;
+	} s2;
+};
 /* ========================================================================= */
 /* UVH_GR0_TLB_INT0_CONFIG */
 /* ========================================================================= */
-#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
-
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+#define UVH_GR0_TLB_INT0_CONFIG ( \
+	is_uv(UV4) ? 0x61b00UL : \
+	is_uv(UV3) ? 0x61b00UL : \
+	is_uv(UV2) ? 0x61b00UL : \
+	uv_undefined("UVH_GR0_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
 union uvh_gr0_tlb_int0_config_u {
 	unsigned long v;
+
+	/* UVH common struct */
 	struct uvh_gr0_tlb_int0_config_s {
 		unsigned long vector_:8; /* RW */
 		unsigned long dm:3; /* RW */
@@ -784,33 +2289,97 @@ union uvh_gr0_tlb_int0_config_u {
 		unsigned long rsvd_17_31:15;
 		unsigned long apic_id:32; /* RW */
 	} s;
+
+	/* UVXH common struct */
+	struct uvxh_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_gr0_tlb_int0_config_s {
+		unsigned long vector_:8; /* RW */
+		unsigned long dm:3; /* RW */
+		unsigned long destmode:1; /* RW */
+		unsigned long status:1; /* RO */
+		unsigned long p:1; /* RO */
+		unsigned long rsvd_14:1;
+		unsigned long t:1; /* RO */
+		unsigned long m:1; /* RW */
+		unsigned long rsvd_17_31:15;
+		unsigned long apic_id:32; /* RW */
+	} s2;
 };
 /* ========================================================================= */
 /* UVH_GR0_TLB_INT1_CONFIG */
 /* ========================================================================= */
-#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
-
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
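/*
 * Illustrative aside, not part of this patch: the *_config unions are
 * typically programmed by filling the bitfields and writing the .v
 * overlay to the MMR.  uv_write_global_mmr64() is the existing accessor
 * from <asm/uv/uv_hub.h>; the helper below and its arguments are
 * hypothetical.
 */
static inline void uv_example_program_tlb_int0(int pnode, u8 vector, u32 apicid)
{
	union uvh_gr0_tlb_int0_config_u cfg = { .v = 0UL };

	cfg.s.vector_ = vector;	/* interrupt vector to deliver */
	cfg.s.apic_id = apicid;	/* destination APIC id */
	uv_write_global_mmr64(pnode, UVH_GR0_TLB_INT0_CONFIG, cfg.v);
}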
-#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UVH_GR0_TLB_INT1_CONFIG ( \ + is_uv(UV4) ? 0x61b40UL : \ + is_uv(UV3) ? 0x61b40UL : \ + is_uv(UV2) ? 0x61b40UL : \ + uv_undefined("UVH_GR0_TLB_INT1_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_gr0_tlb_int1_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_gr0_tlb_int1_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -823,466 +2392,97 @@ union uvh_gr0_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_CONTROL */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL -#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL -#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL -#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL -#define UVH_GR0_TLB_MMR_CONTROL ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \ - is_uv3_hub() ? 
UV3H_GR0_TLB_MMR_CONTROL : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL) - -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL - -#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56 -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60 -#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL -#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL - -#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL 
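
The _SHFT/_MASK pairs throughout this header are consumed with the usual mask-then-shift idiom. A small self-contained sketch using the UVXH_GR0_TLB_INT0_CONFIG values retained above (the register value is a made-up sample; nothing is read from hardware):

        #include <stdio.h>

        /* values copied from the UVXH common defines above */
        #define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT   0
        #define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK   0x00000000000000ffUL
        #define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT  32
        #define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK  0xffffffff00000000UL

        int main(void)
        {
                unsigned long v = 0x0000002a000000f3UL;  /* hypothetical MMR value */

                unsigned long vector  = (v & UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK)
                                        >> UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT;
                unsigned long apic_id = (v & UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK)
                                        >> UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT;

                printf("vector=%#lx apic_id=%#lx\n", vector, apic_id);
                return 0;
        }

This prints vector=0xf3 apic_id=0x2a for the sample value, matching the bit positions the masks describe.
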
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL - -#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 13 -#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59 -#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL -#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL -#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL - -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ - is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK) -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ - is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK) -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ - is_uv3_hub() ? 
UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT) - -union uvh_gr0_tlb_mmr_control_u { - unsigned long v; - struct uvh_gr0_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_48:17; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; - } s; - struct uv1h_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_47:16; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53:1; - unsigned long mmr_inj_tlbpgsize:1; /* RW */ - unsigned long rsvd_55:1; - unsigned long mmr_inj_tlbrreg:1; /* RW */ - unsigned long rsvd_57_59:3; - unsigned long mmr_inj_tlblruv:1; /* RW */ - unsigned long rsvd_61_63:3; - } s1; - struct uvxh_gr0_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long rsvd_48:1; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; + /* UVXH common struct */ + struct uvxh_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } sx; - struct uv2h_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53_63:11; - } s2; - struct uv3h_gr0_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long undef_52:1; /* Undefined */ - unsigned long rsvd_53_63:11; - } s3; - struct uv4h_gr0_tlb_mmr_control_s { - unsigned long index:13; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_15:1; - unsigned long auto_valid_en:1; /* RW */ - unsigned long 
rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_58:7; - unsigned long page_size:5; /* RW */ + + /* UV4 unique struct */ + struct uv4h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s4; -}; -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_READ_DATA_HI */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL -#define UVH_GR0_TLB_MMR_READ_DATA_HI ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \ - is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI) - -#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT 34 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 49 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52 
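
The unions in this header offer a second access path: the raw 64-bit value and the per-hub bitfield views alias each other. A sketch of that decode style, with the struct copied from the uvh_gr0_tlb_int0_config_s layout above (the sample value is invented, and LSB-first bitfield layout is assumed, as it is on x86):

        #include <stdio.h>

        union gr0_tlb_int0_config_u {
                unsigned long v;
                struct {                        /* layout per uvh_gr0_tlb_int0_config_s */
                        unsigned long vector_:8;        /* RW */
                        unsigned long dm:3;             /* RW */
                        unsigned long destmode:1;       /* RW */
                        unsigned long status:1;         /* RO */
                        unsigned long p:1;              /* RO */
                        unsigned long rsvd_14:1;
                        unsigned long t:1;              /* RO */
                        unsigned long m:1;              /* RW */
                        unsigned long rsvd_17_31:15;
                        unsigned long apic_id:32;       /* RW */
                } s;
        };

        int main(void)
        {
                union gr0_tlb_int0_config_u cfg = { .v = 0x0000002a000100f3UL };

                printf("vector=%#lx m=%lu apic_id=%#lx\n",
                       (unsigned long)cfg.s.vector_,
                       (unsigned long)cfg.s.m,
                       (unsigned long)cfg.s.apic_id);
                return 0;
        }

The patch's new sx/s2/s3/s4 members follow the same aliasing scheme, one view per hub generation plus the common views.
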
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - - -union uvh_gr0_tlb_mmr_read_data_hi_u { - unsigned long v; - struct uv1h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s1; - struct uv2h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s2; - struct uv3h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_46_54:9; /* Undefined */ - unsigned long way_ecc:9; /* RO */ + /* UV3 unique struct */ + struct uv3h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s3; - struct uv4h_gr0_tlb_mmr_read_data_hi_s { - unsigned long pfn:34; /* RO */ - unsigned long pnid:15; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_54:1; /* Undefined */ - unsigned long way_ecc:9; /* RO */ - } s4; -}; -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_READ_DATA_LO */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL -#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL -#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL -#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL -#define UVH_GR0_TLB_MMR_READ_DATA_LO ( \ - is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \ - is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \ - is_uv3_hub() ? 
UV3H_GR0_TLB_MMR_READ_DATA_LO : \ - /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO) - -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - - -union uvh_gr0_tlb_mmr_read_data_lo_u { - unsigned long v; - struct uvh_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s; - struct uv1h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s1; - struct uvxh_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } sx; - struct uv2h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ + /* UV2 unique struct */ + struct uv2h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s2; - struct uv3h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s3; - struct uv4h_gr0_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* 
RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s4; }; /* ========================================================================= */ /* UVH_GR1_TLB_INT0_CONFIG */ /* ========================================================================= */ -#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL #define UVH_GR1_TLB_INT0_CONFIG ( \ - is_uv1_hub() ? UV1H_GR1_TLB_INT0_CONFIG : \ - is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG : \ - is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG) - -#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + is_uv(UV4) ? 0x62100UL : \ + is_uv(UV3) ? 0x61f00UL : \ + is_uv(UV2) ? 0x61f00UL : \ + uv_undefined("UVH_GR1_TLB_INT0_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_gr1_tlb_int0_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_gr1_tlb_int0_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -1295,41 +2495,97 @@ union uvh_gr1_tlb_int0_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + + /* UVXH common struct */ + struct uvxh_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned 
long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; + + /* UV2 unique struct */ + struct uv2h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; }; /* ========================================================================= */ /* UVH_GR1_TLB_INT1_CONFIG */ /* ========================================================================= */ -#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL -#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL -#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL -#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL #define UVH_GR1_TLB_INT1_CONFIG ( \ - is_uv1_hub() ? UV1H_GR1_TLB_INT1_CONFIG : \ - is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG : \ - is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG) - -#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 -#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 -#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 -#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 -#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + is_uv(UV4) ? 0x62140UL : \ + is_uv(UV3) ? 0x61f40UL : \ + is_uv(UV2) ? 
0x61f40UL : \ + uv_undefined("UVH_GR1_TLB_INT1_CONFIG")) + + +/* UVXH common defines */ +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_gr1_tlb_int1_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_gr1_tlb_int1_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -1342,416 +2598,62 @@ union uvh_gr1_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; -/* ========================================================================= */ -/* UVH_GR1_TLB_MMR_CONTROL */ -/* ========================================================================= */ -#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL -#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL -#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL -#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL -#define UVH_GR1_TLB_MMR_CONTROL ( \ - is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \ - is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \ - is_uv3_hub() ? 
UV3H_GR1_TLB_MMR_CONTROL : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL) - -#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL - -#define UV1H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56 -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60 -#define UV1H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL -#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL - -#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 -#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL 
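
Writes go the other way: a control word is composed field by field before being stored. A sketch built from the removed UV2H_GR1_TLB_MMR_CONTROL defines in this hunk (shifts and masks copied verbatim; the index and mem_sel values are made-up samples, and no MMR is actually written):

        #include <stdio.h>

        /* values copied from the UV2H defines above */
        #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT     0
        #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK     0x0000000000000fffUL
        #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT   12
        #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK   0x0000000000003000UL
        #define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT  31

        int main(void)
        {
                unsigned long index = 0x123, mem_sel = 1;       /* sample inputs */
                unsigned long v = 0;

                v |= (index << UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT)
                     & UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK;
                v |= (mem_sel << UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT)
                     & UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK;
                v |= 1UL << UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT; /* request a read op */

                printf("control word: %#lx\n", v);
                return 0;
        }

Masking after the shift keeps an out-of-range input from spilling into neighboring fields, which is why the header supplies both the _SHFT and the _MASK for every field.
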
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL -#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL - -#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL - -#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 13 -#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 -#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59 -#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL -#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL -#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL -#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL -#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL - - -union uvh_gr1_tlb_mmr_control_u { - unsigned long v; - struct uvh_gr1_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_48:17; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; - } s; - struct uv1h_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_47:16; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53:1; - unsigned long mmr_inj_tlbpgsize:1; /* RW */ - unsigned long rsvd_55:1; - unsigned long mmr_inj_tlbrreg:1; /* RW */ - unsigned long rsvd_57_59:3; - unsigned long mmr_inj_tlblruv:1; /* RW */ 
- unsigned long rsvd_61_63:3; - } s1; - struct uvxh_gr1_tlb_mmr_control_s { - unsigned long rsvd_0_15:16; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long rsvd_48:1; - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_63:12; + /* UVXH common struct */ + struct uvxh_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } sx; - struct uv2h_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long rsvd_21_29:9; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long mmr_inj_con:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long mmr_inj_tlbram:1; /* RW */ - unsigned long rsvd_53_63:11; - } s2; - struct uv3h_gr1_tlb_mmr_control_s { - unsigned long index:12; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_14_15:2; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long undef_52:1; /* Undefined */ - unsigned long rsvd_53_63:11; - } s3; - struct uv4h_gr1_tlb_mmr_control_s { - unsigned long index:13; /* RW */ - unsigned long mem_sel:2; /* RW */ - unsigned long rsvd_15:1; - unsigned long auto_valid_en:1; /* RW */ - unsigned long rsvd_17_19:3; - unsigned long mmr_hash_index_en:1; /* RW */ - unsigned long ecc_sel:1; /* RW */ - unsigned long rsvd_22_29:8; - unsigned long mmr_write:1; /* WP */ - unsigned long mmr_read:1; /* WP */ - unsigned long mmr_op_done:1; /* RW */ - unsigned long rsvd_33_47:15; - unsigned long undef_48:1; /* Undefined */ - unsigned long rsvd_49_51:3; - unsigned long rsvd_52_58:7; - unsigned long page_size:5; /* RW */ + + /* UV4 unique struct */ + struct uv4h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s4; -}; -/* ========================================================================= */ -/* UVH_GR1_TLB_MMR_READ_DATA_HI */ -/* ========================================================================= */ -#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL -#define UVH_GR1_TLB_MMR_READ_DATA_HI ( \ - 
is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \ - is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \ - is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI) - -#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 - -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL - -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT 34 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 49 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL - - -union uvh_gr1_tlb_mmr_read_data_hi_u { - unsigned long v; - struct uv1h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s1; - struct uv2h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long rsvd_45_63:19; - } s2; - struct uv3h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:41; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long 
larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_46_54:9; /* Undefined */ - unsigned long way_ecc:9; /* RO */ + /* UV3 unique struct */ + struct uv3h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s3; - struct uv4h_gr1_tlb_mmr_read_data_hi_s { - unsigned long pfn:34; /* RO */ - unsigned long pnid:15; /* RO */ - unsigned long gaa:2; /* RO */ - unsigned long dirty:1; /* RO */ - unsigned long larger:1; /* RO */ - unsigned long aa_ext:1; /* RO */ - unsigned long undef_54:1; /* Undefined */ - unsigned long way_ecc:9; /* RO */ - } s4; -}; -/* ========================================================================= */ -/* UVH_GR1_TLB_MMR_READ_DATA_LO */ -/* ========================================================================= */ -#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL -#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL -#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL -#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL -#define UVH_GR1_TLB_MMR_READ_DATA_LO ( \ - is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \ - is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \ - is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO : \ - /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO) - -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 
0x0000007fffffffffUL -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL -#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL - - -union uvh_gr1_tlb_mmr_read_data_lo_u { - unsigned long v; - struct uvh_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s; - struct uv1h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s1; - struct uvxh_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } sx; - struct uv2h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ + /* UV2 unique struct */ + struct uv2h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s2; - struct uv3h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s3; - struct uv4h_gr1_tlb_mmr_read_data_lo_s { - unsigned long vpn:39; /* RO */ - unsigned long asid:24; /* RO */ - unsigned long valid:1; /* RO */ - } s4; }; /* ========================================================================= */ @@ -1759,58 +2661,43 @@ union uvh_gr1_tlb_mmr_read_data_lo_u { /* ========================================================================= */ #define UVH_INT_CMPB 0x22080UL +/* UVH common defines*/ #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL union uvh_int_cmpb_u { unsigned long v; + + /* UVH common struct */ struct uvh_int_cmpb_s { unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPC */ -/* ========================================================================= */ -#define UVH_INT_CMPC 0x22100UL - - -#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 -#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL -#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 -#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL - - -union uvh_int_cmpc_u { - unsigned long v; - struct uvh_int_cmpc_s { - unsigned long real_time_cmpc:56; /* RW */ + /* UV5 unique struct */ + struct uv5h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; - } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPD */ -/* ========================================================================= */ -#define UVH_INT_CMPD 0x22180UL - - -#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 -#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL + } s5; -#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 -#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL + /* UV4 unique struct */ + struct uv4h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s4; + /* UV3 unique struct */ + struct uv3h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; -union uvh_int_cmpd_u { - unsigned long v; - struct 
uvh_int_cmpd_s { - unsigned long real_time_cmpd:56; /* RW */ + /* UV2 unique struct */ + struct uv2h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; - } s; + } s2; }; /* ========================================================================= */ @@ -1818,30 +2705,23 @@ union uvh_int_cmpd_u { /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UV1H_IPI_INT_32 0x348 -#define UV2H_IPI_INT_32 0x348 -#define UV3H_IPI_INT_32 0x348 -#define UV4H_IPI_INT_32 0x268 -#define UVH_IPI_INT_32 ( \ - is_uv1_hub() ? UV1H_IPI_INT_32 : \ - is_uv2_hub() ? UV2H_IPI_INT_32 : \ - is_uv3_hub() ? UV3H_IPI_INT_32 : \ - /*is_uv4_hub*/ UV4H_IPI_INT_32) - +/* UVH common defines*/ #define UVH_IPI_INT_VECTOR_SHFT 0 -#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 -#define UVH_IPI_INT_DESTMODE_SHFT 11 -#define UVH_IPI_INT_APIC_ID_SHFT 16 -#define UVH_IPI_INT_SEND_SHFT 63 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 #define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UVH_IPI_INT_DESTMODE_SHFT 11 #define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UVH_IPI_INT_APIC_ID_SHFT 16 #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UVH_IPI_INT_SEND_SHFT 63 #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL union uvh_ipi_int_u { unsigned long v; + + /* UVH common struct */ struct uvh_ipi_int_s { unsigned long vector_:8; /* RW */ unsigned long delivery_mode:3; /* RW */ @@ -1851,1243 +2731,733 @@ union uvh_ipi_int_u { unsigned long rsvd_48_62:15; unsigned long send:1; /* WP */ } s; + + /* UV5 unique struct */ + struct uv5h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s5; + + /* UV4 unique struct */ + struct uv4h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s4; + + /* UV3 unique struct */ + struct uv3h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s3; + + /* UV2 unique struct */ + struct uv2h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s2; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ +/* UVH_NODE_ID */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST") -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ - is_uv2_hub() ? 
UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST) -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 +#define UVH_NODE_ID 0x0UL +/* UVH common defines*/ +#define UVH_NODE_ID_FORCE1_SHFT 0 +#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVH_NODE_ID_REVISION_SHFT 28 +#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVH_NODE_ID_NODE_ID_SHFT 32 +#define UVH_NODE_ID_NI_PORT_SHFT 57 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +/* UVXH common defines */ +#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL +/* UVYH common defines */ +#define UVYH_NODE_ID_NODE_ID_MASK 0x0000007f00000000UL +#define UVYH_NODE_ID_NI_PORT_MASK 0x7e00000000000000UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +/* UV4 unique defines */ +#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV4H_NODE_ID_RESERVED_2_SHFT 49 +#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +/* UV3 unique defines */ +#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV3H_NODE_ID_RESERVED_2_SHFT 49 +#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -union uvh_lb_bau_intd_payload_queue_first_u { +union uvh_node_id_u { unsigned long v; - struct uv1h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s1; - struct uv2h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ + + /* UVH common struct */ + struct uvh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long rsvd_32_63:32; + } s; + + /* UVXH common struct */ + struct uvxh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } sx; + + /* UVYH 
common struct */ + struct uvyh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:7; /* RW */ + unsigned long rsvd_39_56:18; + unsigned long ni_port:6; /* RO */ unsigned long rsvd_63:1; - } s2; - struct uv3h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:7; /* RW */ + unsigned long rsvd_39_56:18; + unsigned long ni_port:6; /* RO */ unsigned long rsvd_63:1; + } s5; + + /* UV4 unique struct */ + struct uv4h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s4; + + /* UV3 unique struct */ + struct uv3h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; } s3; + + /* UV2 unique struct */ + struct uv2h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s2; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ +/* UVH_NODE_PRESENT_0 */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST") -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST) -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 +#define UVH_NODE_PRESENT_0 ( \ + is_uv(UV5) ? 
0x1400UL : \ + 0) -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL +/* UVYH common defines */ +#define UVYH_NODE_PRESENT_0_NODES_SHFT 0 +#define UVYH_NODE_PRESENT_0_NODES_MASK 0xffffffffffffffffUL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL +union uvh_node_present_0_u { + unsigned long v; -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + /* UVH common struct */ + struct uvh_node_present_0_s { + unsigned long nodes:64; /* RW */ + } s; + /* UVYH common struct */ + struct uvyh_node_present_0_s { + unsigned long nodes:64; /* RW */ + } sy; -union uvh_lb_bau_intd_payload_queue_last_u { - unsigned long v; - struct uv1h_lb_bau_intd_payload_queue_last_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s1; - struct uv2h_lb_bau_intd_payload_queue_last_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s2; - struct uv3h_lb_bau_intd_payload_queue_last_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s3; + /* UV5 unique struct */ + struct uv5h_node_present_0_s { + unsigned long nodes:64; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ +/* UVH_NODE_PRESENT_1 */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL") -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL) -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 +#define UVH_NODE_PRESENT_1 ( \ + is_uv(UV5) ? 
0x1408UL : \ + 0) -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL +/* UVYH common defines */ +#define UVYH_NODE_PRESENT_1_NODES_SHFT 0 +#define UVYH_NODE_PRESENT_1_NODES_MASK 0xffffffffffffffffUL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL +union uvh_node_present_1_u { + unsigned long v; -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + /* UVH common struct */ + struct uvh_node_present_1_s { + unsigned long nodes:64; /* RW */ + } s; + /* UVYH common struct */ + struct uvyh_node_present_1_s { + unsigned long nodes:64; /* RW */ + } sy; -union uvh_lb_bau_intd_payload_queue_tail_u { - unsigned long v; - struct uv1h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s1; - struct uv2h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s2; - struct uv3h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s3; + /* UV5 unique struct */ + struct uv5h_node_present_1_s { + unsigned long nodes:64; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ +/* UVH_NODE_PRESENT_TABLE */ /* ========================================================================= */ -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE") -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ - is_uv3_hub() ? 
UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE) -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 - - -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - - -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define 
UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - - -union uvh_lb_bau_intd_software_acknowledge_u { +#define UVH_NODE_PRESENT_TABLE ( \ + is_uv(UV4) ? 0x1400UL : \ + is_uv(UV3) ? 0x1400UL : \ + is_uv(UV2) ? 0x1400UL : \ + 0) + +#define UVH_NODE_PRESENT_TABLE_DEPTH ( \ + is_uv(UV4) ? 4 : \ + is_uv(UV3) ? 16 : \ + is_uv(UV2) ? 16 : \ + 0) + + +/* UVXH common defines */ +#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + + +union uvh_node_present_table_u { unsigned long v; - struct uv1h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW, W1C */ - unsigned long pending_1:1; /* RW, W1C */ - unsigned long pending_2:1; /* RW, W1C */ - unsigned long pending_3:1; /* RW, W1C */ - unsigned long pending_4:1; /* RW, W1C */ - unsigned long pending_5:1; /* RW, W1C */ - unsigned long pending_6:1; /* RW, W1C */ - unsigned long pending_7:1; /* RW, W1C */ - unsigned long timeout_0:1; /* RW, W1C */ - unsigned long timeout_1:1; /* RW, W1C */ - unsigned long timeout_2:1; /* RW, W1C */ - unsigned long timeout_3:1; /* RW, W1C */ - unsigned long timeout_4:1; /* RW, W1C */ - unsigned long timeout_5:1; /* RW, W1C */ - unsigned long timeout_6:1; /* RW, W1C */ - unsigned long timeout_7:1; /* RW, W1C */ - unsigned long rsvd_16_63:48; - } s1; - struct uv2h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; - } s2; - struct uv3h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; + + /* UVH common struct */ + struct uvh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } sx; + + /* UV4 unique struct */ + struct uv4h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_node_present_table_s { + unsigned long nodes:64; /* RW */ } s3; + + /* UV2 unique struct */ + struct uv2h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s2; }; /* ========================================================================= */ -/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ +/* UVH_RH10_GAM_ADDR_MAP_CONFIG */ /* 
========================================================================= */ -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL -#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS") -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS ( \ - is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ - is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ - is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \ - /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS) -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 +#define UVH_RH10_GAM_ADDR_MAP_CONFIG ( \ + is_uv(UV5) ? 0x470000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000001c0UL +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_SHFT 12 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_MASK 0x0000000000001000UL +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_SHFT 16 +#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_MASK 0x00000000000f0000UL + + +union uvh_rh10_gam_addr_map_config_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + unsigned long rsvd_20_63:44; + } s; + /* UVYH common struct */ + struct uvyh_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + unsigned long rsvd_20_63:44; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_addr_map_config_s { + unsigned long undef_0_5:6; /* Undefined */ + unsigned long n_skt:3; /* RW */ + unsigned long undef_9_11:3; /* Undefined */ + unsigned long ls_enable:1; /* RW */ + unsigned long undef_13_15:3; /* Undefined */ + unsigned long mk_tme_keyid_bits:4; /* RW */ + } s5; +}; /* ========================================================================= */ -/* UVH_LB_BAU_MISC_CONTROL */ +/* UVH_RH10_GAM_GRU_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL -#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL -#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL -#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL -#define UVH_LB_BAU_MISC_CONTROL ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL) - -#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10 -#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18 -#define UVH_LB_BAU_MISC_CONTROL_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 : \ - is_uv3_hub() ? 
UV3H_LB_BAU_MISC_CONTROL_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32) - -#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 
0x0000000000004000UL -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL -#define 
UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define 
UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 -#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36 -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37 -#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38 -#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL -#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL 
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL -#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL -#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL -#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 -#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT 15 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 -#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 -#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 -#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36 -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT 37 -#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38 -#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46 -#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL -#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL -#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK 0x00000000000f8000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL -#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL -#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL -#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL -#define 
UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL -#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL -#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL -#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK 0x0000002000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL -#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL - -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK") -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK) -#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT") -#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT) -#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK") -#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK) -#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT \ - uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT") -#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT ( \ - is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ - is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ - is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \ - /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT) - -union uvh_lb_bau_misc_control_u { +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG ( \ + is_uv(UV5) ? 
0x4700b0UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 25 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x0070000000000000UL +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffe000000UL : \ + 0) +#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV5) ? 25 : \ + -1) + +union uvh_rh10_gam_gru_overlay_config_u { unsigned long v; - struct uvh_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long rsvd_15_19:5; - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long rsvd_29_47:19; - unsigned long fun:16; /* RW */ + + /* UVH common struct */ + struct uvh_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ } s; - struct uv1h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long rsvd_29_47:19; - unsigned long fun:16; /* RW */ - } s1; - struct uvxh_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long rsvd_15_19:5; - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long 
enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long rsvd_36_47:12; - unsigned long fun:16; /* RW */ - } sx; - struct uv2h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long rsvd_36_47:12; - unsigned long fun:16; /* RW */ - } s2; - struct uv3h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long enable_intd_soft_ack_mode:1; /* RW */ - unsigned long intd_soft_ack_timeout_period:4; /* RW */ - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */ - unsigned long enable_intd_prefetch_hint:1; /* RW */ - unsigned long thread_kill_timebase:8; /* RW */ - unsigned long rsvd_46_47:2; - unsigned long fun:16; /* RW */ - } s3; - struct uv4h_lb_bau_misc_control_s { - unsigned long rejection_delay:8; /* RW */ - unsigned long apic_mode:1; /* RW */ - unsigned long force_broadcast:1; /* RW */ - unsigned long force_lock_nop:1; /* RW */ - unsigned long qpi_agent_presence_vector:3; /* RW */ - unsigned long descriptor_fetch_mode:1; /* RW */ - unsigned long rsvd_15_19:5; - unsigned long enable_dual_mapping_mode:1; /* RW */ - unsigned long vga_io_port_decode_enable:1; /* RW */ - unsigned long vga_io_port_16_bit_decode:1; /* RW */ - unsigned long suppress_dest_registration:1; /* RW */ - unsigned long 
programmed_initial_priority:3; /* RW */ - unsigned long use_incoming_priority:1; /* RW */ - unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long enable_automatic_apic_mode_selection:1;/* RW */ - unsigned long apic_mode_status:1; /* RO */ - unsigned long suppress_interrupts_to_self:1; /* RW */ - unsigned long enable_lock_based_system_flush:1;/* RW */ - unsigned long enable_extended_sb_status:1; /* RW */ - unsigned long suppress_int_prio_udt_to_self:1;/* RW */ - unsigned long use_legacy_descriptor_formats:1;/* RW */ - unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */ - unsigned long rsvd_37:1; - unsigned long thread_kill_timebase:8; /* RW */ - unsigned long address_interleave_select:1; /* RW */ - unsigned long rsvd_47:1; - unsigned long fun:16; /* RW */ - } s4; + + /* UVYH common struct */ + struct uvyh_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_gru_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long n_gru:3; /* RW */ + unsigned long undef_55_62:8; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ +/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL) - -#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 -#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 -#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 -#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32) - -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL - - -union uvh_lb_bau_sb_activation_control_u { +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 ( \ + is_uv(UV5) ? 
0x473000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffc000000UL : \ + 0) +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \ + is_uv(UV5) ? 26 : \ + -1) + +union uvh_rh10_gam_mmioh_overlay_config0_u { unsigned long v; - struct uvh_lb_bau_sb_activation_control_s { - unsigned long index:6; /* RW */ - unsigned long rsvd_6_61:56; - unsigned long push:1; /* WP */ - unsigned long init:1; /* WP */ + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ +/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0) - -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32) - -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL - - -union uvh_lb_bau_sb_activation_status_0_u { +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 ( \ + is_uv(UV5) ? 
0x474000UL : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffc000000UL : \ + 0) +#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \ + is_uv(UV5) ? 26 : \ + -1) + +union uvh_rh10_gam_mmioh_overlay_config1_u { unsigned long v; - struct uvh_lb_bau_sb_activation_status_0_s { - unsigned long status:64; /* RW */ + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ +/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1) - -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32) - -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL - - -union uvh_lb_bau_sb_activation_status_1_u { +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 ( \ + is_uv(UV5) ? 0x473800UL : \ + 0) + +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \ + is_uv(UV5) ? 
128 : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x000000000000007fUL + + +union uvh_rh10_gam_mmioh_redirect_config0_u { unsigned long v; - struct uvh_lb_bau_sb_activation_status_1_s { - unsigned long status:64; /* RW */ + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_redirect_config0_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s5; }; /* ========================================================================= */ -/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ +/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 */ /* ========================================================================= */ -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE) - -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32) - -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 - -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL -#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53 -#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL -#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL - -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT) - -#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \ - is_uv1_hub() ? 
UV1H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK) - -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \ - is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK) +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 ( \ + is_uv(UV5) ? 0x474800UL : \ + 0) + +#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \ + is_uv(UV5) ? 128 : \ + 0) + + +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x000000000000007fUL + + +union uvh_rh10_gam_mmioh_redirect_config1_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmioh_redirect_config1_s { + unsigned long nasid:7; /* RW */ + unsigned long rsvd_7_63:57; + } s5; +}; /* ========================================================================= */ -/* UVH_NODE_ID */ +/* UVH_RH10_GAM_MMR_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UVH_NODE_ID 0x0UL -#define UV1H_NODE_ID 0x0UL -#define UV2H_NODE_ID 0x0UL -#define UV3H_NODE_ID 0x0UL -#define UV4H_NODE_ID 0x0UL +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG ( \ + is_uv(UV5) ? 
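/*
 * Illustrative sketch, not part of the patch: the _DEPTH macro advertises
 * how many redirect entries back a UV5 MMIOH overlay window. The overlay
 * CONFIG0/CONFIG1 registers each define an address range; the matching
 * REDIRECT_CONFIG0/1 array maps each slice of that range to a target NASID.
 * The 8-byte stride between entries is an assumption for illustration only;
 * the patch defines just the base offset and the depth.
 */
static void uv5_dump_mmioh0_redirects(void)
{
	union uvh_rh10_gam_mmioh_redirect_config0_u redir;
	int i;

	/* DEPTH evaluates to 0 on non-UV5, so the loop self-guards. */
	for (i = 0; i < UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; i++) {
		redir.v = uv_read_local_mmr(
				UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 + i * 8);
		pr_info("mmioh0 redirect[%d] -> nasid %lu\n", i,
			(unsigned long)redir.s5.nasid);
	}
}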
0x470090UL : \ + 0) -#define UVH_NODE_ID_FORCE1_SHFT 0 -#define UVH_NODE_ID_MANUFACTURER_SHFT 1 -#define UVH_NODE_ID_PART_NUMBER_SHFT 12 -#define UVH_NODE_ID_REVISION_SHFT 28 -#define UVH_NODE_ID_NODE_ID_SHFT 32 -#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL - -#define UV1H_NODE_ID_FORCE1_SHFT 0 -#define UV1H_NODE_ID_MANUFACTURER_SHFT 1 -#define UV1H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV1H_NODE_ID_REVISION_SHFT 28 -#define UV1H_NODE_ID_NODE_ID_SHFT 32 -#define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48 -#define UV1H_NODE_ID_NI_PORT_SHFT 56 -#define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL -#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL - -#define UVXH_NODE_ID_FORCE1_SHFT 0 -#define UVXH_NODE_ID_MANUFACTURER_SHFT 1 -#define UVXH_NODE_ID_PART_NUMBER_SHFT 12 -#define UVXH_NODE_ID_REVISION_SHFT 28 -#define UVXH_NODE_ID_NODE_ID_SHFT 32 -#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UVXH_NODE_ID_NI_PORT_SHFT 57 -#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL -#define UV2H_NODE_ID_FORCE1_SHFT 0 -#define UV2H_NODE_ID_MANUFACTURER_SHFT 1 -#define UV2H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV2H_NODE_ID_REVISION_SHFT 28 -#define UV2H_NODE_ID_NODE_ID_SHFT 32 -#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UV2H_NODE_ID_NI_PORT_SHFT 57 -#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL - -#define UV3H_NODE_ID_FORCE1_SHFT 0 -#define UV3H_NODE_ID_MANUFACTURER_SHFT 1 -#define UV3H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV3H_NODE_ID_REVISION_SHFT 28 -#define UV3H_NODE_ID_NODE_ID_SHFT 32 -#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48 -#define UV3H_NODE_ID_RESERVED_2_SHFT 49 -#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UV3H_NODE_ID_NI_PORT_SHFT 57 -#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL -#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL - -#define UV4H_NODE_ID_FORCE1_SHFT 0 -#define UV4H_NODE_ID_MANUFACTURER_SHFT 1 -#define 
UV4H_NODE_ID_PART_NUMBER_SHFT 12 -#define UV4H_NODE_ID_REVISION_SHFT 28 -#define UV4H_NODE_ID_NODE_ID_SHFT 32 -#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48 -#define UV4H_NODE_ID_RESERVED_2_SHFT 49 -#define UV4H_NODE_ID_NODES_PER_BIT_SHFT 50 -#define UV4H_NODE_ID_NI_PORT_SHFT 57 -#define UV4H_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UV4H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UV4H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UV4H_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UV4H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL -#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL -#define UV4H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL -#define UV4H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL +/* UVYH common defines */ +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 25 +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV5) ? 0x000ffffffe000000UL : \ + 0) +#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV5) ? 25 : \ + -1) -union uvh_node_id_u { +union uvh_rh10_gam_mmr_overlay_config_u { unsigned long v; - struct uvh_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47_63:17; + + /* UVH common struct */ + struct uvh_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ } s; - struct uv1h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47:1; - unsigned long nodes_per_bit:7; /* RW */ - unsigned long rsvd_55:1; - unsigned long ni_port:4; /* RO */ - unsigned long rsvd_60_63:4; - } s1; - struct uvxh_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47_49:3; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } sx; - struct uv2h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47_49:3; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } s2; - struct uv3h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47:1; - unsigned long router_select:1; /* RO */ - unsigned long rsvd_49:1; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } s3; - struct uv4h_node_id_s { - unsigned long force1:1; /* RO */ - unsigned long manufacturer:11; /* RO */ - unsigned long part_number:16; /* RO */ - unsigned long 
revision:4; /* RO */ - unsigned long node_id:15; /* RW */ - unsigned long rsvd_47:1; - unsigned long router_select:1; /* RO */ - unsigned long rsvd_49:1; - unsigned long nodes_per_bit:7; /* RO */ - unsigned long ni_port:5; /* RO */ - unsigned long rsvd_62_63:2; - } s4; + + /* UVYH common struct */ + struct uvyh_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } sy; + + /* UV5 unique struct */ + struct uv5h_rh10_gam_mmr_overlay_config_s { + unsigned long undef_0_24:25; /* Undefined */ + unsigned long base:27; /* RW */ + unsigned long undef_52_62:11; /* Undefined */ + unsigned long enable:1; /* RW */ + } s5; }; /* ========================================================================= */ -/* UVH_NODE_PRESENT_TABLE */ +/* UVH_RH_GAM_ADDR_MAP_CONFIG */ /* ========================================================================= */ -#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UVH_RH_GAM_ADDR_MAP_CONFIG ( \ + is_uv(UV4) ? 0x480000UL : \ + is_uv(UV3) ? 0x1600000UL : \ + is_uv(UV2) ? 0x1600000UL : \ + 0) -#define UV1H_NODE_PRESENT_TABLE_DEPTH 16 -#define UV2H_NODE_PRESENT_TABLE_DEPTH 16 -#define UV3H_NODE_PRESENT_TABLE_DEPTH 16 -#define UV4H_NODE_PRESENT_TABLE_DEPTH 4 -#define UVH_NODE_PRESENT_TABLE_DEPTH ( \ - is_uv1_hub() ? UV1H_NODE_PRESENT_TABLE_DEPTH : \ - is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH : \ - is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH : \ - /*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH) -#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 -#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL +/* UVXH common defines */ +#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6 +#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000003c0UL +/* UV3 unique defines */ +#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL -union uvh_node_present_table_u { +/* UV2 unique defines */ +#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL + + +union uvh_rh_gam_addr_map_config_u { unsigned long v; - struct uvh_node_present_table_s { - unsigned long nodes:64; /* RW */ + + /* UVH common struct */ + struct uvh_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } sx; + + /* UV4 unique struct */ + struct uv4h_rh_gam_addr_map_config_s { + unsigned long rsvd_0_5:6; + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_addr_map_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s3; + + /* UV2 unique struct */ + struct uv2h_rh_gam_addr_map_config_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ +/* UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL -#define 
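/*
 * Illustrative sketch, not part of the patch: RH_GAM_ADDR_MAP_CONFIG carries
 * the global address map geometry. m_skt is the number of per-socket address
 * bits (a UV2/UV3 field; reserved on UV4) and n_skt the number of socket-id
 * bits, so socket s would own the 2^m_skt-byte window starting at
 * (s << m_skt). The helper name and the windowing arithmetic are assumptions
 * for illustration.
 */
static unsigned long uv_gam_socket_base(int socket)
{
	union uvh_rh_gam_addr_map_config_u cfg;

	if (!is_uv(UV2) && !is_uv(UV3))
		return 0;	/* m_skt is only defined on UV2/UV3 */

	cfg.v = uv_read_local_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
	/* s3 and s2 share the same m_skt/n_skt layout. */
	return (unsigned long)socket << cfg.s3.m_skt;
}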
UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR) - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_alias210_overlay_config_0_mmr_u { +#define UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800c8UL : \ + is_uv(UV3) ? 0x16000c8UL : \ + is_uv(UV2) ? 
0x16000c8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_0_overlay_config_u { unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3095,15 +3465,9 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3111,15 +3475,19 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3127,183 +3495,96 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_0_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ +/* UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR) - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_alias210_overlay_config_1_mmr_u { +#define UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800d0UL : \ + is_uv(UV3) ? 0x16000d0UL : \ + is_uv(UV2) ? 
0x16000d0UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL + + +union uvh_rh_gam_alias_0_redirect_config_u { unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; } sx; - struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; } s3; - struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_0_redirect_config_s { unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s4; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ +/* UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ - is_uv3_hub() ? 
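/*
 * Illustrative sketch, not part of the patch: an ALIAS_n OVERLAY/REDIRECT
 * pair works together. When enable is set, the window at (base << 24) is
 * forwarded to the address (dest_base << 24). Reading m_alias as the log2
 * window size is an assumption for illustration; only the field layout
 * comes from the patch.
 */
static void uv_alias0_window(unsigned long *start, unsigned long *size,
			     unsigned long *target)
{
	union uvh_rh_gam_alias_0_overlay_config_u ov;
	union uvh_rh_gam_alias_0_redirect_config_u rd;

	ov.v = uv_read_local_mmr(UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG);
	rd.v = uv_read_local_mmr(UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG);

	*start  = ov.s.enable ? (unsigned long)ov.s.base << 24 : 0;
	*size   = ov.s.enable ? 1UL << ov.s.m_alias : 0;
	*target = (unsigned long)rd.s.dest_base << 24;
}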
UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR) - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_alias210_overlay_config_2_mmr_u { +#define UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800d8UL : \ + is_uv(UV3) ? 0x16000d8UL : \ + is_uv(UV2) ? 
0x16000d8UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_alias_1_overlay_config_u { unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3311,15 +3592,9 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3327,15 +3602,19 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; @@ -3343,390 +3622,289 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_1_overlay_config_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR) - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800e0UL : \ + is_uv(UV3) ? 0x16000e0UL : \ + is_uv(UV2) ? 0x16000e0UL : \ + 0) -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - - -union uvh_rh_gam_alias210_redirect_config_0_mmr_u { +union uvh_rh_gam_alias_1_redirect_config_u { unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } sx; - struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s3; - struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_1_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR) - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x4800e8UL : \ + is_uv(UV3) ? 0x16000e8UL : \ + is_uv(UV2) ? 0x16000e8UL : \ + 0) -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - - -union uvh_rh_gam_alias210_redirect_config_1_mmr_u { +union uvh_rh_gam_alias_2_overlay_config_u { unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ } s3; - struct 
uv4h_rh_gam_alias210_redirect_config_1_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_2_overlay_config_s { unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s4; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR) +#define UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG ( \ + is_uv(UV4) ? 0x4800f0UL : \ + is_uv(UV3) ? 0x16000f0UL : \ + is_uv(UV2) ? 0x16000f0UL : \ + 0) -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL +/* UVXH common defines */ +#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - - -union uvh_rh_gam_alias210_redirect_config_2_mmr_u { +union uvh_rh_gam_alias_2_redirect_config_u { unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } sx; - struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s { + } s4; + + /* UV3 unique 
struct */ + struct uv3h_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s3; - struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_alias_2_redirect_config_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s4; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_CONFIG_MMR */ -/* ========================================================================= */ -#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL -#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL -#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL -#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL -#define UVH_RH_GAM_CONFIG_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR) - -#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 -#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 -#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL -#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL -#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL - -#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 -#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL -#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 -#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL -#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - -#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 -#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL - - -union uvh_rh_gam_config_mmr_u { - unsigned long v; - struct uvh_rh_gam_config_mmr_s { - unsigned long rsvd_0_5:6; - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } s; - struct uv1h_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_11:2; - unsigned long mmiol_cfg:1; /* RW */ - unsigned long rsvd_13_63:51; - } s1; - struct uvxh_rh_gam_config_mmr_s { - unsigned long rsvd_0_5:6; - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } sx; - struct uv2h_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; } s2; - struct uv3h_rh_gam_config_mmr_s { - unsigned long m_skt:6; /* RW */ - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } s3; - struct uv4h_rh_gam_config_mmr_s { - unsigned long rsvd_0_5:6; - unsigned long n_skt:4; /* RW */ - unsigned long rsvd_10_63:54; - } s4; }; /* ========================================================================= */ -/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ +/* UVH_RH_GAM_GRU_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR ( \ 
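/*
 * Illustrative sketch, not part of the patch: ALIAS_1 and ALIAS_2 repeat the
 * ALIAS_0 layout exactly, so the three pairs can be handled table-driven
 * with a single union type. The address macros are runtime conditionals, so
 * the lookup table must be built in automatic storage rather than as a
 * static initializer.
 */
static bool uv_alias_enabled(int idx)
{
	const unsigned long overlay[] = {
		UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG,
		UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG,
		UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG,
	};
	union uvh_rh_gam_alias_0_overlay_config_u ov;

	ov.v = uv_read_local_mmr(overlay[idx]);
	return ov.s.enable;	/* bit layout is identical for all three */
}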
- is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR) - -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL -#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK ( \ - is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ - is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ - is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK) -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT ( \ - is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ - is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ - is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \ - /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT) - -union uvh_rh_gam_gru_overlay_config_mmr_u { +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x480010UL : \ + is_uv(UV3) ? 0x1600010UL : \ + is_uv(UV2) ? 
0x1600010UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x00f0000000000000UL +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffffc000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_SHFT 62 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_MASK 0x4000000000000000UL + +/* UV2 unique defines */ +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffff0000000UL : \ + is_uv(UV2) ? 0x00003ffff0000000UL : \ + 0) +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 28 : \ + is_uv(UV2) ? 28 : \ + -1) + +union uvh_rh_gam_gru_overlay_config_u { unsigned long v; - struct uvh_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_51:52; + + /* UVH common struct */ + struct uvh_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_45:46; + unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27:28; - unsigned long base:18; /* RW */ - unsigned long rsvd_46_47:2; - unsigned long gr4:1; /* RW */ - unsigned long rsvd_49_51:3; - unsigned long n_gru:4; /* RW */ - unsigned long rsvd_56_62:7; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_gru_overlay_config_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_gru_overlay_config_s { unsigned long rsvd_0_45:46; unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27:28; - unsigned long base:18; /* RW */ + + /* UV4A unique struct */ + struct uv4ah_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:26; /* RW */ + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_24:25; + unsigned long undef_25:1; /* Undefined */ + unsigned long base:20; /* RW */ unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_gru_overlay_config_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_gru_overlay_config_s { unsigned long rsvd_0_27:28; unsigned long base:18; /* RW */ unsigned long rsvd_46_51:6; @@ -3735,88 +3913,141 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long mode:1; /* RW */ unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_24:25; - unsigned long undef_25:1; /* Undefined */ - unsigned long base:20; 
/* RW */ + + /* UV2 unique struct */ + struct uv2h_rh_gam_gru_overlay_config_s { + unsigned long rsvd_0_27:28; + unsigned long base:18; /* RW */ unsigned long rsvd_46_51:6; unsigned long n_gru:4; /* RW */ unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR) - - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52 -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK) - -union uvh_rh_gam_mmioh_overlay_config0_mmr_u { +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG ( \ + is_uv(UV2) ? 
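/*
 * Illustrative sketch, not part of the patch: the per-hub BASE_MASK/BASE_SHFT
 * selectors above exist because the GRU base field moved and widened across
 * hub generations (shift 28 on UV2/UV3, shift 26 on UV4, a wider mask on
 * UV4A). Since the field sits at its natural address position, masking the
 * raw value already yields a physical address.
 */
static unsigned long uv_gru_base_paddr(void)
{
	union uvh_rh_gam_gru_overlay_config_u gru;

	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
	if (!gru.s.enable)
		return 0;

	/* equivalent to ((v & MASK) >> SHFT) << SHFT */
	return gru.v & UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
}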
0x1600030UL : \ + 0) + + + +/* UV2 unique defines */ +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT 27 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_MASK 0x00003ffff8000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_SHFT 46 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_MASK 0x000fc00000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_SHFT 52 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV2) ? 27 : \ + uv_undefined("UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT")) + +union uvh_rh_gam_mmioh_overlay_config_u { unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; + + /* UV2 unique struct */ + struct uv2h_rh_gam_mmioh_overlay_config_s { + unsigned long rsvd_0_26:27; + unsigned long base:19; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s2; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 ( \ + is_uv(UV4) ? 0x483000UL : \ + is_uv(UV3) ? 0x1603000UL : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + 0) +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 
26 : \ + -1) + +union uvh_rh_gam_mmioh_overlay_config0_u { + unsigned long v; + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config0_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s3; - struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config0_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s4; + } sx; + + /* UV4A unique struct */ struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s { unsigned long rsvd_0_25:26; unsigned long base:26; /* RW */ @@ -3825,73 +4056,94 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { unsigned long undef_62:1; /* Undefined */ unsigned long enable:1; /* RW */ } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_overlay_config0_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR) - - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52 -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL -#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ - is_uv4a_hub() ? 
UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK) - -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK) - -union uvh_rh_gam_mmioh_overlay_config1_mmr_u { +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 ( \ + is_uv(UV4) ? 0x484000UL : \ + is_uv(UV3) ? 0x1604000UL : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + 0) +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 
26 : \ + -1) + +union uvh_rh_gam_mmioh_overlay_config1_u { unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_overlay_config1_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s3; - struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_overlay_config1_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s4; + } sx; + + /* UV4A unique struct */ struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s { unsigned long rsvd_0_25:26; unsigned long base:26; /* RW */ @@ -3900,278 +4152,289 @@ union uvh_rh_gam_mmioh_overlay_config1_mmr_u { unsigned long undef_62:1; /* Undefined */ unsigned long enable:1; /* RW */ } s4a; -}; -/* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ -/* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR") -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR") -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR) - - -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL -#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - - -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL -#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_mmioh_overlay_config_mmr_u { - unsigned long v; - struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { - unsigned long rsvd_0_29:30; - unsigned long base:16; /* RW */ + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ - unsigned long n_io:4; /* RW */ + unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s1; - struct uv2h_rh_gam_mmioh_overlay_config_mmr_s { - unsigned long rsvd_0_26:27; - unsigned long base:19; /* RW */ + } s4; + + /* UV3 unique struct */ + struct 
uv3h_rh_gam_mmioh_overlay_config1_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ unsigned long m_io:6; /* RW */ - unsigned long n_io:4; /* RW */ + unsigned long n_io:4; unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ - } s2; + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR) - -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH) - - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL - -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK) - -union uvh_rh_gam_mmioh_redirect_config0_mmr_u { +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 ( \ + is_uv(UV4) ? 0x483800UL : \ + is_uv(UV3) ? 0x1603800UL : \ + 0) + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \ + is_uv(UV4) ? 128 : \ + is_uv(UV3) ? 128 : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL + +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV3) ? 
UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + 0) + + +union uvh_rh_gam_mmioh_redirect_config0_u { unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_redirect_config0_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s3; - struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_redirect_config0_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s4; - struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s { + } sx; + + struct uv4ah_rh_gam_mmioh_redirect_config0_s { unsigned long nasid:12; /* RW */ unsigned long rsvd_12_63:52; } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_redirect_config0_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR) - -#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") -#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ - is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH) - - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 -#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL - -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL - -#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \ - is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ - is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK) - -union uvh_rh_gam_mmioh_redirect_config1_mmr_u { +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 ( \ + is_uv(UV4) ? 0x484800UL : \ + is_uv(UV3) ? 0x1604800UL : \ + 0) + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \ + is_uv(UV4) ? 128 : \ + is_uv(UV3) ? 
128 : \ + 0) + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL + +/* UV4 unique defines */ +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL + +/* UV3 unique defines */ +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL + +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + 0) + + +union uvh_rh_gam_mmioh_redirect_config1_u { unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmioh_redirect_config1_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s3; - struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s { + } s; + + /* UVXH common struct */ + struct uvxh_rh_gam_mmioh_redirect_config1_s { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; - } s4; - struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s { + } sx; + + struct uv4ah_rh_gam_mmioh_redirect_config1_s { unsigned long nasid:12; /* RW */ unsigned long rsvd_12_63:52; } s4a; + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmioh_redirect_config1_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; }; /* ========================================================================= */ -/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ +/* UVH_RH_GAM_MMR_OVERLAY_CONFIG */ /* ========================================================================= */ -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR ( \ - is_uv1_hub() ? UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ - is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ - is_uv3_hub() ? 
UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \ - /*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR) - -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL -#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - - -union uvh_rh_gam_mmr_overlay_config_mmr_u { +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG ( \ + is_uv(UV4) ? 0x480028UL : \ + is_uv(UV3) ? 0x1600028UL : \ + is_uv(UV2) ? 0x1600028UL : \ + 0) + + +/* UVXH common defines */ +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 26 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + is_uv(UV2) ? 0x00003ffffc000000UL : \ + 0) +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +/* UV4A unique defines */ +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26 +#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \ + is_uv(UV4A) ? 0x000ffffffc000000UL : \ + is_uv(UV4) ? 0x00003ffffc000000UL : \ + is_uv(UV3) ? 0x00003ffffc000000UL : \ + is_uv(UV2) ? 0x00003ffffc000000UL : \ + 0) + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \ + is_uv(UV4) ? 26 : \ + is_uv(UV3) ? 26 : \ + is_uv(UV2) ? 
26 : \ + -1) + +union uvh_rh_gam_mmr_overlay_config_u { unsigned long v; - struct uvh_rh_gam_mmr_overlay_config_mmr_s { + + /* UVH common struct */ + struct uvh_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_mmr_overlay_config_mmr_s { - unsigned long rsvd_0_25:26; - unsigned long base:20; /* RW */ - unsigned long dual_hub:1; /* RW */ - unsigned long rsvd_47_62:16; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_mmr_overlay_config_mmr_s { + + /* UVXH common struct */ + struct uvxh_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } sx; - struct uv2h_rh_gam_mmr_overlay_config_mmr_s { + + /* UV4 unique struct */ + struct uv4h_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_mmr_overlay_config_mmr_s { + } s4; + + /* UV3 unique struct */ + struct uv3h_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } s3; - struct uv4h_rh_gam_mmr_overlay_config_mmr_s { + + /* UV2 unique struct */ + struct uv2h_rh_gam_mmr_overlay_config_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ - } s4; + } s2; }; /* ========================================================================= */ /* UVH_RTC */ /* ========================================================================= */ -#define UV1H_RTC 0x340000UL -#define UV2H_RTC 0x340000UL -#define UV3H_RTC 0x340000UL -#define UV4H_RTC 0xe0000UL #define UVH_RTC ( \ - is_uv1_hub() ? UV1H_RTC : \ - is_uv2_hub() ? UV2H_RTC : \ - is_uv3_hub() ? UV3H_RTC : \ - /*is_uv4_hub*/ UV4H_RTC) + is_uv(UV5) ? 0xe0000UL : \ + is_uv(UV4) ? 0xe0000UL : \ + is_uv(UV3) ? 0x340000UL : \ + is_uv(UV2) ? 
0x340000UL : \ + 0) +/* UVH common defines*/ #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL union uvh_rtc_u { unsigned long v; + + /* UVH common struct */ struct uvh_rtc_s { unsigned long real_time_clock:56; /* RW */ unsigned long rsvd_56_63:8; } s; + + /* UV5 unique struct */ + struct uv5h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s5; + + /* UV4 unique struct */ + struct uv4h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s4; + + /* UV3 unique struct */ + struct uv3h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; + + /* UV2 unique struct */ + struct uv2h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; }; /* ========================================================================= */ @@ -4179,26 +4442,29 @@ union uvh_rtc_u { /* ========================================================================= */ #define UVH_RTC1_INT_CONFIG 0x615c0UL +/* UVH common defines*/ #define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC1_INT_CONFIG_P_SHFT 13 -#define UVH_RTC1_INT_CONFIG_T_SHFT 15 -#define UVH_RTC1_INT_CONFIG_M_SHFT 16 -#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 #define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 #define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 #define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 #define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 #define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 #define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL union uvh_rtc1_int_config_u { unsigned long v; + + /* UVH common struct */ struct uvh_rtc1_int_config_s { unsigned long vector_:8; /* RW */ unsigned long dm:3; /* RW */ @@ -4211,618 +4477,175 @@ union uvh_rtc1_int_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; - -/* ========================================================================= */ -/* UVH_SCRATCH5 */ -/* ========================================================================= */ -#define UV1H_SCRATCH5 0x2d0200UL -#define UV2H_SCRATCH5 0x2d0200UL -#define UV3H_SCRATCH5 0x2d0200UL -#define UV4H_SCRATCH5 0xb0200UL -#define UVH_SCRATCH5 ( \ - is_uv1_hub() ? UV1H_SCRATCH5 : \ - is_uv2_hub() ? UV2H_SCRATCH5 : \ - is_uv3_hub() ? UV3H_SCRATCH5 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5) - -#define UV1H_SCRATCH5_32 0x778 -#define UV2H_SCRATCH5_32 0x778 -#define UV3H_SCRATCH5_32 0x778 -#define UV4H_SCRATCH5_32 0x798 -#define UVH_SCRATCH5_32 ( \ - is_uv1_hub() ? UV1H_SCRATCH5_32 : \ - is_uv2_hub() ? UV2H_SCRATCH5_32 : \ - is_uv3_hub() ? 
UV3H_SCRATCH5_32 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_32) - -#define UVH_SCRATCH5_SCRATCH5_SHFT 0 -#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL - -union uvh_scratch5_u { - unsigned long v; - struct uvh_scratch5_s { - unsigned long scratch5:64; /* RW, W1CS */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SCRATCH5_ALIAS */ -/* ========================================================================= */ -#define UV1H_SCRATCH5_ALIAS 0x2d0208UL -#define UV2H_SCRATCH5_ALIAS 0x2d0208UL -#define UV3H_SCRATCH5_ALIAS 0x2d0208UL -#define UV4H_SCRATCH5_ALIAS 0xb0208UL -#define UVH_SCRATCH5_ALIAS ( \ - is_uv1_hub() ? UV1H_SCRATCH5_ALIAS : \ - is_uv2_hub() ? UV2H_SCRATCH5_ALIAS : \ - is_uv3_hub() ? UV3H_SCRATCH5_ALIAS : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS) - -#define UV1H_SCRATCH5_ALIAS_32 0x780 -#define UV2H_SCRATCH5_ALIAS_32 0x780 -#define UV3H_SCRATCH5_ALIAS_32 0x780 -#define UV4H_SCRATCH5_ALIAS_32 0x7a0 -#define UVH_SCRATCH5_ALIAS_32 ( \ - is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_32 : \ - is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 : \ - is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32) - - -/* ========================================================================= */ -/* UVH_SCRATCH5_ALIAS_2 */ -/* ========================================================================= */ -#define UV1H_SCRATCH5_ALIAS_2 0x2d0210UL -#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL -#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL -#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL -#define UVH_SCRATCH5_ALIAS_2 ( \ - is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_2 : \ - is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 : \ - is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 : \ - /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2) -#define UVH_SCRATCH5_ALIAS_2_32 0x788 - - -/* ========================================================================= */ -/* UVXH_EVENT_OCCURRED2 */ -/* ========================================================================= */ -#define UVXH_EVENT_OCCURRED2 0x70100UL - -#define UV2H_EVENT_OCCURRED2_32 0xb68 -#define UV3H_EVENT_OCCURRED2_32 0xb68 -#define UV4H_EVENT_OCCURRED2_32 0x608 -#define UVH_EVENT_OCCURRED2_32 ( \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 : \ - is_uv3_hub() ? 
UV3H_EVENT_OCCURRED2_32 : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32) - - -#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 -#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL - -#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define 
UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17 -#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + /* UV5 unique struct */ + struct uv5h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long 
status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s5; -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15 -#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16 -#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17 -#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18 -#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19 -#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20 -#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21 -#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22 -#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23 -#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24 -#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25 -#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26 -#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27 -#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28 -#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29 -#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30 -#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31 -#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32 -#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33 -#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34 -#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35 -#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36 -#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37 -#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38 -#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39 -#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40 -#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41 -#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42 -#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43 -#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44 -#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45 -#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46 -#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47 -#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48 -#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49 -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 
0x0000000000000200UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL -#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL -#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL -#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL -#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL -#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL -#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL -#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL -#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL -#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL -#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL -#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL -#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL -#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL -#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL -#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL -#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL -#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL -#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL -#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL -#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL -#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL -#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL -#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL + /* UV4 unique struct */ + struct uv4h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s4; -#define UVXH_EVENT_OCCURRED2_RTC_1_MASK ( \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK : \ - is_uv3_hub() ? 
UV3H_EVENT_OCCURRED2_RTC_1_MASK : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK) + /* UV3 unique struct */ + struct uv3h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; -union uvh_event_occurred2_u { - unsigned long v; - struct uv2h_event_occurred2_s { - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_32_63:32; + /* UV2 unique struct */ + struct uv2h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ } s2; - struct uv3h_event_occurred2_s { - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_32_63:32; - } s3; - struct uv4h_event_occurred2_s { - unsigned long message_accelerator_int0:1; /* RW */ - unsigned long message_accelerator_int1:1; /* RW */ - unsigned long message_accelerator_int2:1; /* RW */ - unsigned long message_accelerator_int3:1; /* RW */ - unsigned long 
message_accelerator_int4:1; /* RW */ - unsigned long message_accelerator_int5:1; /* RW */ - unsigned long message_accelerator_int6:1; /* RW */ - unsigned long message_accelerator_int7:1; /* RW */ - unsigned long message_accelerator_int8:1; /* RW */ - unsigned long message_accelerator_int9:1; /* RW */ - unsigned long message_accelerator_int10:1; /* RW */ - unsigned long message_accelerator_int11:1; /* RW */ - unsigned long message_accelerator_int12:1; /* RW */ - unsigned long message_accelerator_int13:1; /* RW */ - unsigned long message_accelerator_int14:1; /* RW */ - unsigned long message_accelerator_int15:1; /* RW */ - unsigned long rtc_interval_int:1; /* RW */ - unsigned long bau_dashboard_int:1; /* RW */ - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_50_63:14; - } s4; }; /* ========================================================================= */ -/* UVXH_EVENT_OCCURRED2_ALIAS */ -/* ========================================================================= */ -#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL - -#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 -#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70 -#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610 -#define UVH_EVENT_OCCURRED2_ALIAS_32 ( \ - is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 : \ - is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 : \ - /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32) - - -/* ========================================================================= */ -/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */ +/* UVH_SCRATCH5 */ /* ========================================================================= */ -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 ( \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2) - -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10 -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 ( \ - is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \ - is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \ - /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32) +#define UVH_SCRATCH5 ( \ + is_uv(UV5) ? 0xb0200UL : \ + is_uv(UV4) ? 0xb0200UL : \ + is_uv(UV3) ? 
0x2d0200UL : \ + is_uv(UV2) ? 0x2d0200UL : \ + 0) +#define UV5H_SCRATCH5 0xb0200UL +#define UV4H_SCRATCH5 0xb0200UL +#define UV3H_SCRATCH5 0x2d0200UL +#define UV2H_SCRATCH5 0x2d0200UL -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UVH common defines*/ +#define UVH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UVXH common defines */ +#define UVXH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UVYH common defines */ +#define UVYH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVYH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 -#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL +/* UV5 unique defines */ +#define UV5H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV5H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL +/* UV4 unique defines */ +#define UV4H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV4H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -union uvxh_lb_bau_sb_activation_status_2_u { - unsigned long v; - struct uvxh_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } sx; - struct uv2h_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } s2; - struct uv3h_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } s3; - struct uv4h_lb_bau_sb_activation_status_2_s { - unsigned long aux_error:64; /* RW */ - } s4; -}; +/* UV3 unique defines */ +#define UV3H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -/* ========================================================================= */ -/* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */ -/* ========================================================================= */ -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0 +/* UV2 unique defines */ +#define UV2H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 -#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL -union uv1h_lb_target_physical_apic_id_mask_u { +union uvh_scratch5_u { unsigned long v; - struct uv1h_lb_target_physical_apic_id_mask_s { - unsigned long bit_enables:32; /* RW */ - unsigned long rsvd_32_63:32; - } s1; -}; -/* ========================================================================= */ -/* UV3H_GR0_GAM_GR_CONFIG */ -/* ========================================================================= */ -#define UV3H_GR0_GAM_GR_CONFIG 0xc00028UL - -#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0 -#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10 -#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL -#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL + /* UVH common struct */ + struct uvh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s; -union uv3h_gr0_gam_gr_config_u { - unsigned long v; - struct uv3h_gr0_gam_gr_config_s { - unsigned long m_skt:6; /* RW */ - unsigned long undef_6_9:4; /* Undefined */ - unsigned long subspace:1; /* RW */ - unsigned long reserved:53; - } s3; -}; + 
/* UVXH common struct */ + struct uvxh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sx; -/* ========================================================================= */ -/* UV4H_LB_PROC_INTD_QUEUE_FIRST */ -/* ========================================================================= */ -#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL + /* UVYH common struct */ + struct uvyh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sy; -#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6 -#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL + /* UV5 unique struct */ + struct uv5h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s5; -union uv4h_lb_proc_intd_queue_first_u { - unsigned long v; - struct uv4h_lb_proc_intd_queue_first_s { - unsigned long undef_0_5:6; /* Undefined */ - unsigned long first_payload_address:40; /* RW */ + /* UV4 unique struct */ + struct uv4h_scratch5_s { + unsigned long scratch5:64; /* RW */ } s4; -}; - -/* ========================================================================= */ -/* UV4H_LB_PROC_INTD_QUEUE_LAST */ -/* ========================================================================= */ -#define UV4H_LB_PROC_INTD_QUEUE_LAST 0xa4108UL -#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5 -#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL + /* UV3 unique struct */ + struct uv3h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s3; -union uv4h_lb_proc_intd_queue_last_u { - unsigned long v; - struct uv4h_lb_proc_intd_queue_last_s { - unsigned long undef_0_4:5; /* Undefined */ - unsigned long last_payload_address:41; /* RW */ - } s4; + /* UV2 unique struct */ + struct uv2h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s2; }; /* ========================================================================= */ -/* UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR */ +/* UVH_SCRATCH5_ALIAS */ /* ========================================================================= */ -#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR 0xa4118UL - -#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0 -#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL +#define UVH_SCRATCH5_ALIAS ( \ + is_uv(UV5) ? 0xb0208UL : \ + is_uv(UV4) ? 0xb0208UL : \ + is_uv(UV3) ? 0x2d0208UL : \ + is_uv(UV2) ? 0x2d0208UL : \ + 0) +#define UV5H_SCRATCH5_ALIAS 0xb0208UL +#define UV4H_SCRATCH5_ALIAS 0xb0208UL +#define UV3H_SCRATCH5_ALIAS 0x2d0208UL +#define UV2H_SCRATCH5_ALIAS 0x2d0208UL -union uv4h_lb_proc_intd_soft_ack_clear_u { - unsigned long v; - struct uv4h_lb_proc_intd_soft_ack_clear_s { - unsigned long soft_ack_pending_flags:8; /* WP */ - } s4; -}; /* ========================================================================= */ -/* UV4H_LB_PROC_INTD_SOFT_ACK_PENDING */ +/* UVH_SCRATCH5_ALIAS_2 */ /* ========================================================================= */ -#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING 0xa4110UL - -#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0 -#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL +#define UVH_SCRATCH5_ALIAS_2 ( \ + is_uv(UV5) ? 0xb0210UL : \ + is_uv(UV4) ? 0xb0210UL : \ + is_uv(UV3) ? 0x2d0210UL : \ + is_uv(UV2) ? 
0x2d0210UL : \ + 0) +#define UV5H_SCRATCH5_ALIAS_2 0xb0210UL +#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL +#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL +#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL -union uv4h_lb_proc_intd_soft_ack_pending_u { - unsigned long v; - struct uv4h_lb_proc_intd_soft_ack_pending_s { - unsigned long soft_ack_flags:8; /* RW */ - } s4; -}; #endif /* _ASM_X86_UV_UV_MMRS_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 27566e57e87d..b7253ef3205a 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -15,36 +15,29 @@ struct vdso_image { unsigned long size; /* Always a multiple of PAGE_SIZE */ unsigned long alt, alt_len; + unsigned long extable_base, extable_len; + const void *extable; - long sym_vvar_start; /* Negative offset to the vvar area */ - - long sym_vvar_page; - long sym_hpet_page; - long sym_pvclock_page; - long sym_hvclock_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; long sym___kernel_vsyscall; long sym_int80_landing_pad; + long sym_vdso32_sigreturn_landing_pad; + long sym_vdso32_rt_sigreturn_landing_pad; }; -#ifdef CONFIG_X86_64 extern const struct vdso_image vdso_image_64; -#endif - -#ifdef CONFIG_X86_X32 extern const struct vdso_image vdso_image_x32; -#endif - -#if defined CONFIG_X86_32 || defined CONFIG_COMPAT extern const struct vdso_image vdso_image_32; -#endif -extern void __init init_vdso_image(const struct vdso_image *image); +extern int __init init_vdso_image(const struct vdso_image *image); extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); +extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr); #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..136e5e57cfe1 --- /dev/null +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TSC, \ + VDSO_CLOCKMODE_PVCLOCK, \ + VDSO_CLOCKMODE_HVCLOCK + +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..ff1c11b9fa27 --- /dev/null +++ b/arch/x86/include/asm/vdso/getrandom.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLER__ + +#include <asm/unistd.h> + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. 
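The extable_base/extable fields added to struct vdso_image above back the new fixup_vdso_exception() hook; a sketch of the intended call site, modeled on the page-fault path (the exact handler wiring here is an assumption):

	/* In a trap handler, before raising a signal for a user-mode fault: */
	if (user_mode(regs) &&
	    fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address))
		return;		/* vDSO exception table redirected the fault */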
+ */ +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) +{ + long ret; + + asm ("syscall" : "=a" (ret) : + "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) : + "rcx", "r11", "memory"); + + return ret; +} + +#endif /* !__ASSEMBLER__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..73b2e7ee8f0f --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,336 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLER__ + +#include <uapi/linux/time.h> +#include <asm/vgtod.h> +#include <asm/unistd.h> +#include <asm/msr.h> +#include <asm/pvclock.h> +#include <clocksource/hyperv_timer.h> + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. 
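Two notes on the getrandom_syscall() asm above: the x86-64 SYSCALL instruction destroys %rcx and %r11, hence the clobber list, and the "memory" clobber keeps the compiler from caching @buffer contents across the call. A minimal caller, as an illustrative sketch:

	u8 seed[32];
	ssize_t ret = getrandom_syscall(seed, sizeof(seed), 0);

	if (ret < 0)
		return ret;	/* negative errno, e.g. -EINTR */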
+ */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TIMER +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. 
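The 32-bit fallbacks above all share one shape: %ebx is the PIC/GOT base register on i386, so it cannot simply be handed to the compiler as a constraint; instead it is parked in %edx around the call to __kernel_vsyscall while the first syscall argument is loaded by hand. A generic sketch of the pattern (illustrative; the real fallbacks use an "=m" output on the timespec rather than a blanket memory clobber):

	long ret;

	asm (
		"mov %%ebx, %%edx \n"	/* save the PIC register */
		"mov %[a1], %%ebx \n"	/* first syscall argument -> %ebx */
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"	/* restore the PIC register */
		: "=a" (ret)
		: "0" (nr), [a1] "g" (arg1), "c" (arg2)
		: "edx", "memory");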
+ * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret & S64_MAX; +} +#endif + +#ifdef CONFIG_HYPERV_TIMER +static u64 vread_hvclock(void) +{ + u64 tsc, time; + + if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time)) + return time & S64_MAX; + + return U64_MAX; +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_time_data *vd) +{ + if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) + return (u64)rdtsc_ordered() & S64_MAX; + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TIMER + if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static inline bool arch_vdso_clocksource_ok(const struct vdso_clock *vc) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. + * + * This effectively forces a S64_MAX mask on the calculations, unlike the + * U64_MAX mask normally used by x86 clocksources. + */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + +/* + * x86 specific calculation of nanoseconds for the current cycle count + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @vd->cycles_last. If not then use @vd->cycles_last, which is the base + * time of the current conversion period. + * + * This variant also uses a custom mask because while the clocksource mask of + * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses + * U64_MASK as an exception value, additionally arch_vdso_cycles_ok() above + * declares everything with the MSB/Sign-bit set as invalid. Therefore the + * effective mask is S64_MAX. + */ +static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base) +{ + u64 delta = cycles - vc->cycle_last; + + /* + * Negative motion and deltas which can cause multiplication + * overflow require special treatment. This check covers both as + * negative motion is guaranteed to be greater than @vc::max_cycles + * due to unsigned comparison. 
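vdso_calc_ns() relies on mul_u64_u32_add_u64_shr() for its overflow path below; its reference semantics, sketched with 128-bit arithmetic (the kernel's actual implementation avoids __int128 on targets that lack it):

	static inline u64 mul_u64_u32_add_u64_shr_ref(u64 a, u32 mul, u64 b,
						      unsigned int shift)
	{
		/* (a * mul + b) >> shift, with a 128-bit intermediate */
		return (u64)(((unsigned __int128)a * mul + b) >> shift);
	}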
+ * + * Due to the MSB/Sign-bit being used as invalid marker (see + * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that + * case is also unlikely and will also take the unlikely path here. + */ + if (unlikely(delta > vc->max_cycles)) { + /* + * Due to the above mentioned TSC wobbles, filter out + * negative motion. Per the above masking, the effective + * sign bit is now bit 62. + */ + if (delta & (1ULL << 62)) + return base >> vc->shift; + + /* Handle multiplication overflow gracefully */ + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vc->mult, base, vc->shift); + } + + return ((delta * vc->mult) + base) >> vc->shift; +} +#define vdso_calc_ns vdso_calc_ns + +#endif /* !__ASSEMBLER__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h new file mode 100644 index 000000000000..7000aeb59aa2 --- /dev/null +++ b/arch/x86/include/asm/vdso/processor.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLER__ + +/* PAUSE is a good thing to insert into busy-wait loops. */ +static __always_inline void native_pause(void) +{ + asm volatile("pause" ::: "memory"); +} + +static __always_inline void cpu_relax(void) +{ + native_pause(); +} + +struct getcpu_cache; + +notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); + +#endif /* __ASSEMBLER__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..4aa311a923f2 --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#define __VDSO_PAGES 6 + +#define VDSO_NR_VCLOCK_PAGES 2 +#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VDSO_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) +#define VDSO_PAGE_PVCLOCK_OFFSET 0 +#define VDSO_PAGE_HVCLOCK_OFFSET 1 + +#ifndef __ASSEMBLER__ + +#include <vdso/datapage.h> +#include <asm/vgtod.h> + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLER__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h new file mode 100644 index 000000000000..5d471253c755 --- /dev/null +++ b/arch/x86/include/asm/vermagic.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " 
+#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#else +# define MODULE_ARCH_VERMAGIC "" +#endif + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a0ce291abcae 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -2,92 +2,18 @@ #ifndef _ASM_X86_VGTOD_H #define _ASM_X86_VGTOD_H -#include <linux/compiler.h> -#include <linux/clocksource.h> - -#include <uapi/linux/time.h> - -#ifdef BUILD_VDSO32_64 -typedef u64 gtod_long_t; -#else -typedef unsigned long gtod_long_t; -#endif - -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - /* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. + * This check is required to prevent ARCH=um to include + * unwanted headers. 
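MODULE_PROC_FAMILY exists purely so that 32-bit modules built for one CPU family refuse to load on a kernel built for another; on x86-64 the vermagic carries no family tag. A sketch of how it surfaces (simplified from include/linux/vermagic.h, omitting the SMP/preempt/unload flags):

	#define VERMAGIC_STRING	UTS_RELEASE " " MODULE_ARCH_VERMAGIC
	/* e.g. "6.9.0 686 " on a CONFIG_M686 build; just "6.9.0 " on x86-64 */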
*/ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - -extern int vclocks_used; -static inline bool vclock_was_used(int vclock) -{ - return READ_ONCE(vclocks_used) & (1 << vclock); -} - -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} +#ifdef CONFIG_GENERIC_GETTIMEOFDAY +#include <linux/compiler.h> +#include <asm/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} +#include <uapi/linux/time.h> -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h new file mode 100644 index 000000000000..08ec328203ef --- /dev/null +++ b/arch/x86/include/asm/video.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VIDEO_H +#define _ASM_X86_VIDEO_H + +#include <linux/types.h> + +#include <asm/page.h> + +struct device; + +pgprot_t pgprot_framebuffer(pgprot_t prot, + unsigned long vm_start, unsigned long vm_end, + unsigned long offset); +#define pgprot_framebuffer pgprot_framebuffer + +#ifdef CONFIG_VIDEO +bool video_is_primary_device(struct device *dev); +#define video_is_primary_device video_is_primary_device +#endif + +#include <asm-generic/video.h> + +#endif /* _ASM_X86_VIDEO_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h deleted file mode 100644 index 1fc7a0d1e877..000000000000 --- a/arch/x86/include/asm/virtext.h +++ /dev/null @@ -1,129 +0,0 @@ -/* CPU virtualization extensions handling - * - * This should carry the code for handling CPU virtualization extensions - * that needs to live in the kernel core. - * - * Author: Eduardo Habkost <ehabkost@redhat.com> - * - * Copyright (C) 2008, Red Hat Inc. - * - * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ -#ifndef _ASM_X86_VIRTEX_H -#define _ASM_X86_VIRTEX_H - -#include <asm/processor.h> - -#include <asm/vmx.h> -#include <asm/svm.h> -#include <asm/tlbflush.h> - -/* - * VMX functions: - */ - -static inline int cpu_has_vmx(void) -{ - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ -} - - -/** Disable VMX on the current CPU - * - * vmxoff causes a undefined-opcode exception if vmxon was not run - * on the CPU previously. Only call this function if you know VMX - * is enabled. - */ -static inline void cpu_vmxoff(void) -{ - asm volatile ("vmxoff"); - cr4_clear_bits(X86_CR4_VMXE); -} - -static inline int cpu_vmx_enabled(void) -{ - return __read_cr4() & X86_CR4_VMXE; -} - -/** Disable VMX if it is enabled on the current CPU - * - * You shouldn't call this if cpu_has_vmx() returns 0. 
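For reference, the read-side protocol the removed gtod_read_begin()/gtod_read_retry() pair implemented is the classic seqcount loop (an odd sequence count means a writer is mid-update); the generic vDSO datapage helpers that replace it follow the same shape. Sketch against the old structures:

	unsigned int seq;
	u64 sec, nsec;

	do {
		seq  = gtod_read_begin(&vsyscall_gtod_data);	/* spins while odd */
		sec  = vsyscall_gtod_data.basetime[CLOCK_MONOTONIC].sec;
		nsec = vsyscall_gtod_data.basetime[CLOCK_MONOTONIC].nsec;
	} while (gtod_read_retry(&vsyscall_gtod_data, seq));	/* seq moved: retry */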
- */ -static inline void __cpu_emergency_vmxoff(void) -{ - if (cpu_vmx_enabled()) - cpu_vmxoff(); -} - -/** Disable VMX if it is supported and enabled on the current CPU - */ -static inline void cpu_emergency_vmxoff(void) -{ - if (cpu_has_vmx()) - __cpu_emergency_vmxoff(); -} - - - - -/* - * SVM functions: - */ - -/** Check if the CPU has SVM support - * - * You can use the 'msg' arg to get a message describing the problem, - * if the function returns zero. Simply pass NULL if you are not interested - * on the messages; gcc should take care of not generating code for - * the messages on this case. - */ -static inline int cpu_has_svm(const char **msg) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) { - if (msg) - *msg = "not amd or hygon"; - return 0; - } - - if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) { - if (msg) - *msg = "can't execute cpuid_8000000a"; - return 0; - } - - if (!boot_cpu_has(X86_FEATURE_SVM)) { - if (msg) - *msg = "svm not available"; - return 0; - } - return 1; -} - - -/** Disable SVM on the current CPU - * - * You should call this only if cpu_has_svm() returned true. - */ -static inline void cpu_svm_disable(void) -{ - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~EFER_SVME); -} - -/** Makes sure SVM is disabled, if it is supported on the CPU - */ -static inline void cpu_emergency_svm_disable(void) -{ - if (cpu_has_svm(NULL)) - cpu_svm_disable(); -} - -#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 26efbec94448..62ee19909903 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -36,7 +36,6 @@ struct vm86 { unsigned long saved_sp0; unsigned long flags; - unsigned long screen_bitmap; unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; @@ -85,7 +84,7 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } -#define free_vm86(t) do { } while(0) +#define free_vm86(task) do { (void)(task); } while(0) #endif /* CONFIG_VM86 */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..49ce331f3ac6 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#include <asm/cpufeature.h> +#include <asm/page.h> +#include <asm/pgtable_areas.h> + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifdef CONFIG_X86_64 +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return boot_cpu_has(X86_FEATURE_GBPAGES); +} +#endif + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return boot_cpu_has(X86_FEATURE_PSE); +} + +#endif + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000000..c9cf43d5ef23 --- /dev/null +++ b/arch/x86/include/asm/vmware.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +#ifndef _ASM_X86_VMWARE_H +#define _ASM_X86_VMWARE_H + +#include <asm/cpufeatures.h> +#include <asm/alternative.h> +#include <linux/stringify.h> + +/* + * VMware hypercall ABI. 
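The free_vm86() tweak above is macro hygiene: the CONFIG_VM86=n stub now evaluates `(void)(task)`, so the argument still counts as used and callers compile warning-free whether or not VM86 is built in. A caller sketch (the argument type is assumed from the in-tree user):

	struct thread_struct *t = &tsk->thread;

	free_vm86(t);	/* VM86=n: expands to (void)(t) - a warning-free no-op */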
+ * + * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall) + * have up to 6 input and 6 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg2 - Hypercall command + * arg3 bits [15:0] - Port number, LB and direction flags + * + * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB + * hypercalls. They also have up to 6 input and 6 output on registers + * arguments, with different argument to register mapping: + * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3), + * %rsi (arg4), %rdi (arg5). + * + * - High bandwidth (HB) hypercalls are I/O port based only. They have + * up to 7 input and 7 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5), %ebp (arg6). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg1 - Hypercall command + * arg3 bits [15:0] - Port number, HB and direction flags + * + * For compatibility purposes, x86_64 systems use only lower 32 bits + * for input and output arguments. + * + * The hypercall definitions differ in the low word of the %edx (arg3) + * in the following way: the old I/O port based interface uses the port + * number to distinguish between high- and low bandwidth versions, and + * uses IN/OUT instructions to define transfer direction. + * + * The new vmcall interface instead uses a set of flags to select + * bandwidth mode and transfer direction. The flags should be loaded + * into arg3 by any user and are automatically replaced by the port + * number if the I/O port method is used. + */ + +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) + +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \ + VMWARE_HYPERVISOR_HB) + +#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U + +#define VMWARE_CMD_GETVERSION 10 +#define VMWARE_CMD_GETHZ 45 +#define VMWARE_CMD_GETVCPU_INFO 68 +#define VMWARE_CMD_STEALCLOCK 91 +/* + * Hypercall command mask: + * bits [6:0] command, range [0, 127] + * bits [19:16] sub-command, range [0, 15] + */ +#define VMWARE_CMD_MASK 0xf007fU + +#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) +#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) + +extern unsigned long vmware_hypercall_slow(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); + +#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL +#define VMWARE_TDX_HCALL_FUNC 1 + +extern unsigned long vmware_tdx_hypercall(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); + +/* + * The low bandwidth call. The low word of %edx is presumed to have OUT bit + * set. The high word of %edx may contain input data from the caller. 
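A caller sketch for the low-bandwidth interface documented above, modeled on the hypervisor-detection logic (the exact validity test is an assumption): the command travels in arg2 (%ecx), the magic in arg0 (%eax), and out1/out2 come back in %ebx/%ecx via vmware_hypercall3(), defined below.

	u32 b, c;
	unsigned long version;

	version = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &b, &c);
	if (version != UINT_MAX && b == VMWARE_HYPERVISOR_MAGIC)
		/* running on a VMware hypervisor */;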
+ */ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw %[port], %%dx\n\t" \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ + "vmmcall", X86_FEATURE_VMW_VMMCALL) + +static inline +unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1, + unsigned long in3, u32 *out2, + u32 *out3, u32 *out4, u32 *out5) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4), + "=D" (*out5) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3) + : "cc", 
"memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out1, + u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +#ifdef CONFIG_X86_64 +#define VMW_BP_CONSTRAINT "r" +#else +#define VMW_BP_CONSTRAINT "m" +#endif + +/* + * High bandwidth calls are not supported on encrypted memory guests. + * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use + * low bandwidth hypercall if memory encryption is set. + * This assumption simplifies HB hypercall implementation to just I/O port + * based approach without alternative patching. + */ +static inline +unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep outsb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep insb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} +#undef VMW_BP_CONSTRAINT +#undef VMWARE_HYPERCALL + +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4e4133e86484..c85c50019523 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -1,93 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * vmx.h: VMX Architecture related definitions * Copyright (c) 2004, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * A few random additions are: * Copyright (C) 2006 Qumranet * Avi Kivity <avi@qumranet.com> * Yaniv Kamay <yaniv@qumranet.com> - * */ #ifndef VMX_H #define VMX_H #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/types.h> + #include <uapi/asm/vmx.h> +#include <asm/trapnr.h> +#include <asm/vmxfeatures.h> + +#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(INTR_WINDOW_EXITING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(USE_TSC_OFFSETTING) +#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) +#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) +#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) +#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING) +#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) +#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) +#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS VMCS_CONTROL_BIT(TERTIARY_CONTROLS) +#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) +#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) +#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(NMI_WINDOW_EXITING) +#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) +#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) +#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) +#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG) +#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS) +#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING) +#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING) +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS) #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 /* * Definitions of Secondary Processor-Based VM-Execution Controls. 
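A worked expansion of the VMCS_CONTROL_BIT() scheme introduced above, confirming the symbolic form reproduces the removed literal (the feature bit numbers come from vmxfeatures.h, added later in this patch):

	/* VMX_FEATURE_HLT_EXITING is word 1, bit 7: (1*32 + 7) & 0x1f == 7 */
	BUILD_BUG_ON(CPU_BASED_HLT_EXITING != 0x00000080);	/* BIT(7) */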
*/ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 -#define SECONDARY_EXEC_XSAVES 0x00100000 -#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 -#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 - -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) +#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) +#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) +#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) +#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) +#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) +#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST) +#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT) +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY) +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING) +#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING) +#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID) +#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC) +#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS) +#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) +#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) +#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE) +#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) +#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) +#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) +#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) +#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION) +#define SECONDARY_EXEC_NOTIFY_VM_EXITING VMCS_CONTROL_BIT(NOTIFY_VM_EXITING) + +/* + * Definitions of Tertiary Processor-Based VM-Execution Controls. 
+ */ +#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT) + +#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) +#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) +#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) +#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER) +#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR) #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -103,6 +106,7 @@ #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 #define VM_EXIT_PT_CONCEAL_PIP 0x01000000 #define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 +#define VM_EXIT_LOAD_CET_STATE 0x10000000 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff @@ -116,18 +120,22 @@ #define VM_ENTRY_LOAD_BNDCFGS 0x00010000 #define VM_ENTRY_PT_CONCEAL_PIP 0x00020000 #define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 +#define VM_ENTRY_LOAD_CET_STATE 0x00100000 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 -#define VMX_MISC_ACTIVITY_HLT 0x00000040 -#define VMX_MISC_ZERO_LEN_INS 0x40000000 - /* VMFUNC functions */ -#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) + +#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 +#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) +#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) +#define VMX_BASIC_INOUT BIT_ULL(54) +#define VMX_BASIC_TRUE_CTLS BIT_ULL(55) +#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56) + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); @@ -138,9 +146,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; } +static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) +{ + return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; +} + +static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) +{ + return revision | ((u64)size << 32) | ((u64)memtype << 50); +} + +#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) +#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) +#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) +#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8) +#define VMX_MISC_INTEL_PT BIT_ULL(14) +#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15) +#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28) +#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29) +#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30) +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { - return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + return vmx_misc & GENMASK_ULL(4, 0); } static inline int vmx_misc_cr3_count(u64 vmx_misc) @@ -162,6 +191,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc) enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, POSTED_INTR_NV = 0x00000002, + LAST_PID_POINTER_INDEX = 0x00000008, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -219,12 +249,19 @@ enum vmcs_field { VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP_HIGH = 0x00002029, + VE_INFORMATION_ADDRESS = 0x0000202A, + VE_INFORMATION_ADDRESS_HIGH = 0x0000202B, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, ENCLS_EXITING_BITMAP = 0x0000202E, ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, TSC_MULTIPLIER = 0x00002032, TSC_MULTIPLIER_HIGH = 0x00002033, + TERTIARY_VM_EXEC_CONTROL = 0x00002034, + TERTIARY_VM_EXEC_CONTROL_HIGH = 
0x00002035, + SHARED_EPT_POINTER = 0x0000203C, + PID_POINTER_TABLE = 0x00002042, + PID_POINTER_TABLE_HIGH = 0x00002043, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -273,6 +310,7 @@ enum vmcs_field { SECONDARY_VM_EXEC_CONTROL = 0x0000401e, PLE_GAP = 0x00004020, PLE_WINDOW = 0x00004022, + NOTIFY_WINDOW = 0x00004024, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -300,7 +338,7 @@ enum vmcs_field { GUEST_LDTR_AR_BYTES = 0x00004820, GUEST_TR_AR_BYTES = 0x00004822, GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_ACTIVITY_STATE = 0x00004826, GUEST_SYSENTER_CS = 0x0000482A, VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, @@ -334,6 +372,9 @@ enum vmcs_field { GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, GUEST_SYSENTER_ESP = 0x00006824, GUEST_SYSENTER_EIP = 0x00006826, + GUEST_S_CET = 0x00006828, + GUEST_SSP = 0x0000682a, + GUEST_INTR_SSP_TABLE = 0x0000682c, HOST_CR0 = 0x00006c00, HOST_CR3 = 0x00006c02, HOST_CR4 = 0x00006c04, @@ -346,6 +387,9 @@ enum vmcs_field { HOST_IA32_SYSENTER_EIP = 0x00006c12, HOST_RSP = 0x00006c14, HOST_RIP = 0x00006c16, + HOST_S_CET = 0x00006c18, + HOST_SSP = 0x00006c1a, + HOST_INTR_SSP_TABLE = 0x00006c1c }; /* @@ -363,20 +407,21 @@ enum vmcs_field { #define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK -#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ -#define INTR_TYPE_RESERVED (1 << 8) /* reserved */ -#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ -#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ -#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ -#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ -#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ -#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */ +#define INTR_TYPE_EXT_INTR (EVENT_TYPE_EXTINT << 8) /* external interrupt */ +#define INTR_TYPE_RESERVED (EVENT_TYPE_RESERVED << 8) /* reserved */ +#define INTR_TYPE_NMI_INTR (EVENT_TYPE_NMI << 8) /* NMI */ +#define INTR_TYPE_HARD_EXCEPTION (EVENT_TYPE_HWEXC << 8) /* processor exception */ +#define INTR_TYPE_SOFT_INTR (EVENT_TYPE_SWINT << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (EVENT_TYPE_PRIV_SWEXC << 8) /* ICE breakpoint */ +#define INTR_TYPE_SOFT_EXCEPTION (EVENT_TYPE_SWEXC << 8) /* software exception */ +#define INTR_TYPE_OTHER_EVENT (EVENT_TYPE_OTHER << 8) /* other event */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 #define GUEST_INTR_STATE_MOV_SS 0x00000002 #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010 /* GUEST_ACTIVITY_STATE flags */ #define GUEST_ACTIVITY_ACTIVE 0 @@ -492,20 +537,34 @@ enum vmcs_field { #define VMX_EPTP_PWL_4 0x18ull #define VMX_EPTP_PWL_5 0x20ull #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. 
*/ #define VMX_EPTP_MT_MASK 0x7ull -#define VMX_EPTP_MT_WB 0x6ull -#define VMX_EPTP_MT_UC 0x0ull +#define VMX_EPTP_MT_WB X86_MEMTYPE_WB +#define VMX_EPTP_MT_UC X86_MEMTYPE_UC #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63) #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) +static inline u8 vmx_eptp_page_walk_level(u64 eptp) +{ + u64 encoded_level = eptp & VMX_EPTP_PWL_MASK; + + if (encoded_level == VMX_EPTP_PWL_5) + return 5; + + /* @eptp must be pre-validated by the caller. */ + WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4); + return 4; +} + /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) @@ -521,28 +580,38 @@ struct vmx_msr_entry { /* * Exit Qualifications for entry failure during or after loading guest state */ -#define ENTRY_FAIL_DEFAULT 0 -#define ENTRY_FAIL_PDPTE 2 -#define ENTRY_FAIL_NMI 3 -#define ENTRY_FAIL_VMCS_LINK_PTR 4 +enum vm_entry_failure_code { + ENTRY_FAIL_DEFAULT = 0, + ENTRY_FAIL_PDPTE = 2, + ENTRY_FAIL_NMI = 3, + ENTRY_FAIL_VMCS_LINK_PTR = 4, +}; /* * Exit Qualifications for EPT Violations */ -#define EPT_VIOLATION_ACC_READ_BIT 0 -#define EPT_VIOLATION_ACC_WRITE_BIT 1 -#define EPT_VIOLATION_ACC_INSTR_BIT 2 -#define EPT_VIOLATION_READABLE_BIT 3 -#define EPT_VIOLATION_WRITABLE_BIT 4 -#define EPT_VIOLATION_EXECUTABLE_BIT 5 -#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8 -#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) -#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) -#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) -#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) -#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) -#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) -#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) +#define EPT_VIOLATION_ACC_READ BIT(0) +#define EPT_VIOLATION_ACC_WRITE BIT(1) +#define EPT_VIOLATION_ACC_INSTR BIT(2) +#define EPT_VIOLATION_PROT_READ BIT(3) +#define EPT_VIOLATION_PROT_WRITE BIT(4) +#define EPT_VIOLATION_PROT_EXEC BIT(5) +#define EPT_VIOLATION_EXEC_FOR_RING3_LIN BIT(6) +#define EPT_VIOLATION_PROT_MASK (EPT_VIOLATION_PROT_READ | \ + EPT_VIOLATION_PROT_WRITE | \ + EPT_VIOLATION_PROT_EXEC) +#define EPT_VIOLATION_GVA_IS_VALID BIT(7) +#define EPT_VIOLATION_GVA_TRANSLATED BIT(8) + +#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3) + +static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) == + (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC)); + +/* + * Exit Qualifications for NOTIFY VM EXIT + */ +#define NOTIFY_VM_CONTEXT_INVALID BIT(0) /* * VM-instruction error numbers @@ -575,6 +644,20 @@ enum vm_instruction_error_number { VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, }; +/* + * VM-instruction errors that can be encountered on VM-Enter, used to trace + * nested VM-Enter failures reported by hardware. Errors unique to VM-Enter + * from a SMI Transfer Monitor are not included as things have gone seriously + * sideways if we get one of those... 
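A composition sketch for an EPTP value from the fields above (root_hpa is an assumed page-aligned EPT root; real code additionally consults the VMX capability MSRs before picking the walk length and memtype):

	u64 eptp = root_hpa | VMX_EPTP_MT_WB | VMX_EPTP_PWL_4 |
		   VMX_EPTP_AD_ENABLE_BIT;

	WARN_ON_ONCE(vmx_eptp_page_walk_level(eptp) != 4);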
+ */ +#define VMX_VMENTER_INSTRUCTION_ERRORS \ + { VMXERR_VMLAUNCH_NONCLEAR_VMCS, "VMLAUNCH_NONCLEAR_VMCS" }, \ + { VMXERR_VMRESUME_NONLAUNCHED_VMCS, "VMRESUME_NONLAUNCHED_VMCS" }, \ + { VMXERR_VMRESUME_AFTER_VMXOFF, "VMRESUME_AFTER_VMXOFF" }, \ + { VMXERR_ENTRY_INVALID_CONTROL_FIELD, "VMENTRY_INVALID_CONTROL_FIELD" }, \ + { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD, "VMENTRY_INVALID_HOST_STATE_FIELD" }, \ + { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS, "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" } + enum vmx_l1d_flush_state { VMENTER_L1D_FLUSH_AUTO, VMENTER_L1D_FLUSH_NEVER, @@ -586,4 +669,13 @@ enum vmx_l1d_flush_state { extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; +struct vmx_ve_information { + u32 exit_reason; + u32 delivery; + u64 exit_qualification; + u64 guest_linear_address; + u64 guest_physical_address; + u16 eptp_index; +}; + #endif diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h new file mode 100644 index 000000000000..09b1d7e607c1 --- /dev/null +++ b/arch/x86/include/asm/vmxfeatures.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VMXFEATURES_H +#define _ASM_X86_VMXFEATURES_H + +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 5 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit + * is not displayed in /proc/cpuinfo at all. + */ + +/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */ +#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */ + +/* EPT/VPID features, scattered to bits 16-23 */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */ +#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */ +#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */ + +/* Aggregated APIC features 24-27 */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ + +/* VM-Functions, shifted to bits 28-31 */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */ + +/* Primary Processor-Based VM-Execution Controls, word 1 */ +#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */ +#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) 
/* VM-Exit on reads from CR3 */ +#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */ +#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ +#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */ +#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */ + +/* Secondary Processor-Based VM-Execution Controls, word 2 */ +#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ +#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ +#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */ +#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. 
user */
+#define VMX_FEATURE_PT_USE_GPA		( 2*32+ 24) /* Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING		( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE	( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */
+#define VMX_FEATURE_ENCLV_EXITING	( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */
+#define VMX_FEATURE_BUS_LOCK_DETECTION	( 2*32+ 30) /* VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING	( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */
+
+/* Tertiary Processor-Based VM-Execution Controls, word 3 */
+#define VMX_FEATURE_IPI_VIRT		( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */
+#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..472f0263dbc6 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
 #include <linux/seqlock.h>
 #include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
@@ -13,13 +14,24 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 * Called on instruction fetch fault in vsyscall page.
 * Returns true if handled.
 */
-extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool emulate_vsyscall(unsigned long error_code,
+			     struct pt_regs *regs, unsigned long address);
 #else
 static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall(unsigned long error_code,
+				    struct pt_regs *regs, unsigned long address)
 {
 	return false;
 }
 #endif
+/*
+ * The (legacy) vsyscall page is the lone page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+	return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
 #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
deleted file mode 100644
index 3f32dfc2ab73..000000000000
--- a/arch/x86/include/asm/vvar.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * vvar.h: Shared vDSO/kernel variable declarations
- * Copyright (c) 2011 Andy Lutomirski
- * Subject to the GNU General Public License, version 2
- *
- * A handful of variables are accessible (read-only) from userspace
- * code in the vsyscall page and the vdso. They are declared here.
- * Some other file must define them with DEFINE_VVAR.
- *
- * In normal kernel code, they are used like any other variable.
- * In user code, they are accessed through the VVAR macro.
- *
- * These variables live in a page of kernel data that has an extra RO
- * mapping for userspace. Each variable needs a unique offset within
- * that page; specify that offset with the DECLARE_VVAR macro. (If
- * you mess up, the linker will catch it.)
- */
-
-#ifndef _ASM_X86_VVAR_H
-#define _ASM_X86_VVAR_H
-
-#if defined(__VVAR_KERNEL_LDS)
-
-/* The kernel linker script defines its own magic to put vvars in the
- * right place.
- */ -#define DECLARE_VVAR(offset, type, name) \ - EMIT_VVAR(name, offset) - -#else - -extern char __vvar_page; - -#define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); - -#define VVAR(name) (vvar_ ## name) - -#define DEFINE_VVAR(type, name) \ - type name \ - __attribute__((section(".vvar_" #name), aligned(16))) __visible - -#endif - -/* DECLARE_VVAR(offset, type, name) */ - -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) - -#undef DECLARE_VVAR - -#endif diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 06006b0351f3..422a47746657 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -2,47 +2,15 @@ #ifndef _ASM_WORD_AT_A_TIME_H #define _ASM_WORD_AT_A_TIME_H -#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/wordpart.h> -/* - * This is largely generic for little-endian machines, but the - * optimal byte mask counting is probably going to be something - * that is architecture-specific. If you have a reliably fast - * bit count instruction, that might be better than the multiply - * and shift, for example. - */ struct word_at_a_time { const unsigned long one_bits, high_bits; }; #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } -#ifdef CONFIG_64BIT - -/* - * Jan Achrenius on G+: microoptimized version of - * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" - * that works for the bytemasks without having to - * mask them first. - */ -static inline long count_masked_bytes(unsigned long mask) -{ - return mask*0x0001020304050608ul >> 56; -} - -#else /* 32-bit case */ - -/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ -static inline long count_masked_bytes(long mask) -{ - /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ - long a = (0x0ff0001+mask) >> 23; - /* Fix the 1 for 00 case */ - return a & mask; -} - -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -56,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, return bits; } +#ifdef CONFIG_64BIT + +/* Keep the initial has_zero() value for both bitmask and size calc */ +#define create_zero_mask(bits) (bits) + +static inline unsigned long zero_bytemask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +#define find_zero(bits) (__ffs(bits) >> 3) + +#else + +/* Create the final mask for both bytemask and size */ static inline unsigned long create_zero_mask(unsigned long bits) { bits = (bits - 1) & ~bits; @@ -65,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ static inline unsigned long find_zero(unsigned long mask) { - return count_masked_bytes(mask); + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; } +#endif + /* * Load an unaligned word from kernel space. 
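 *
 * (Editor's sketch, not part of the patch.) The removed .fixup sequence
 * below and its EX_TYPE_ZEROPAD replacement perform the same recovery
 * when the load crosses into an unmapped page; in C terms, roughly:
 *
 *	offset	= (unsigned long)addr & (sizeof(long) - 1);
 *	aligned	= (unsigned long)addr & ~(sizeof(long) - 1);
 *	ret	= *(unsigned long *)aligned >> (offset * 8);
 *
 * i.e. redo the load from the aligned address (which is known to be
 * mapped) and shift out the bytes below addr, zero-filling from the top.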
* @@ -79,27 +69,15 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, dummy; + unsigned long ret; - asm( - "1:\tmov %2,%0\n" + asm volatile( + "1: mov %[mem], %[ret]\n" "2:\n" - ".section .fixup,\"ax\"\n" - "3:\t" - "lea %2,%1\n\t" - "and %3,%1\n\t" - "mov (%1),%0\n\t" - "leal %2,%%ecx\n\t" - "andl %4,%%ecx\n\t" - "shll $3,%%ecx\n\t" - "shr %%cl,%0\n\t" - "jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - :"=&r" (ret),"=&c" (dummy) - :"m" (*(unsigned long *)addr), - "i" (-sizeof(unsigned long)), - "i" (sizeof(unsigned long)-1)); + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD) + : [ret] "=r" (ret) + : [mem] "m" (*(unsigned long *)addr)); + return ret; } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b85a7c54c6a1..6c8a6ead84f6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -2,33 +2,26 @@ #ifndef _ASM_X86_PLATFORM_H #define _ASM_X86_PLATFORM_H -#include <asm/bootparam.h> - +struct ghcb; struct mpc_bus; struct mpc_cpu; +struct pt_regs; struct mpc_table; struct cpuinfo_x86; +struct irq_domain; /** * struct x86_init_mpparse - platform specific mpparse ops - * @mpc_record: platform specific mpc record accounting * @setup_ioapic_ids: platform specific ioapic id override - * @mpc_apic_id: platform specific mpc apic id assignment - * @smp_read_mpc_oem: platform specific oem mpc table setup - * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL) - * @mpc_oem_bus_info: platform specific mpc bus info - * @find_smp_config: find the smp configuration - * @get_smp_config: get the smp configuration + * @find_mptable: Find MPTABLE early to reserve the memory region + * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init() + * @parse_smp_cfg: Parse the SMP configuration data */ struct x86_init_mpparse { - void (*mpc_record)(unsigned int mode); void (*setup_ioapic_ids)(void); - int (*mpc_apic_id)(struct mpc_cpu *m); - void (*smp_read_mpc_oem)(struct mpc_table *mpc); - void (*mpc_oem_pci_bus)(struct mpc_bus *m); - void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); - void (*find_smp_config)(void); - void (*get_smp_config)(unsigned int early); + void (*find_mptable)(void); + void (*early_parse_smp_cfg)(void); + void (*parse_smp_cfg)(void); }; /** @@ -37,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** @@ -50,14 +44,16 @@ struct x86_init_resources { * @pre_vector_init: init code to run before interrupt vectors * are set up. 
 * @intr_init:			interrupt init code
- * @trap_init:			platform specific trap setup
+ * @intr_mode_select:		interrupt delivery mode selection
 * @intr_mode_init:		interrupt delivery mode setup
+ * @create_pci_msi_domain:	Create the PCI/MSI interrupt domain
 */
struct x86_init_irqs {
	void (*pre_vector_init)(void);
	void (*intr_init)(void);
-	void (*trap_init)(void);
+	void (*intr_mode_select)(void);
	void (*intr_mode_init)(void);
+	struct irq_domain *(*create_pci_msi_domain)(void);
};

/**
@@ -83,7 +79,7 @@ struct x86_init_paging {
/**
 * struct x86_init_timers - platform specific timer setup
- * @setup_perpcu_clockev:	set up the per cpu clock event device for the
+ * @setup_percpu_clockev:	set up the per cpu clock event device for the
 *				boot cpu
 * @timer_init:			initialize the platform timer (default PIT/HPET)
 * @wallclock_init:		init the wallclock device
@@ -121,6 +117,7 @@ struct x86_init_pci {
 * @init_platform:		platform setup
 * @guest_late_init:		guest late init
 * @x2apic_available:		X2APIC detection
+ * @msi_ext_dest_id:		MSI supports 15-bit APIC IDs
 * @init_mem_mapping:		setup early mappings during init_mem_mapping()
 * @init_after_bootmem:		guest init after boot allocator is finished
 */
@@ -128,21 +125,49 @@ struct x86_hyper_init {
	void (*init_platform)(void);
	void (*guest_late_init)(void);
	bool (*x2apic_available)(void);
+	bool (*msi_ext_dest_id)(void);
	void (*init_mem_mapping)(void);
	void (*init_after_bootmem)(void);
};

/**
 * struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_pointer:		set RSDP address
 * @get_root_pointer:		get RSDP address
 * @reduced_hw_early_init:	hardware reduced platform early init
 */
struct x86_init_acpi {
+	void (*set_root_pointer)(u64 addr);
	u64 (*get_root_pointer)(void);
	void (*reduced_hw_early_init)(void);
};

/**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare:	Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish:	Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required:	Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required:	Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin:		Begin the two-step process of converting shared memory back
+ *				to private. It stops new conversions from being started
+ *				and waits for in-flight conversions to finish, if possible.
+ * @enc_kexec_finish:		Finish the two-step process of converting shared memory to
+ *				private. All memory is private after the call when
+ *				the function returns.
+ *				It is called on only one CPU while the others are shut down
+ *				and with interrupts disabled.
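+ *
+ * (Editor's illustration, not part of the patch.) A caller converting
+ * @npages at @vaddr is expected to use these hooks roughly as follows;
+ * the flush helpers shown stand in for whatever the caller really uses:
+ *
+ *	x86_platform.guest.enc_status_change_prepare(vaddr, npages, enc);
+ *	if (x86_platform.guest.enc_cache_flush_required())
+ *		wbinvd_on_all_cpus();
+ *	if (x86_platform.guest.enc_tlb_flush_required(enc))
+ *		flush_tlb_all();
+ *	... update the page tables ...
+ *	x86_platform.guest.enc_status_change_finish(vaddr, npages, enc);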
+ */ +struct x86_guest { + int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); + void (*enc_kexec_begin)(void); + void (*enc_kexec_finish)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -163,11 +188,14 @@ struct x86_init_ops { * struct x86_cpuinit_ops - platform specific cpu hotplug setups * @setup_percpu_clockev: set up the per cpu clock event device * @early_percpu_clock_init: early init of the per cpu clock event device + * @fixup_cpu_id: fixup function for cpuinfo_x86::topo.pkg_id + * @parallel_bringup: Parallel bringup control */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); void (*early_percpu_clock_init)(void); void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); + bool parallel_bringup; }; struct timespec64; @@ -201,7 +229,7 @@ struct x86_legacy_devices { * given platform/subarch. * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller * is absent. - * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be + * @X86_LEGACY_I8042_EXPECTED_PRESENT: the controller is likely to be * present, the i8042 driver should probe for controller existence. */ enum x86_legacy_i8042_state { @@ -216,6 +244,8 @@ enum x86_legacy_i8042_state { * @i8042: indicated if we expect the device to have i8042 controller * present. * @rtc: this device has a CMOS real-time clock present + * @warm_reset: 1 if platform allows warm reset, else 0 + * @no_vga: 1 if (FADT.boot_flags & ACPI_FADT_NO_VGA) is set, else 0 * @reserve_bios_regions: boot code will search for the EBDA address and the * start of the 640k - 1M BIOS region. If false, the platform must * ensure that its memory map correctly reserves sub-1MB regions as needed. @@ -234,10 +264,26 @@ struct x86_legacy_features { /** * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks * - * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) + * @pin_vcpu: pin current vcpu to specified physical + * cpu (run rarely) + * @sev_es_hcall_prepare: Load additional hypervisor-specific + * state into the GHCB when doing a VMMCALL under + * SEV-ES. Called from the #VC exception handler. + * @sev_es_hcall_finish: Copies state from the GHCB back into the + * processor (or pt_regs). Also runs checks on the + * state returned from the hypervisor after a + * VMMCALL under SEV-ES. Needs to return 'false' + * if the checks fail. Called from the #VC + * exception handler. + * @is_private_mmio: For CoCo VMs, must map MMIO address as private. + * Used when device is emulated by a paravisor + * layer in the VM context. */ struct x86_hyper_runtime { void (*pin_vcpu)(int cpu); + void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*is_private_mmio)(u64 addr); }; /** @@ -246,8 +292,10 @@ struct x86_hyper_runtime { * @calibrate_tsc: calibrate TSC, if different from CPU * @get_wallclock: get time from HW clock like RTC etc. 
 * @set_wallclock:			set time back to HW clock
- * @is_untracked_pat_range		exclude from PAT logic
- * @nmi_init				enable NMI on cpus
+ * @iommu_shutdown:			set by an IOMMU driver for shutdown if necessary
+ * @is_untracked_pat_range:		exclude from PAT logic
+ * @nmi_init:				enable NMI on cpus
+ * @get_nmi_reason:			get the reason an NMI was received
 * @save_sched_clock_state:		save state for sched_clock() on suspend
 * @restore_sched_clock_state:		restore state for sched_clock() on resume
 * @apic_post_init:			adjust apic if needed
@@ -259,7 +307,10 @@ struct x86_hyper_runtime {
 *					possible in x86_early_init_platform_quirks() by
 *					only using the current x86_hardware_subarch
 *					semantics.
+ * @realmode_reserve:			reserve memory for realmode trampoline
+ * @realmode_init:			initialize realmode trampoline
 * @hyper:				x86 hypervisor specific runtime callbacks
+ * @guest:				guest incarnations callbacks
 */
struct x86_platform_ops {
	unsigned long (*calibrate_cpu)(void);
@@ -275,16 +326,10 @@ struct x86_platform_ops {
	void (*apic_post_init)(void);
	struct x86_legacy_features legacy;
	void (*set_legacy_features)(void);
+	void (*realmode_reserve)(void);
+	void (*realmode_init)(void);
	struct x86_hyper_runtime hyper;
-};
-
-struct pci_dev;
-
-struct x86_msi_ops {
-	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
-	void (*teardown_msi_irq)(unsigned int irq);
-	void (*teardown_msi_irqs)(struct pci_dev *dev);
-	void (*restore_msi_irqs)(struct pci_dev *dev);
+	struct x86_guest guest;
};

struct x86_apic_ops {
@@ -301,6 +346,10 @@ extern struct x86_apic_ops x86_apic_ops;
extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
+extern bool bool_x86_init_noop(void);
+extern void x86_op_int_noop(int cpu);
extern bool x86_pnpbios_disabled(void);
+extern int set_rtc_noop(const struct timespec64 *now);
+extern void get_rtc_noop(struct timespec64 *now);

#endif
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..a3c29b1496c8 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -89,17 +89,40 @@
 * Sub-leaf 2: EAX: host tsc frequency in kHz
 */

+#define XEN_CPUID_TSC_EMULATED		(1u << 0)
+#define XEN_CPUID_HOST_TSC_RELIABLE	(1u << 1)
+#define XEN_CPUID_RDTSCP_INSTR_AVAIL	(1u << 2)
+
+#define XEN_CPUID_TSC_MODE_DEFAULT	(0)
+#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE	(1u)
+#define XEN_CPUID_TSC_MODE_NEVER_EMULATE	(2u)
+#define XEN_CPUID_TSC_MODE_PVRDTSCP	(3u)
+
/*
 * Leaf 5 (0x40000x04)
 * HVM-specific features
 * Sub-leaf 0: EAX: Features
 * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
 */
#define XEN_HVM_CPUID_APIC_ACCESS_VIRT	(1u << 0) /* Virtualized APIC registers */
#define XEN_HVM_CPUID_X2APIC_VIRT	(1u << 1) /* Virtualized x2APIC accesses */
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS	(1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT	(1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT	(1u << 4) /* domid is present in ECX */
+/*
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, making it possible to target APIC IDs up to 32768.
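+ *
+ * (Editor's sketch, not part of the patch.) A guest composing a
+ * non-remapped MSI address for a 15-bit destination would roughly do:
+ *
+ *	msi_addr  = 0xfee00000;
+ *	msi_addr |= (dest_id & 0xff) << 12;	destination ID bits 7:0
+ *	msi_addr |= (dest_id >> 8) << 5;	destination ID bits 14:8
+ *
+ * which is the layout the x86_hyper_init msi_ext_dest_id() hook above
+ * advertises support for.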
+ */ +#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5) +/* + * Per-vCPU event channel upcalls work correctly with physical IRQs + * bound to event channels. + */ +#define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6) /* * Leaf 6 (0x40000x05) diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 068d9b067c83..62bdceb594f1 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -23,7 +23,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) /* No need for a barrier -- XCHG is a barrier on x86. */ #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) -extern int xen_have_vector_callback; +extern bool xen_have_vector_callback; /* * Events delivered via platform PCI interrupts are always @@ -34,4 +34,5 @@ static inline bool xen_support_evtchn_rebind(void) return (!xen_hvm_domain() || xen_have_vector_callback); } +extern bool xen_percpu_upcall; #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ef05bea7010d..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -38,11 +38,13 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/pgtable.h> +#include <linux/instrumentation.h> #include <trace/events/xen.h> +#include <asm/alternative.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/smap.h> #include <asm/nospec-branch.h> @@ -82,15 +84,25 @@ struct xen_dm_op_buf; * - clobber the rest * * The result certainly isn't pretty, and it really shows up cpp's - * weakness as as macro language. Sorry. (But let's just give thanks + * weakness as a macro language. Sorry. (But let's just give thanks * there aren't more than 5 arguments...) 
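 *
 * (Editor's note, not part of the patch.) The change below replaces the
 * fixed-offset call into the shared hypercall page with a static call,
 * and the hypercall number moves from the call target into the rAX
 * register; schematically:
 *
 *	old:	call hypercall_page + __HYPERVISOR_xen_version * 32
 *	new:	mov $__HYPERVISOR_xen_version, %eax
 *		call STATIC_CALL_TRAMP(xen_hypercall)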
*/ -extern struct { char _entry[32]; } hypercall_page[]; +void xen_hypercall_func(void); +DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); -#define __HYPERCALL "call hypercall_page+%c[offset]" -#define __HYPERCALL_ENTRY(x) \ - [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) +#ifdef MODULE +#define __ADDRESSABLE_xen_hypercall +#else +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) +#endif + +#define __HYPERCALL \ + __ADDRESSABLE_xen_hypercall \ + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) + +#define __HYPERCALL_ENTRY(x) "a" (x) #ifdef CONFIG_X86_32 #define __HYPERCALL_RETREG "eax" @@ -148,7 +160,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_0ARG(); \ asm volatile (__HYPERCALL \ : __HYPERCALL_0PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER0); \ (type)__res; \ }) @@ -159,7 +171,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_1ARG(a1); \ asm volatile (__HYPERCALL \ : __HYPERCALL_1PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER1); \ (type)__res; \ }) @@ -170,7 +182,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_2ARG(a1, a2); \ asm volatile (__HYPERCALL \ : __HYPERCALL_2PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER2); \ (type)__res; \ }) @@ -181,7 +193,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_3ARG(a1, a2, a3); \ asm volatile (__HYPERCALL \ : __HYPERCALL_3PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER3); \ (type)__res; \ }) @@ -192,7 +204,7 @@ extern struct { char _entry[32]; } hypercall_page[]; __HYPERCALL_4ARG(a1, a2, a3, a4); \ asm volatile (__HYPERCALL \ : __HYPERCALL_4PARAM \ - : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \ : __HYPERCALL_CLOBBER4); \ (type)__res; \ }) @@ -206,14 +218,28 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); - asm volatile(CALL_NOSPEC + asm volatile(__HYPERCALL : __HYPERCALL_5PARAM - : [thunk_target] "a" (&hypercall_page[call]) + : __HYPERCALL_ENTRY(call) : __HYPERCALL_CLOBBER5); return (long)__res; } +static __always_inline void __xen_stac(void) +{ + /* + * Suppress objtool seeing the STAC/CLAC and getting confused about it + * calling random code with AC=1. 
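+ *
+ * (Editor's note.) STAC raises EFLAGS.AC so that, with SMAP enabled,
+ * hypercalls may legitimately touch buffers living in user memory;
+ * every __xen_stac() must be paired with a __xen_clac(), as
+ * privcmd_call() and HYPERVISOR_dm_op() below do.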
+ */ + asm volatile(ASM_STAC_UNSAFE ::: "memory", "flags"); +} + +static __always_inline void __xen_clac(void) +{ + asm volatile(ASM_CLAC_UNSAFE ::: "memory", "flags"); +} + static inline long privcmd_call(unsigned int call, unsigned long a1, unsigned long a2, @@ -222,13 +248,14 @@ privcmd_call(unsigned int call, { long res; - stac(); + __xen_stac(); res = xen_single_call(call, a1, a2, a3, a4, a5); - clac(); + __xen_clac(); return res; } +#ifdef CONFIG_XEN_PV static inline int HYPERVISOR_set_trap_table(struct trap_info *table) { @@ -261,7 +288,108 @@ HYPERVISOR_callback_op(int cmd, void *arg) return _hypercall2(int, callback_op, cmd, arg); } +static __always_inline int +HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static __always_inline unsigned long +HYPERVISOR_get_debugreg(int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + static inline int +HYPERVISOR_update_descriptor(u64 ma, u64 desc) +{ + return _hypercall2(int, update_descriptor, ma, desc); +} + +static inline int +HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, + unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); +} + +static inline int +HYPERVISOR_set_segment_base(int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; + + trace_xen_mc_entry(mcl, 1); +} + +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + + trace_xen_mc_entry(mcl, 3); +} + +static inline void +MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, + struct desc_struct desc) +{ + mcl->op = __HYPERVISOR_update_descriptor; + mcl->args[0] = maddr; + mcl->args[1] = *(unsigned long *)&desc; + + trace_xen_mc_entry(mcl, 2); +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)op; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; + + trace_xen_mc_entry(mcl, 4); +} + +static inline void +MULTI_stack_switch(struct multicall_entry *mcl, + unsigned long ss, unsigned long esp) +{ + mcl->op = __HYPERVISOR_stack_switch; + mcl->args[0] = ss; + mcl->args[1] = esp; + + trace_xen_mc_entry(mcl, 2); +} +#endif + +static __always_inline int HYPERVISOR_sched_op(int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); @@ -289,26 +417,6 @@ HYPERVISOR_platform_op(struct xen_platform_op *op) return _hypercall1(int, platform_op, op); } -static inline int -HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long -HYPERVISOR_get_debugreg(int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); -} - -static 
inline int -HYPERVISOR_update_descriptor(u64 ma, u64 desc) -{ - if (sizeof(u64) == sizeof(long)) - return _hypercall2(int, update_descriptor, ma, desc); - return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); -} - static inline long HYPERVISOR_memory_op(unsigned int cmd, void *arg) { @@ -322,28 +430,12 @@ HYPERVISOR_multicall(void *call_list, uint32_t nr_calls) } static inline int -HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, - unsigned long flags) -{ - if (sizeof(new_val) == sizeof(long)) - return _hypercall3(int, update_va_mapping, va, - new_val.pte, flags); - else - return _hypercall4(int, update_va_mapping, va, - new_val.pte, new_val.pte >> 32, flags); -} -extern int __must_check xen_event_channel_op_compat(int, void *); - -static inline int HYPERVISOR_event_channel_op(int cmd, void *arg) { - int rc = _hypercall2(int, event_channel_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) - rc = xen_event_channel_op_compat(cmd, arg); - return rc; + return _hypercall2(int, event_channel_op, cmd, arg); } -static inline int +static __always_inline int HYPERVISOR_xen_version(int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); @@ -355,15 +447,10 @@ HYPERVISOR_console_io(int cmd, int count, char *str) return _hypercall3(int, console_io, cmd, count, str); } -extern int __must_check xen_physdev_op_compat(int, void *); - static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { - int rc = _hypercall2(int, physdev_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) - rc = xen_physdev_op_compat(cmd, arg); - return rc; + return _hypercall2(int, physdev_op, cmd, arg); } static inline int @@ -384,14 +471,6 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } -#ifdef CONFIG_X86_64 -static inline int -HYPERVISOR_set_segment_base(int reg, unsigned long value) -{ - return _hypercall2(int, set_segment_base, reg, value); -} -#endif - static inline int HYPERVISOR_suspend(unsigned long start_info_mfn) { @@ -413,13 +492,6 @@ HYPERVISOR_hvm_op(int op, void *arg) } static inline int -HYPERVISOR_tmem_op( - struct tmem_op *op) -{ - return _hypercall1(int, tmem_op, op); -} - -static inline int HYPERVISOR_xenpmu_op(unsigned int op, void *arg) { return _hypercall2(int, xenpmu_op, op, arg); @@ -430,94 +502,10 @@ HYPERVISOR_dm_op( domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) { int ret; - stac(); + __xen_stac(); ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs); - clac(); + __xen_clac(); return ret; } -static inline void -MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) -{ - mcl->op = __HYPERVISOR_fpu_taskswitch; - mcl->args[0] = set; - - trace_xen_mc_entry(mcl, 1); -} - -static inline void -MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - pte_t new_val, unsigned long flags) -{ - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = va; - if (sizeof(new_val) == sizeof(long)) { - mcl->args[1] = new_val.pte; - mcl->args[2] = flags; - } else { - mcl->args[1] = new_val.pte; - mcl->args[2] = new_val.pte >> 32; - mcl->args[3] = flags; - } - - trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 
3 : 4); -} - -static inline void -MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, - struct desc_struct desc) -{ - mcl->op = __HYPERVISOR_update_descriptor; - if (sizeof(maddr) == sizeof(long)) { - mcl->args[0] = maddr; - mcl->args[1] = *(unsigned long *)&desc; - } else { - u32 *p = (u32 *)&desc; - - mcl->args[0] = maddr; - mcl->args[1] = maddr >> 32; - mcl->args[2] = *p++; - mcl->args[3] = *p; - } - - trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4); -} - -static inline void -MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, - int count, int *success_count, domid_t domid) -{ - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)req; - mcl->args[1] = count; - mcl->args[2] = (unsigned long)success_count; - mcl->args[3] = domid; - - trace_xen_mc_entry(mcl, 4); -} - -static inline void -MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, - int *success_count, domid_t domid) -{ - mcl->op = __HYPERVISOR_mmuext_op; - mcl->args[0] = (unsigned long)op; - mcl->args[1] = count; - mcl->args[2] = (unsigned long)success_count; - mcl->args[3] = domid; - - trace_xen_mc_entry(mcl, 4); -} - -static inline void -MULTI_stack_switch(struct multicall_entry *mcl, - unsigned long ss, unsigned long esp) -{ - mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = ss; - mcl->args[1] = esp; - - trace_xen_mc_entry(mcl, 2); -} - #endif /* _ASM_X86_XEN_HYPERCALL_H */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 39171b3646bb..c2fc7869b996 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -36,25 +36,22 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include <asm/bug.h> #include <asm/processor.h> +#define XEN_SIGNATURE "XenVMMXenVMM" + static inline uint32_t xen_cpuid_base(void) { - return hypervisor_cpuid_base("XenVMMXenVMM", 2); + return cpuid_base_hypervisor(XEN_SIGNATURE, 2); } -#ifdef CONFIG_XEN -extern bool xen_hvm_need_lapic(void); +struct pci_dev; -static inline bool xen_x2apic_para_available(void) -{ - return xen_hvm_need_lapic(); -} +#ifdef CONFIG_XEN_PV_DOM0 +bool xen_initdom_restore_msi(struct pci_dev *dev); #else -static inline bool xen_x2apic_para_available(void) -{ - return (xen_cpuid_base() != 0); -} +static inline bool xen_initdom_restore_msi(struct pci_dev *dev) { return true; } #endif #ifdef CONFIG_HOTPLUG_CPU @@ -62,6 +59,43 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif -extern void xen_set_iopl_mask(unsigned mask); +#ifdef CONFIG_PVH +void __init xen_pvh_init(struct boot_params *boot_params); +void __init mem_map_via_hcall(struct boot_params *boot_params_p); +#endif + +/* Lazy mode for batching updates / context switch */ +enum xen_lazy_mode { + XEN_LAZY_NONE, + XEN_LAZY_MMU, + XEN_LAZY_CPU, +}; + +DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode); + +static inline void enter_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE); + + this_cpu_write(xen_lazy_mode, mode); +} + +static inline void leave_lazy(enum xen_lazy_mode mode) +{ + BUG_ON(this_cpu_read(xen_lazy_mode) != mode); + + this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE); +} + +enum xen_lazy_mode xen_get_lazy_mode(void); + +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif #endif /* 
_ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c65..a078a2b0f032 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -72,7 +72,7 @@ #endif #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Explicitly size integers that represent pfns in the public interface * with Xen so that on ARM we can have one ABI that works for 32 and 64 * bit guests. */ @@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t); #define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ @@ -182,8 +182,11 @@ struct arch_shared_info { unsigned long p2m_cr3; /* cr3 value of the p2m address space */ unsigned long p2m_vaddr; /* virtual address of the p2m list */ unsigned long p2m_generation; /* generation count of p2m mapping */ +#ifdef CONFIG_X86_32 + uint32_t wc_sec_hi; +#endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_X86_32 #include <asm/xen/interface_32.h> @@ -193,7 +196,7 @@ struct arch_shared_info { #include <asm/pvclock-abi.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. @@ -373,18 +376,15 @@ struct xen_pmu_arch { } c; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif +#include <asm/emulate_prefix.h> + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid) #endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index dc40578abded..74d9768a9cf7 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -44,7 +44,7 @@ */ #define __HYPERVISOR_VIRT_START 0xF5800000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct cpu_user_regs { uint32_t ebx; @@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t; #define XEN_CALLBACK(__cs, __eip) \ ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index c599ec269a25..38a19edb81a3 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -61,7 +61,7 @@ * RING1 -> RING3 kernel mode. * RING2 -> RING3 kernel mode. * RING3 -> RING3 user mode. - * However RING0 indicates that the guest kernel should return to iteself + * However RING0 indicates that the guest kernel should return to itself * directly with * orb $3,1*8(%rsp) * iretq @@ -77,7 +77,7 @@ #define VGCF_in_syscall (1<<_VGCF_in_syscall) #define VGCF_IN_SYSCALL VGCF_in_syscall -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct iret_context { /* Top of stack (%rsp at point of hypercall). 
*/ @@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t; #define XEN_CALLBACK(__cs, __rip) \ ((unsigned long)(__rip)) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_XEN_INTERFACE_64_H */ diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h deleted file mode 100644 index 116777e7f387..000000000000 --- a/arch/x86/include/asm/xen/page-coherent.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_XEN_PAGE_COHERENT_H -#define _ASM_X86_XEN_PAGE_COHERENT_H - -#include <asm/page.h> -#include <linux/dma-mapping.h> - -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) -{ - void *vstart = (void*)__get_free_pages(flags, get_order(size)); - *dma_handle = virt_to_phys(vstart); - return vstart; -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - unsigned long attrs) -{ - free_pages((unsigned long) cpu_addr, get_order(size)); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) { } - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) { } - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) { } - -#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 790ce08e41f2..59f642a94b9d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -11,11 +11,10 @@ #include <asm/extable.h> #include <asm/page.h> -#include <asm/pgtable.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> -#include <xen/features.h> /* Xen machine address */ typedef struct xmaddr { @@ -97,11 +96,7 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) asm volatile("1: mov %[val], %[ptr]\n" "2:\n" - ".section .fixup, \"ax\"\n" - "3: sub $1, %[ret]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) : [ret] "+r" (ret), [ptr] "=m" (*addr) : [val] "r" (val)); @@ -111,16 +106,12 @@ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) static inline int xen_safe_read_ulong(const unsigned long *addr, unsigned long *val) { - int ret = 0; unsigned long rval = ~0ul; + int ret = 0; asm volatile("1: mov %[ptr], %[rval]\n" "2:\n" - ".section .fixup, \"ax\"\n" - "3: sub $1, %[ret]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[ret]) : [ret] "+r" (ret), [rval] "+r" (rval) : [ptr] "m" (*addr)); *val = rval; @@ -171,7 +162,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) * pfn_to_mfn. This will have to be removed when we figured * out which call. 
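 *
 * (Editor's note, not part of the patch.) The rewritten check below is
 * behavior-preserving: PV domains are never auto-translated while HVM
 * and PVH domains always are, so XENFEAT_auto_translated_physmap is set
 * exactly when !xen_pv_domain() holds.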
*/ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return pfn; mfn = __pfn_to_mfn(pfn); @@ -184,7 +175,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline int phys_to_machine_mapping_valid(unsigned long pfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return 1; return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; @@ -219,7 +210,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) * gfn_to_pfn. This will have to be removed when we figure * out which call. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return mfn; pfn = mfn_to_pfn_no_overrides(mfn); @@ -251,7 +242,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) /* Pseudo-physical <-> Guest conversion */ static inline unsigned long pfn_to_gfn(unsigned long pfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return pfn; else return pfn_to_mfn(pfn); @@ -259,7 +250,7 @@ static inline unsigned long pfn_to_gfn(unsigned long pfn) static inline unsigned long gfn_to_pfn(unsigned long gfn) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return gfn; else return mfn_to_pfn(gfn); @@ -293,7 +284,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) { unsigned long pfn; - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return mfn; pfn = mfn_to_pfn(mfn); @@ -304,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +static inline unsigned long virt_to_pfn(const void *v) +{ + return PFN_DOWN(__pa(v)); +} #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) @@ -356,9 +350,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); -#define xen_remap(cookie, size) ioremap((cookie), (size)); -#define xen_unmap(cookie) iounmap((cookie)) - static inline bool xen_arch_need_swiotlb(struct device *dev, phys_addr_t phys, dma_addr_t dev_addr) @@ -366,9 +357,4 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } -static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) -{ - return __get_free_pages(__GFP_NOWARN, order); -} - #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 3506d8c598c1..9015b888edd6 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -14,29 +14,13 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif -#if defined(CONFIG_XEN_DOM0) +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void); -int xen_find_device_domain_owner(struct pci_dev *dev); -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline int __init pci_xen_initial_domain(void) { return -1; } -static inline int xen_find_device_domain_owner(struct pci_dev *dev) -{ - return -1; -} -static inline int xen_register_device_domain_owner(struct pci_dev *dev, - uint16_t domain) -{ - return -1; -} -static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) -{ - return -1; -} #endif #if defined(CONFIG_PCI_MSI) diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h 
b/arch/x86/include/asm/xen/swiotlb-xen.h index 6b56d0d45d15..abde0f44df57 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -2,16 +2,10 @@ #ifndef _ASM_X86_SWIOTLB_XEN_H #define _ASM_X86_SWIOTLB_XEN_H -#ifdef CONFIG_SWIOTLB_XEN -extern int xen_swiotlb; -extern int __init pci_xen_swiotlb_detect(void); -extern void __init pci_xen_swiotlb_init(void); -extern int pci_xen_swiotlb_init_late(void); -#else -#define xen_swiotlb (0) -static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -static inline void __init pci_xen_swiotlb_init(void) { } -static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } -#endif +int xen_swiotlb_fixup(void *buf, unsigned long nslabs); +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, + unsigned int address_bits, + dma_addr_t *dma_handle); +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); #endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 45c8605467f1..7b0307acc410 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_XOR_H #define _ASM_X86_XOR_H /* * Optimized RAID-5 checksumming functions for SSE. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -65,7 +57,8 @@ op(i + 3, 3) static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -116,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -150,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -209,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -246,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -312,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -351,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; @@ -424,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 635eac543922..7a6b9474591e 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _ASM_X86_XOR_32_H #define _ASM_X86_XOR_32_H /* * Optimized RAID-5 checksumming functions for 
MMX. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -29,7 +21,8 @@ #include <asm/fpu/api.h> static void -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 7; @@ -72,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 7; @@ -121,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 7; @@ -176,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, static void -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 7; @@ -256,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, #undef BLOCK static void -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 6; @@ -303,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 6; @@ -360,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 6; @@ -426,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict 
p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 6; diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 22a7b1870a31..7f81dd5897f4 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _ASM_X86_XOR_AVX_H #define _ASM_X86_XOR_AVX_H @@ -8,15 +9,8 @@ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> * * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. */ -#ifdef CONFIG_AS_AVX - #include <linux/compiler.h> #include <asm/fpu/api.h> @@ -32,7 +26,8 @@ BLOCK4(8) \ BLOCK4(12) -static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) +static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1) { unsigned long lines = bytes >> 9; @@ -58,8 +53,9 @@ do { \ kernel_fpu_end(); } -static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2) +static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 9; @@ -88,8 +84,10 @@ do { \ kernel_fpu_end(); } -static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 9; @@ -121,8 +119,11 @@ do { \ kernel_fpu_end(); } -static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3, unsigned long *p4) +static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 9; @@ -174,11 +175,4 @@ do { \ #define AVX_SELECT(FASTEST) \ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? 
&xor_block_avx : FASTEST) -#else - -#define AVX_XOR_SPEED {} - -#define AVX_SELECT(FASTEST) (FASTEST) - -#endif #endif diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index f6648e9928b3..39606a856d3b 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ -include include/uapi/asm-generic/Kbuild.asm - +# SPDX-License-Identifier: GPL-2.0 generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h new file mode 100644 index 000000000000..92d8f256d096 --- /dev/null +++ b/arch/x86/include/uapi/asm/amd_hsmp.h @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_X86_AMD_HSMP_H_ +#define _UAPI_ASM_X86_AMD_HSMP_H_ + +#include <linux/types.h> + +#pragma pack(4) + +#define HSMP_MAX_MSG_LEN 8 + +/* + * HSMP Messages supported + */ +enum hsmp_message_ids { + HSMP_TEST = 1, /* 01h Increments input value by 1 */ + HSMP_GET_SMU_VER, /* 02h SMU FW version */ + HSMP_GET_PROTO_VER, /* 03h HSMP interface version */ + HSMP_GET_SOCKET_POWER, /* 04h average package power consumption */ + HSMP_SET_SOCKET_POWER_LIMIT, /* 05h Set the socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT, /* 06h Get current socket power limit */ + HSMP_GET_SOCKET_POWER_LIMIT_MAX,/* 07h Get maximum socket power value */ + HSMP_SET_BOOST_LIMIT, /* 08h Set a core maximum frequency limit */ + HSMP_SET_BOOST_LIMIT_SOCKET, /* 09h Set socket maximum frequency level */ + HSMP_GET_BOOST_LIMIT, /* 0Ah Get current frequency limit */ + HSMP_GET_PROC_HOT, /* 0Bh Get PROCHOT status */ + HSMP_SET_XGMI_LINK_WIDTH, /* 0Ch Set max and min width of xGMI Link */ + HSMP_SET_DF_PSTATE, /* 0Dh Alter APEnable/Disable messages behavior */ + HSMP_SET_AUTO_DF_PSTATE, /* 0Eh Enable DF P-State Performance Boost algorithm */ + HSMP_GET_FCLK_MCLK, /* 0Fh Get FCLK and MEMCLK for current socket */ + HSMP_GET_CCLK_THROTTLE_LIMIT, /* 10h Get CCLK frequency limit in socket */ + HSMP_GET_C0_PERCENT, /* 11h Get average C0 residency in socket */ + HSMP_SET_NBIO_DPM_LEVEL, /* 12h Set max/min LCLK DPM Level for a given NBIO */ + HSMP_GET_NBIO_DPM_LEVEL, /* 13h Get LCLK DPM level min and max for a given NBIO */ + HSMP_GET_DDR_BANDWIDTH, /* 14h Get theoretical maximum and current DDR Bandwidth */ + HSMP_GET_TEMP_MONITOR, /* 15h Get socket temperature */ + HSMP_GET_DIMM_TEMP_RANGE, /* 16h Get per-DIMM temperature range and refresh rate */ + HSMP_GET_DIMM_POWER, /* 17h Get per-DIMM power consumption */ + HSMP_GET_DIMM_THERMAL, /* 18h Get per-DIMM thermal sensors */ + HSMP_GET_SOCKET_FREQ_LIMIT, /* 19h Get current active frequency per socket */ + HSMP_GET_CCLK_CORE_LIMIT, /* 1Ah Get CCLK frequency limit per core */ + HSMP_GET_RAILS_SVI, /* 1Bh Get SVI-based Telemetry for all rails */ + HSMP_GET_SOCKET_FMAX_FMIN, /* 1Ch Get Fmax and Fmin per socket */ + HSMP_GET_IOLINK_BANDWITH, /* 1Dh Get current bandwidth on IO Link */ + HSMP_GET_XGMI_BANDWITH, /* 1Eh Get current bandwidth on xGMI Link */ + HSMP_SET_GMI3_WIDTH, /* 1Fh Set max and min GMI3 Link width */ + HSMP_SET_PCI_RATE, /* 20h Control link rate on PCIe devices */ + HSMP_SET_POWER_MODE, /* 21h Select power efficiency profile policy */ + HSMP_SET_PSTATE_MAX_MIN, /* 22h Set the max and min DF P-State */ + HSMP_GET_METRIC_TABLE_VER, /* 23h Get metrics table version */ + HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */ + HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */ 
+ HSMP_SET_XGMI_PSTATE_RANGE, /* 26h Set xGMI P-state range */ + HSMP_CPU_RAIL_ISO_FREQ_POLICY, /* 27h Get/Set Cpu Iso frequency policy */ + HSMP_DFC_ENABLE_CTRL, /* 28h Enable/Disable DF C-state */ + HSMP_GET_RAPL_UNITS = 0x30, /* 30h Get scaling factor for energy */ + HSMP_GET_RAPL_CORE_COUNTER, /* 31h Get core energy counter value */ + HSMP_GET_RAPL_PACKAGE_COUNTER, /* 32h Get package energy counter value */ + HSMP_MSG_ID_MAX, +}; + +struct hsmp_message { + __u32 msg_id; /* Message ID */ + __u16 num_args; /* Number of input argument words in message */ + __u16 response_sz; /* Number of expected output/response words */ + __u32 args[HSMP_MAX_MSG_LEN]; /* argument/response buffer */ + __u16 sock_ind; /* socket number */ +}; + +enum hsmp_msg_type { + HSMP_RSVD = -1, + HSMP_SET = 0, + HSMP_GET = 1, + HSMP_SET_GET = 2, +}; + +enum hsmp_proto_versions { + HSMP_PROTO_VER2 = 2, + HSMP_PROTO_VER3, + HSMP_PROTO_VER4, + HSMP_PROTO_VER5, + HSMP_PROTO_VER6, + HSMP_PROTO_VER7 +}; + +struct hsmp_msg_desc { + int num_args; + int response_sz; + enum hsmp_msg_type type; +}; + +/* + * These comments are for reference; the supported list of messages and + * the message definitions are in the HSMP chapter of the respective + * family/model PPR. + * + * Unsupported messages return -ENOMSG. + */ +static const struct hsmp_msg_desc hsmp_msg_desc_table[] + __attribute__((unused)) = { + /* RESERVED */ + {0, 0, HSMP_RSVD}, + + /* + * HSMP_TEST, num_args = 1, response_sz = 1 + * input: args[0] = xx + * output: args[0] = xx + 1 + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SMU_VER, num_args = 0, response_sz = 1 + * output: args[0] = smu fw ver + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1 + * output: args[0] = proto version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1 + * output: args[0] = socket power in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = power limit value in mWatts + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = socket power limit value in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1 + * output: args[0] = maximum socket power limit in mWatts + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0 + * input: args[0] = apic id[31:16] + boost limit value in MHz[15:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0 + * input: args[0] = boost limit value in MHz + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id + * output: args[0] = boost limit value in MHz + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1 + * output: args[0] = proc hot status + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0 + * input: args[0] = df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */ + {0, 0, HSMP_SET}, + + /* + * HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2 + * output: args[0] = fclk in MHz, args[1] = mclk in MHz + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz =
1 + * output: args[0] = core clock in MHz + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1 + * output: args[0] = average c0 residency + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0 + * input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1 + * input: args[0] = nbioid[23:16] + * output: args[0] = max dpm level[15:8] + min dpm level[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1 + * output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] + + * bw in percentage[7:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1 + * output: args[0] = temperature in degree celsius. [15:8] integer part + + * [7:5] fractional part + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = refresh rate[3] + temperature range[2:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1 + * input: args[0] = DIMM address[7:0] + * output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] + + * DIMM address[7:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1 + * output: args[0] = frequency in MHz[31:16] + frequency source[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1 + * input: args[0] = apic id [31:0] + * output: args[0] = frequency in MHz[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1 + * output: args[0] = power in mW[31:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1 + * output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = io bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1 + * input: args[0] = link id[15:8] + bw type[2:0] + * output: args[0] = xgmi bandwidth in Mbps[31:0] + */ + {1, 1, HSMP_GET}, + + /* + * HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0 + * input: args[0] = min link width[15:8] + max link width[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1 + * input: args[0] = link rate control value + * output: args[0] = previous link rate control value + */ + {1, 1, HSMP_SET}, + + /* + * HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0 + * input: args[0] = power efficiency mode[2:0] + */ + {1, 1, HSMP_SET_GET}, + + /* + * HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0 + * input: args[0] = min df pstate[15:8] + max df pstate[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1 + * output: args[0] = metrics table version + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0 + */ + {0, 0, HSMP_GET}, + + /* + * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2 + * output: args[0] = lower 32 bits of 
the address + * output: args[1] = upper 32 bits of the address + */ + {0, 2, HSMP_GET}, + + /* + * HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0 + * input: args[0] = min xGMI p-state[15:8] + max xGMI p-state[7:0] + */ + {1, 0, HSMP_SET}, + + /* + * HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1 + * input: args[0] = set/get policy[31] + + * disable/enable independent control[0] + * output: args[0] = current policy[0] + */ + {1, 1, HSMP_SET_GET}, + + /* + * HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1 + * input: args[0] = set/get policy[31] + enable/disable DFC[0] + * output: args[0] = current policy[0] + */ + {1, 1, HSMP_SET_GET}, + + /* RESERVED(0x29-0x2f) */ + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + {0, 0, HSMP_RSVD}, + + /* + * HSMP_GET_RAPL_UNITS, response_sz = 1 + * output: args[0] = tu value[19:16] + esu value[12:8] + */ + {0, 1, HSMP_GET}, + + /* + * HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 1 + * input: args[0] = apic id[15:0] + * output: args[0] = lower 32 bits of energy + * output: args[1] = upper 32 bits of energy + */ + {1, 2, HSMP_GET}, + + /* + * HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 1 + * output: args[0] = lower 32 bits of energy + * output: args[1] = upper 32 bits of energy + */ + {0, 2, HSMP_GET}, + +}; + +/* Metrics table (supported only with proto version 6) */ +struct hsmp_metric_table { + __u32 accumulation_counter; + + /* TEMPERATURE */ + __u32 max_socket_temperature; + __u32 max_vr_temperature; + __u32 max_hbm_temperature; + __u64 max_socket_temperature_acc; + __u64 max_vr_temperature_acc; + __u64 max_hbm_temperature_acc; + + /* POWER */ + __u32 socket_power_limit; + __u32 max_socket_power_limit; + __u32 socket_power; + + /* ENERGY */ + __u64 timestamp; + __u64 socket_energy_acc; + __u64 ccd_energy_acc; + __u64 xcd_energy_acc; + __u64 aid_energy_acc; + __u64 hbm_energy_acc; + + /* FREQUENCY */ + __u32 cclk_frequency_limit; + __u32 gfxclk_frequency_limit; + __u32 fclk_frequency; + __u32 uclk_frequency; + __u32 socclk_frequency[4]; + __u32 vclk_frequency[4]; + __u32 dclk_frequency[4]; + __u32 lclk_frequency[4]; + __u64 gfxclk_frequency_acc[8]; + __u64 cclk_frequency_acc[96]; + + /* FREQUENCY RANGE */ + __u32 max_cclk_frequency; + __u32 min_cclk_frequency; + __u32 max_gfxclk_frequency; + __u32 min_gfxclk_frequency; + __u32 fclk_frequency_table[4]; + __u32 uclk_frequency_table[4]; + __u32 socclk_frequency_table[4]; + __u32 vclk_frequency_table[4]; + __u32 dclk_frequency_table[4]; + __u32 lclk_frequency_table[4]; + __u32 max_lclk_dpm_range; + __u32 min_lclk_dpm_range; + + /* XGMI */ + __u32 xgmi_width; + __u32 xgmi_bitrate; + __u64 xgmi_read_bandwidth_acc[8]; + __u64 xgmi_write_bandwidth_acc[8]; + + /* ACTIVITY */ + __u32 socket_c0_residency; + __u32 socket_gfx_busy; + __u32 dram_bandwidth_utilization; + __u64 socket_c0_residency_acc; + __u64 socket_gfx_busy_acc; + __u64 dram_bandwidth_acc; + __u32 max_dram_bandwidth; + __u64 dram_bandwidth_utilization_acc; + __u64 pcie_bandwidth_acc[4]; + + /* THROTTLERS */ + __u32 prochot_residency_acc; + __u32 ppt_residency_acc; + __u32 socket_thm_residency_acc; + __u32 vr_thm_residency_acc; + __u32 hbm_thm_residency_acc; + __u32 spare; + + /* New items at the end to maintain driver compatibility */ + __u32 gfxclk_frequency[8]; +}; + +/* Reset to default packing */ +#pragma pack() + +/* Define unique ioctl command for hsmp msgs using generic _IOWR */ +#define HSMP_BASE_IOCTL_NR 0xF8 
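
To make the message format concrete, here is a minimal userspace sketch that round-trips HSMP_TEST through the ioctl defined from this base number just below. The /dev/hsmp device node is an assumption about how the platform driver exposes the interface, and error handling is trimmed for brevity.

/* Hedged usage sketch: HSMP_TEST returns args[0] + 1 for socket 0. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/amd_hsmp.h>

int main(void)
{
	struct hsmp_message msg;
	int fd = open("/dev/hsmp", O_RDWR);	/* assumed device node */

	if (fd < 0)
		return 1;

	memset(&msg, 0, sizeof(msg));
	msg.msg_id = HSMP_TEST;		/* 01h: SMU increments the input */
	msg.num_args = 1;
	msg.response_sz = 1;
	msg.args[0] = 41;
	msg.sock_ind = 0;		/* socket 0 */

	if (ioctl(fd, HSMP_IOCTL_CMD, &msg) == 0)
		printf("HSMP_TEST: 41 -> %u\n", msg.args[0]);

	close(fd);
	return 0;
}
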
+#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message) + +#endif /*_ASM_X86_AMD_HSMP_H_*/ diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h index 580e3c567046..6beb55bbefa4 100644 --- a/arch/x86/include/uapi/asm/auxvec.h +++ b/arch/x86/include/uapi/asm/auxvec.h @@ -12,9 +12,9 @@ /* entries in ARCH_DLINFO: */ #if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) -# define AT_VECTOR_SIZE_ARCH 2 +# define AT_VECTOR_SIZE_ARCH 3 #else /* else it's non-compat x86-64 */ -# define AT_VECTOR_SIZE_ARCH 1 +# define AT_VECTOR_SIZE_ARCH 2 #endif #endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 60733f137e9a..dafbf581c515 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -2,14 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 -#define SETUP_APPLE_PROPERTIES 5 -#define SETUP_JAILHOUSE 6 +#include <asm/setup_data.h> /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -29,8 +22,11 @@ #define XLF_EFI_HANDOVER_32 (1<<2) #define XLF_EFI_HANDOVER_64 (1<<3) #define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/screen_info.h> @@ -39,14 +35,6 @@ #include <asm/ist.h> #include <video/edid.h> -/* extensible setup data list node */ -struct setup_data { - __u64 next; - __u32 type; - __u32 len; - __u8 data[0]; -}; - struct setup_header { __u8 setup_sects; __u16 root_flags; @@ -86,6 +74,7 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; + __u32 kernel_info_offset; } __attribute__((packed)); struct sys_desc_table { @@ -119,35 +108,10 @@ struct efi_info { #define E820_MAX_ENTRIES_ZEROPAGE 128 /* - * The E820 memory region entry of the boot protocol ABI: - */ -struct boot_e820_entry { - __u64 addr; - __u64 size; - __u32 type; -} __attribute__((packed)); - -/* * Smallest compatible version of jailhouse_setup_data required by this kernel. */ #define JAILHOUSE_SETUP_REQUIRED_VERSION 1 -/* - * The boot loader is passing platform information via this Jailhouse-specific - * setup data structure. - */ -struct jailhouse_setup_data { - __u16 version; - __u16 compatible_version; - __u16 pm_timer_address; - __u16 num_cpus; - __u64 pci_mmconfig_base; - __u32 tsc_khz; - __u32 apic_khz; - __u8 standard_ioapic; - __u8 cpu_ids[255]; -} __attribute__((packed)); - /* The so-called "zeropage" */ struct boot_params { struct screen_info screen_info; /* 0x000 */ @@ -164,7 +128,8 @@ struct boot_params { __u32 ext_ramdisk_image; /* 0x0c0 */ __u32 ext_ramdisk_size; /* 0x0c4 */ __u32 ext_cmd_line_ptr; /* 0x0c8 */ - __u8 _pad4[116]; /* 0x0cc */ + __u8 _pad4[112]; /* 0x0cc */ + __u32 cc_blob_address; /* 0x13c */ struct edid_info edid_info; /* 0x140 */ struct efi_info efi_info; /* 0x1c0 */ __u32 alt_mem_k; /* 0x1e0 */ @@ -211,7 +176,7 @@ struct boot_params { * handling of page tables. * * These enums should only ever be used by x86 code, and the code that uses - * it should be well contained and compartamentalized. + * it should be well contained and compartmentalized. * * KVM and Xen HVM do not have a subarch as these are expected to follow * standard x86 boot entries. 
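
On the auxvec.h change above: bumping AT_VECTOR_SIZE_ARCH reserves room for one more x86-specific ARCH_DLINFO entry in the auxiliary vector handed to each new process. A small sketch of how such entries surface in userspace, using only the stock glibc accessor (nothing x86-new is assumed):

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* The vDSO base (AT_SYSINFO_EHDR) is one of the arch-specific
	 * auxv entries counted by AT_VECTOR_SIZE_ARCH. */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

	printf("vDSO mapped at %#lx\n", vdso);
	return 0;
}
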
If there is a genuine need for "hypervisor" type @@ -229,10 +194,10 @@ struct boot_params { * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path, * which start at asm startup_xen() entry point and later jump to the C * xen_start_kernel() entry point. Both domU and dom0 type of guests are - * currently supportd through this PV boot path. + * currently supported through this PV boot path. * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform * systems which do not have the PCI legacy interfaces. - * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC for + * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC * for settop boxes and media devices, the use of a subarch for CE4100 * is more of a hack... */ @@ -245,6 +210,6 @@ enum x86_hardware_subarch { X86_NR_SUBARCHS, }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/arch/x86/include/uapi/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h index 484e3cfd7ef2..149143cab9ff 100644 --- a/arch/x86/include/uapi/asm/byteorder.h +++ b/arch/x86/include/uapi/asm/byteorder.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index d95d080b30e3..41da492dfb01 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -15,7 +15,26 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ -/* Define reserved bits in DR6 which are always set to 1 */ +/* + * Define bits in DR6 which are set to 1 by default. + * + * This is also the DR6 architectural value following Power-up, Reset or INIT. + * + * Note, with the introduction of Bus Lock Detection (BLD) and Restricted + * Transactional Memory (RTM), the DR6 register has been modified: + * + * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports + * Bus Lock Detection. The assertion of a bus lock could clear it. + * + * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports + * restricted transactional memory. #DB occurred inside an RTM region + * could clear it. + * + * Apparently, DR6.BLD and DR6.RTM are active low bits. + * + * As a result, DR6_RESERVED is an incorrect name now, but it is kept for + * compatibility. 
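
Because DR6.BLD and DR6.RTM are active low while the trap bits are active high, a #DB handler cannot test the register uniformly until it normalizes the polarity. A sketch of that idiom follows; the constants mirror the DR6_RESERVED and DR_BUS_LOCK values defined just below, and the helpers are illustrative, not the kernel's exact code.

/* Sketch: make every "event occurred" bit of DR6 read as 1. */
static unsigned long dr6_normalize(unsigned long dr6)
{
	/* XOR with the reset value flips the active-low bits (BLD, RTM)
	 * and clears the reserved bits that always read as 1. */
	return dr6 ^ 0xFFFF0FF0UL;			/* DR6_RESERVED */
}

static int dr6_is_bus_lock(unsigned long dr6)
{
	return (dr6_normalize(dr6) & 0x800) != 0;	/* DR_BUS_LOCK */
}
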
+ */ #define DR6_RESERVED (0xFFFF0FF0) #define DR_TRAP0 (0x1) /* db0 */ @@ -24,6 +43,7 @@ #define DR_TRAP3 (0x8) /* db3 */ #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) +#define DR_BUS_LOCK (0x800) /* bus_lock */ #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 2f491efe3a12..55bc66867156 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -54,7 +54,7 @@ */ #define E820_RESERVED_KERN 128 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> struct e820entry { __u64 addr; /* start of memory segment */ @@ -76,7 +76,7 @@ struct e820map { #define BIOS_ROM_BASE 0xffe00000 #define BIOS_ROM_END 0xffffffff -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_E820_H */ diff --git a/arch/x86/include/uapi/asm/elf.h b/arch/x86/include/uapi/asm/elf.h new file mode 100644 index 000000000000..468e135fa285 --- /dev/null +++ b/arch/x86/include/uapi/asm/elf.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_X86_ELF_H +#define _UAPI_ASM_X86_ELF_H + +#include <linux/types.h> + +struct x86_xfeat_component { + __u32 type; + __u32 size; + __u32 offset; + __u32 flags; +} __packed; + +_Static_assert(sizeof(struct x86_xfeat_component) % 4 == 0, "x86_xfeat_component is not aligned"); + +#endif /* _UAPI_ASM_X86_ELF_H */ diff --git a/arch/x86/include/uapi/asm/errno.h b/arch/x86/include/uapi/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/x86/include/uapi/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/x86/include/uapi/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/x86/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h index 6ebaae90e207..054604aba9f0 100644 --- a/arch/x86/include/uapi/asm/hwcap2.h +++ b/arch/x86/include/uapi/asm/hwcap2.h @@ -1,8 +1,13 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_X86_HWCAP2_H #define _ASM_X86_HWCAP2_H +#include <linux/const.h> + /* MONITOR/MWAIT enabled in Ring 3 */ -#define HWCAP2_RING3MWAIT (1 << 0) +#define HWCAP2_RING3MWAIT _BITUL(0) + +/* Kernel allows FSGSBASE instructions available in Ring 3 */ +#define HWCAP2_FSGSBASE _BITUL(1) #endif diff --git a/arch/x86/include/uapi/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/x86/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/x86/include/uapi/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/x86/include/uapi/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/x86/include/uapi/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/x86/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index dabfcf7c3941..7ceff6583652 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -7,11 +7,15 @@ * */ +#include 
<linux/const.h> +#include <linux/bits.h> #include <linux/types.h> #include <linux/ioctl.h> +#include <linux/stddef.h> #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define DE_VECTOR 0 #define DB_VECTOR 1 @@ -31,6 +35,11 @@ #define MC_VECTOR 18 #define XM_VECTOR 19 #define VE_VECTOR 20 +#define CP_VECTOR 21 + +#define HV_VECTOR 28 +#define VC_VECTOR 29 +#define SX_VECTOR 30 /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT @@ -38,7 +47,6 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 @@ -47,19 +55,10 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS -#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ @@ -111,6 +110,8 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -157,6 +158,19 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; +#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1 + /* for KVM_GET_FPU and KVM_SET_FPU */ struct kvm_fpu { __u8 fpr[8][16]; @@ -183,15 +197,40 @@ struct kvm_msrs { __u32 nmsrs; /* number of msrs in entries */ __u32 pad; - struct kvm_msr_entry entries[0]; + struct kvm_msr_entry entries[]; }; /* for KVM_GET_MSR_INDEX_LIST */ struct kvm_msr_list { __u32 nmsrs; /* number of msrs in entries */ - __u32 indices[0]; + __u32 indices[]; +}; + +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) +#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \ + KVM_MSR_FILTER_WRITE) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ }; +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#ifndef __KERNEL__ +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#endif +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) +#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; @@ -206,7 +245,7 @@ struct kvm_cpuid_entry { struct kvm_cpuid { __u32 nent; __u32 padding; - struct kvm_cpuid_entry entries[0]; + struct kvm_cpuid_entry entries[]; }; struct kvm_cpuid_entry2 { @@ -228,7 +267,7 @@ struct kvm_cpuid_entry2 { struct kvm_cpuid2 { __u32 nent; __u32 padding; - struct kvm_cpuid_entry2 entries[0]; + struct kvm_cpuid_entry2 entries[]; }; /* for KVM_GET_PIT and 
KVM_SET_PIT */ @@ -260,6 +299,7 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW_BP 0x00020000 #define KVM_GUESTDBG_INJECT_DB 0x00040000 #define KVM_GUESTDBG_INJECT_BP 0x00080000 +#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { @@ -270,7 +310,8 @@ struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; -#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 +#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 +#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002 struct kvm_pit_state2 { struct kvm_pit_channel_state channels[3]; @@ -289,6 +330,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 #define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 +#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -323,7 +365,10 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u8 reserved[27]; + struct { + __u8 pending; + } triple_fault; + __u8 reserved[26]; __u8 exception_has_payload; __u64 exception_payload; }; @@ -337,9 +382,23 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE */ +/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ struct kvm_xsave { + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. + */ __u32 region[1024]; + __u32 extra[]; }; #define KVM_MAX_XCRS 16 @@ -357,6 +416,35 @@ struct kvm_xcrs { __u64 padding[16]; }; +#define KVM_X86_REG_TYPE_MSR 2 +#define KVM_X86_REG_TYPE_KVM 3 + +#define KVM_X86_KVM_REG_SIZE(reg) \ +({ \ + reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \ +}) + +#define KVM_X86_REG_TYPE_SIZE(type, reg) \ +({ \ + __u64 type_size = (__u64)type << 32; \ + \ + type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \ + type == KVM_X86_REG_TYPE_KVM ? 
KVM_X86_KVM_REG_SIZE(reg) : \ + 0; \ + type_size; \ +}) + +#define KVM_X86_REG_ID(type, index) \ + (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index) + +#define KVM_X86_REG_MSR(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index) +#define KVM_X86_REG_KVM(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index) + +/* KVM-defined registers starting from 0 */ +#define KVM_REG_GUEST_SSP 0 + #define KVM_SYNC_X86_REGS (1UL << 0) #define KVM_SYNC_X86_SREGS (1UL << 1) #define KVM_SYNC_X86_EVENTS (1UL << 2) @@ -378,46 +466,581 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; -#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) +#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) +#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) +#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) + +#define KVM_STATE_NESTED_FORMAT_VMX 0 +#define KVM_STATE_NESTED_FORMAT_SVM 1 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008 +#define KVM_STATE_NESTED_GIF_SET 0x00000100 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 -struct kvm_vmx_nested_state { +#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 + +#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 + +#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 +# define KVM_X86_SNP_POLICY_BITS 1 + +struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +}; + +struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; - __u64 vmcs_pa; + __u64 vmcs12_pa; struct { __u16 flags; } smm; + + __u16 pad; + + __u32 flags; + __u64 preemption_timer_deadline; +}; + +struct kvm_svm_nested_state_data { + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; +}; + +struct kvm_svm_nested_state_hdr { + __u64 vmcb_pa; }; /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { - /* KVM_STATE_* flags */ __u16 flags; - - /* 0 for VMX, 1 for SVM. */ __u16 format; - - /* 128 for SVM, 128 + VMCS size for VMX. */ __u32 size; union { - /* VMXON, VMCS */ - struct kvm_vmx_nested_state vmx; + struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; /* Pad the header to 128 bytes. */ __u8 pad[120]; - }; + } hdr; + + /* + * Define data region as 0 bytes to preserve backwards-compatibility + * with the old definition of kvm_nested_state in order to avoid + * changing KVM_{GET,PUT}_NESTED_STATE ioctl values.
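
A practical consequence of keeping the 128-byte header and appending the VMX/SVM payloads as flexible arrays is that userspace must size the buffer itself before calling the ioctl. A hedged sketch for a VMX guest, with the ioctl number taken from <linux/kvm.h> and error handling reduced to the minimum:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

struct kvm_nested_state *get_vmx_nested_state(int vcpu_fd)
{
	/* 128-byte header plus vmcs12 and shadow vmcs12 pages */
	size_t sz = sizeof(struct kvm_nested_state) +
		    2 * KVM_STATE_NESTED_VMX_VMCS_SIZE;
	struct kvm_nested_state *state = calloc(1, sz);

	if (!state)
		return NULL;

	state->size = sz;	/* tell KVM how much room it may use */
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}
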
+ */ + union { + __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx); + __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm); + } data; +}; + +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[]; +}; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) +#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + +/* for KVM_CAP_MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +/* for KVM_CAP_XEN_HVM */ +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) + +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + union { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + __u64 hva; + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. 
+ */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 hva; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + /* Second time is the charm; improved versions of the above ioctls. 
*/ + KVM_SEV_INIT2, + + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u32 pad0; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u32 pad0; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u32 pad1; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u32 pad2; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u32 pad3; + __u64 session_uaddr; + __u32 session_len; + __u32 pad4; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_PAGE_TYPE_INVALID 0x0 +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + +/* + * Masked event layout. 
+ * Bits Description + * ---- ----------- + * 7:0 event select (low bits) + * 15:8 umask match + * 31:16 unused + * 35:32 event select (high bits) + * 36:54 unused + * 55 exclude bit + * 63:56 umask mask + */ + +#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \ + (((event_select) & 0xFFULL) | (((event_select) & 0xF00ULL) << 24) | \ + (((mask) & 0xFFULL) << 56) | \ + (((match) & 0xFFULL) << 8) | \ + ((__u64)(!!(exclude)) << 55)) + +#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ + (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + +/* x86-specific KVM_EXIT_HYPERCALL flags. */ +#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + +#define KVM_X86_DEFAULT_VM 0 +#define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 + +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-command. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate value or a pointer to the + * actual data in process virtual address space. If sub-command doesn't + * use it, set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. If it doesn't, KVM may have a wrong idea of the + * guest's vCPUIDs, and KVM may wrongly emulate CPUIDs or MSRs that + * the TDX module doesn't virtualize. + */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) - __u8 data[0]; +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; }; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316..a1efa7907a0b 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -8,6 +8,7 @@ * should be used to determine that a VM is running under KVM.
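
Stepping back to the TDX section above: struct kvm_tdx_cmd is the envelope userspace hands to KVM for every KVM_TDX_* sub-command. A sketch, under the assumption that these commands are multiplexed through the KVM_MEMORY_ENCRYPT_OP vm ioctl as the SEV commands are; the caller is expected to have sized caps and set caps->cpuid.nent beforehand:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int tdx_get_capabilities(int vm_fd, struct kvm_tdx_capabilities *caps)
{
	struct kvm_tdx_cmd cmd = {
		.id = KVM_TDX_CAPABILITIES,
		.data = (__u64)(uintptr_t)caps,	/* output buffer */
	};

	/* On failure, cmd.hw_error may carry a TDX SEAMCALL status. */
	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}
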
*/ #define KVM_CPUID_SIGNATURE 0x40000000 +#define KVM_SIGNATURE "KVMKVMKVM\0\0\0" /* This CPUID returns two feature bitmaps in eax, edx. Before enabling * a particular paravirtualization, the appropriate feature bit should @@ -29,6 +30,12 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12 +#define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_ASYNC_PF_INT 14 +#define KVM_FEATURE_MSI_EXT_DEST_ID 15 +#define KVM_FEATURE_HC_MAP_GPA_RANGE 16 +#define KVM_FEATURE_MIGRATION_CONTROL 17 #define KVM_HINTS_REALTIME 0 @@ -47,6 +54,10 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06 +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 +#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 struct kvm_steal_time { __u64 steal; @@ -78,6 +89,21 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + +/* MSR_KVM_ASYNC_PF_INT */ +#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) + +/* MSR_KVM_MIGRATION_CONTROL */ +#define KVM_MIGRATION_READY (1 << 0) + +/* KVM_HC_MAP_GPA_RANGE */ +#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0 +#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0) +#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1) +#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4) +#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1) +#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 @@ -109,9 +135,13 @@ struct kvm_mmu_op_release_pt { #define KVM_PV_REASON_PAGE_READY 2 struct kvm_vcpu_pv_apf_data { - __u32 reason; - __u8 pad[60]; - __u32 enabled; + /* Used for 'page not present' events delivered via #PF */ + __u32 flags; + + /* Used for 'page ready' events delivered via interrupt notification */ + __u32 token; + + __u8 pad[56]; }; #define KVM_PV_EOI_BIT 0 diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index d62ac5db093b..a82c039d8e6a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -12,7 +12,7 @@ /* The size of each LDT entry. */ #define LDT_ENTRY_SIZE 8 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Note on 64bit base and limit is ignored and you cannot set DS/ES/CS * not to the default values if you still want to do syscalls. This @@ -44,5 +44,5 @@ struct user_desc { #define MODIFY_LDT_CONTENTS_STACK 1 #define MODIFY_LDT_CONTENTS_CODE 2 -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_LDT_H */ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 955c2a2e1cf9..cb6b48a7c22b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -8,7 +8,8 @@ /* * Fields are zero when not available. Also, this struct is shared with * userspace mcelog and thus must keep existing fields at current offsets. - * Only add new fields to the end of the structure + * Only add new, shared fields to the end of the structure. + * Do not add vendor-specific fields. 
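
Tying the kvm_para.h constants above together: a guest identifies KVM by comparing KVM_SIGNATURE against the hypervisor CPUID leaf. A minimal detection sketch (a production version would first check the CPUID hypervisor bit, CPUID.1:ECX[31]):

#include <cpuid.h>
#include <string.h>

static int running_on_kvm(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[12];

	/* Hypervisor leaves sit outside the basic CPUID range, so query
	 * KVM_CPUID_SIGNATURE directly instead of via __get_cpuid(). */
	__cpuid(0x40000000, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);

	return memcmp(sig, "KVMKVMKVM\0\0\0", 12) == 0;
}
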
*/ struct mce { __u64 status; /* Bank's MCi_STATUS MSR */ @@ -35,6 +36,7 @@ struct mce { __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ __u32 microcode; /* Microcode revision */ + __u64 kflags; /* Internal kernel use */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index d4a8d0424bfb..ac1e6277212b 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -3,28 +3,7 @@ #define _ASM_X86_MMAN_H #define MAP_32BIT 0x40 /* only give out 32bit addresses */ - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -/* - * Take the 4 protection key bits out of the vma->vm_flags - * value and turn them in to the bits that we can put in - * to a pte. - * - * Only override these if Protection Keys are available - * (which is only on 64-bit). - */ -#define arch_vm_get_page_prot(vm_flags) __pgprot( \ - ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ - ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ - ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ - ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) - -#define arch_calc_vm_prot_bits(prot, key) ( \ - ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ - ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ - ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ - ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) -#endif +#define MAP_ABOVE4G 0x80 /* only map above 4GB */ #include <asm-generic/mman.h> diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h index 90ab9a795b49..ac83e25bbf37 100644 --- a/arch/x86/include/uapi/asm/msgbuf.h +++ b/arch/x86/include/uapi/asm/msgbuf.h @@ -5,19 +5,22 @@ #if !defined(__x86_64__) || !defined(__ILP32__) #include <asm-generic/msgbuf.h> #else + +#include <asm/ipcbuf.h> + /* * The msqid64_ds structure for x86 architecture with x32 ABI. * * On x86-32 and x86-64 we can just use the generic definition, but - * x32 uses the same binary layout as x86_64, which is differnet + * x32 uses the same binary layout as x86_64, which is different * from other 32-bit architectures. 
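
On the MAP_ABOVE4G flag added to mman.h above: it asks mmap() to hand out only addresses above 4 GiB (it arrived with the x86 shadow stack enabling). A sketch that falls back to the raw value, since C library headers may not expose the flag yet:

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_ABOVE4G
#define MAP_ABOVE4G 0x80	/* value from the header above */
#endif

int main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_ABOVE4G, -1, 0);

	if (p != MAP_FAILED)
		printf("mapped at %p\n", p);	/* expected above 4 GiB */
	return 0;
}
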
*/ struct msqid64_ds { struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ - __kernel_time_t msg_rtime; /* last msgrcv time */ - __kernel_time_t msg_ctime; /* last change time */ + __kernel_long_t msg_stime; /* last msgsnd time */ + __kernel_long_t msg_rtime; /* last msgrcv time */ + __kernel_long_t msg_ctime; /* last change time */ __kernel_ulong_t msg_cbytes; /* current number of bytes on queue */ __kernel_ulong_t msg_qnum; /* number of messages in queue */ __kernel_ulong_t msg_qbytes; /* max number of bytes on queue */ diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h index e7516b402a00..4b8917ca28fe 100644 --- a/arch/x86/include/uapi/asm/msr.h +++ b/arch/x86/include/uapi/asm/msr.h @@ -2,7 +2,7 @@ #ifndef _UAPI_ASM_X86_MSR_H #define _UAPI_ASM_X86_MSR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/ioctl.h> @@ -10,5 +10,5 @@ #define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8]) #define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8]) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_MSR_H */ diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index 376563f2bac1..3a8a8eb8ac3a 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -81,14 +81,6 @@ typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) @@ -115,9 +107,9 @@ struct mtrr_state_type { #define MTRR_NUM_TYPES 7 /* - * Invalid MTRR memory type. mtrr_type_lookup() returns this value when - * MTRRs are disabled. Note, this value is allocated from the reserved - * values (0x7-0xff) of the MTRR memory types. + * Invalid MTRR memory type. No longer used outside of MTRR code. + * Note, this value is allocated from the reserved values (0x7-0xff) of + * the MTRR memory types. */ #define MTRR_TYPE_INVALID 0xff diff --git a/arch/x86/include/uapi/asm/param.h b/arch/x86/include/uapi/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 --- a/arch/x86/include/uapi/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index f3329cabce5c..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -27,8 +27,32 @@ enum perf_event_x86_regs { PERF_REG_X86_R13, PERF_REG_X86_R14, PERF_REG_X86_R15, - + /* These are the limits for the GPRs. 
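
A note on the msr.h interface touched above: besides the X86_IOC_*MSR_REGS ioctls, the msr driver's per-CPU device nodes accept a plain pread() whose file offset selects the MSR index. A sketch (needs the msr module loaded and sufficient privileges):

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

static int rdmsr_on_cpu0(uint32_t msr, uint64_t *val)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return -1;

	n = pread(fd, val, sizeof(*val), msr);	/* offset = MSR index */
	close(fd);
	return n == (ssize_t)sizeof(*val) ? 0 : -1;
}
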
*/ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, + + /* These all need two bits set because they are 128bit */ + PERF_REG_X86_XMM0 = 32, + PERF_REG_X86_XMM1 = 34, + PERF_REG_X86_XMM2 = 36, + PERF_REG_X86_XMM3 = 38, + PERF_REG_X86_XMM4 = 40, + PERF_REG_X86_XMM5 = 42, + PERF_REG_X86_XMM6 = 44, + PERF_REG_X86_XMM7 = 46, + PERF_REG_X86_XMM8 = 48, + PERF_REG_X86_XMM9 = 50, + PERF_REG_X86_XMM10 = 52, + PERF_REG_X86_XMM11 = 54, + PERF_REG_X86_XMM12 = 56, + PERF_REG_X86_XMM13 = 58, + PERF_REG_X86_XMM14 = 60, + PERF_REG_X86_XMM15 = 62, + + /* These include both GPRs and XMMX registers */ + PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 5a6aac9fa41f..384e2cc6ac19 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -2,16 +2,42 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + +#define ARCH_XCOMP_TILECFG 17 +#define ARCH_XCOMP_TILEDATA 18 + +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 + +/* Don't use 0x3001-0x3004 because of old glibcs */ + +#define ARCH_GET_UNTAG_MASK 0x4001 +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 +#define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 + +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +/* ARCH_SHSTK_ features bits */ +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index bcba3c643e63..81d0c8bf1137 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -82,6 +82,10 @@ #define X86_CR3_PCID_BITS 12 #define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) +#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */ +#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT) +#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */ +#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT) #define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ #define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) @@ -130,6 +134,19 @@ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) #define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) +#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ +#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) +#define X86_CR4_LASS_BIT 27 /* enable Linear Address Space Separation support */ +#define X86_CR4_LASS 
_BITUL(X86_CR4_LASS_BIT) +#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */ +#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT) + +#ifdef __x86_64__ +#define X86_CR4_FRED_BIT 32 /* enable FRED kernel entry */ +#define X86_CR4_FRED _BITUL(X86_CR4_FRED_BIT) +#else +#define X86_CR4_FRED (0) +#endif /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 16074b9c93bb..5823584dea13 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h @@ -25,7 +25,7 @@ #else /* __i386__ */ -#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS) /* * C ABI says these regs are callee-preserved. They aren't saved on kernel entry * unless syscall needs a complete, fully filled "struct pt_regs". @@ -57,7 +57,7 @@ #define EFLAGS 144 #define RSP 152 #define SS 160 -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* top of stack page */ #define FRAME_SIZE 168 @@ -87,7 +87,7 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #endif diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index 85165c0edafc..e0b5b4f6226b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h @@ -7,7 +7,7 @@ #include <asm/processor-flags.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef __i386__ /* this struct defines the way the registers are stored on the @@ -81,6 +81,6 @@ struct pt_regs { -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_PTRACE_H */ diff --git a/arch/x86/include/uapi/asm/resource.h b/arch/x86/include/uapi/asm/resource.h deleted file mode 100644 index 04bc4db8921b..000000000000 --- a/arch/x86/include/uapi/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index 89de6cd9f0a7..71205b02a191 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SEMBUF_H #define _ASM_X86_SEMBUF_H +#include <asm/ipcbuf.h> + /* * The semid64_ds structure for x86 architecture. * Note extra padding because this structure is passed back and forth @@ -21,9 +23,9 @@ struct semid64_ds { unsigned long sem_ctime; /* last change time */ unsigned long sem_ctime_high; #else - __kernel_time_t sem_otime; /* last semop time */ + __kernel_long_t sem_otime; /* last semop time */ __kernel_ulong_t __unused1; - __kernel_time_t sem_ctime; /* last change time */ + __kernel_long_t sem_ctime; /* last change time */ __kernel_ulong_t __unused2; #endif __kernel_ulong_t sem_nsems; /* no. 
of semaphores in array */ diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h new file mode 100644 index 000000000000..2671c4e1b3a0 --- /dev/null +++ b/arch/x86/include/uapi/asm/setup_data.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_X86_SETUP_DATA_H +#define _UAPI_ASM_X86_SETUP_DATA_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_KEXEC_KHO 10 +#define SETUP_ENUM_MAX SETUP_KEXEC_KHO + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +#ifndef __ASSEMBLER__ + +#include <linux/types.h> + +/* extensible setup data list node */ +struct setup_data { + __u64 next; + __u32 type; + __u32 len; + __u8 data[]; +}; + +/* extensible setup indirect data node */ +struct setup_indirect { + __u32 type; + __u32 reserved; /* Reserved, must be set to zero. */ + __u64 len; + __u64 addr; +}; + +/* + * The E820 memory region entry of the boot protocol ABI: + */ +struct boot_e820_entry { + __u64 addr; + __u64 size; + __u32 type; +} __attribute__((packed)); + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + struct { + __u16 version; + __u16 compatible_version; + } __attribute__((packed)) hdr; + struct { + __u16 pm_timer_address; + __u16 num_cpus; + __u64 pci_mmconfig_base; + __u32 tsc_khz; + __u32 apic_khz; + __u8 standard_ioapic; + __u8 cpu_ids[255]; + } __attribute__((packed)) v1; + struct { + __u32 flags; + } __attribute__((packed)) v2; +} __attribute__((packed)); + +/* + * IMA buffer setup data information from the previous kernel during kexec + */ +struct ima_setup_data { + __u64 addr; + __u64 size; +} __attribute__((packed)); + +/* + * Locations of kexec handover metadata + */ +struct kho_data { + __u64 fdt_addr; + __u64 fdt_size; + __u64 scratch_addr; + __u64 scratch_size; +} __attribute__((packed)); + +#endif /* __ASSEMBLER__ */ + +#endif /* _UAPI_ASM_X86_SETUP_DATA_H */ diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h new file mode 100644 index 000000000000..3c4d52072189 --- /dev/null +++ b/arch/x86/include/uapi/asm/sgx.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright(c) 2016-20 Intel Corporation. + */ +#ifndef _UAPI_ASM_X86_SGX_H +#define _UAPI_ASM_X86_SGX_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/** + * enum sgx_page_flags - page control flags + * @SGX_PAGE_MEASURE: Measure the page contents with a sequence of + * ENCLS[EEXTEND] operations. 
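
Returning to the setup_data list type defined in setup_data.h above: each node carries the physical address of the next node, with 0 terminating the chain. A traversal sketch for an early-boot environment where physical addresses are directly dereferenceable; consume_rng_seed() is a hypothetical consumer, and real kernels map each node before touching it:

#include <asm/setup_data.h>

static void walk_setup_data(__u64 pa)	/* boot_params.hdr.setup_data */
{
	while (pa) {
		struct setup_data *sd = (struct setup_data *)(unsigned long)pa;

		if (sd->type == SETUP_RNG_SEED)
			consume_rng_seed(sd->data, sd->len);	/* hypothetical */

		pa = sd->next;	/* 0 terminates the chain */
	}
}
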
+ */
+enum sgx_page_flags {
+	SGX_PAGE_MEASURE	= 0x01,
+};
+
+#define SGX_MAGIC 0xA4
+
+#define SGX_IOC_ENCLAVE_CREATE \
+	_IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
+#define SGX_IOC_ENCLAVE_ADD_PAGES \
+	_IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages)
+#define SGX_IOC_ENCLAVE_INIT \
+	_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
+#define SGX_IOC_ENCLAVE_PROVISION \
+	_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
+#define SGX_IOC_VEPC_REMOVE_ALL \
+	_IO(SGX_MAGIC, 0x04)
+#define SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS \
+	_IOWR(SGX_MAGIC, 0x05, struct sgx_enclave_restrict_permissions)
+#define SGX_IOC_ENCLAVE_MODIFY_TYPES \
+	_IOWR(SGX_MAGIC, 0x06, struct sgx_enclave_modify_types)
+#define SGX_IOC_ENCLAVE_REMOVE_PAGES \
+	_IOWR(SGX_MAGIC, 0x07, struct sgx_enclave_remove_pages)
+
+/**
+ * struct sgx_enclave_create - parameter structure for the
+ *                             %SGX_IOC_ENCLAVE_CREATE ioctl
+ * @src:	address for the SECS page data
+ */
+struct sgx_enclave_create {
+	__u64 src;
+};
+
+/**
+ * struct sgx_enclave_add_pages - parameter structure for the
+ *                                %SGX_IOC_ENCLAVE_ADD_PAGES ioctl
+ * @src:	start address for the page data
+ * @offset:	starting page offset
+ * @length:	length of the data (multiple of the page size)
+ * @secinfo:	address for the SECINFO data
+ * @flags:	page control flags
+ * @count:	number of bytes added (multiple of the page size)
+ */
+struct sgx_enclave_add_pages {
+	__u64 src;
+	__u64 offset;
+	__u64 length;
+	__u64 secinfo;
+	__u64 flags;
+	__u64 count;
+};
+
+/**
+ * struct sgx_enclave_init - parameter structure for the
+ *                           %SGX_IOC_ENCLAVE_INIT ioctl
+ * @sigstruct:	address for the SIGSTRUCT data
+ */
+struct sgx_enclave_init {
+	__u64 sigstruct;
+};
+
+/**
+ * struct sgx_enclave_provision - parameter structure for the
+ *                                %SGX_IOC_ENCLAVE_PROVISION ioctl
+ * @fd:	file handle of /dev/sgx_provision
+ */
+struct sgx_enclave_provision {
+	__u64 fd;
+};
+
+/**
+ * struct sgx_enclave_restrict_permissions - parameters for ioctl
+ *                                           %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
+ * @offset:	starting page offset (page aligned relative to enclave base
+ *		address defined in SECS)
+ * @length:	length of memory (multiple of the page size)
+ * @permissions: new permission bits for pages in range described by @offset
+ *		and @length
+ * @result:	(output) SGX result code of ENCLS[EMODPR] function
+ * @count:	(output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_restrict_permissions {
+	__u64 offset;
+	__u64 length;
+	__u64 permissions;
+	__u64 result;
+	__u64 count;
+};
+
+/**
+ * struct sgx_enclave_modify_types - parameters for ioctl
+ *                                   %SGX_IOC_ENCLAVE_MODIFY_TYPES
+ * @offset:	starting page offset (page aligned relative to enclave base
+ *		address defined in SECS)
+ * @length:	length of memory (multiple of the page size)
+ * @page_type:	new type for pages in range described by @offset and @length
+ * @result:	(output) SGX result code of ENCLS[EMODT] function
+ * @count:	(output) bytes successfully changed (multiple of page size)
+ */
+struct sgx_enclave_modify_types {
+	__u64 offset;
+	__u64 length;
+	__u64 page_type;
+	__u64 result;
+	__u64 count;
+};
+
+/**
+ * struct sgx_enclave_remove_pages - %SGX_IOC_ENCLAVE_REMOVE_PAGES parameters
+ * @offset:	starting page offset (page aligned relative to enclave base
+ *		address defined in SECS)
+ * @length:	length of memory (multiple of the page size)
+ * @count:	(output) bytes successfully changed (multiple of page size)
+ *
+ * Regular (PT_REG) or TCS (PT_TCS) pages can be removed from an initialized
+ * enclave
if the system supports SGX2. First, the %SGX_IOC_ENCLAVE_MODIFY_TYPES + * ioctl() should be used to change the page type to PT_TRIM. After that + * succeeds ENCLU[EACCEPT] should be run from within the enclave and then + * %SGX_IOC_ENCLAVE_REMOVE_PAGES can be used to complete the page removal. + */ +struct sgx_enclave_remove_pages { + __u64 offset; + __u64 length; + __u64 count; +}; + +struct sgx_enclave_run; + +/** + * typedef sgx_enclave_user_handler_t - Exit handler function accepted by + * __vdso_sgx_enter_enclave() + * @rdi: RDI at the time of EEXIT, undefined on AEX + * @rsi: RSI at the time of EEXIT, undefined on AEX + * @rdx: RDX at the time of EEXIT, undefined on AEX + * @rsp: RSP (untrusted) at the time of EEXIT or AEX + * @r8: R8 at the time of EEXIT, undefined on AEX + * @r9: R9 at the time of EEXIT, undefined on AEX + * @run: The run instance given by the caller + * + * The register parameters contain the snapshot of their values at enclave + * exit. An invalid ENCLU function number will cause -EINVAL to be returned + * to the caller. + * + * Return: + * - <= 0: The given value is returned back to the caller. + * - > 0: ENCLU function to invoke, either EENTER or ERESUME. + */ +typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx, + long rsp, long r8, long r9, + struct sgx_enclave_run *run); + +/** + * struct sgx_enclave_run - the execution context of __vdso_sgx_enter_enclave() + * @tcs: TCS used to enter the enclave + * @function: The last seen ENCLU function (EENTER, ERESUME or EEXIT) + * @exception_vector: The interrupt vector of the exception + * @exception_error_code: The exception error code pulled out of the stack + * @exception_addr: The address that triggered the exception + * @user_handler: User provided callback run on exception + * @user_data: Data passed to the user handler + * @reserved: Reserved for future extensions + * + * If @user_handler is provided, the handler will be invoked on all return paths + * of the normal flow. The user handler may transfer control, e.g. via a + * longjmp() call or a C++ exception, without returning to + * __vdso_sgx_enter_enclave(). + */ +struct sgx_enclave_run { + __u64 tcs; + __u32 function; + __u16 exception_vector; + __u16 exception_error_code; + __u64 exception_addr; + __u64 user_handler; + __u64 user_data; + __u8 reserved[216]; +}; + +/** + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), + * a vDSO function to enter an SGX enclave. + * @rdi: Pass-through value for RDI + * @rsi: Pass-through value for RSI + * @rdx: Pass-through value for RDX + * @function: ENCLU function, must be EENTER or ERESUME + * @r8: Pass-through value for R8 + * @r9: Pass-through value for R9 + * @run: struct sgx_enclave_run, must be non-NULL + * + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting + * state in accordance with the x86-64 ABI is the responsibility of the enclave + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C + * code without careful consideration by both the enclave and its runtime. + * + * All general purpose registers except RAX, RBX and RCX are passed as-is to the + * enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are loaded + * with @function, asynchronous exit pointer, and @run.tcs respectively. 
+ * + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the + * pre-enclave state, e.g. to retrieve @run.exception and @run.user_handler + * after an enclave exit. All other registers are available for use by the + * enclave and its runtime, e.g. an enclave can push additional data onto the + * stack (and modify RSP) to pass information to the optional user handler (see + * below). + * + * Most exceptions reported on ENCLU, including those that occur within the + * enclave, are fixed up and reported synchronously instead of being delivered + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are + * never fixed up and are always delivered via standard signals. On synchronously + * reported exceptions, -EFAULT is returned and details about the exception are + * recorded in @run.exception, the optional sgx_enclave_exception struct. + * + * Return: + * - 0: ENCLU function was successfully executed. + * - -EINVAL: Invalid ENCL number (neither EENTER nor ERESUME). + */ +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi, + unsigned long rdx, unsigned int function, + unsigned long r8, unsigned long r9, + struct sgx_enclave_run *run); + +#endif /* _UAPI_ASM_X86_SGX_H */ diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h index 644421f3823b..13775bfdfee2 100644 --- a/arch/x86/include/uapi/asm/shmbuf.h +++ b/arch/x86/include/uapi/asm/shmbuf.h @@ -5,20 +5,24 @@ #if !defined(__x86_64__) || !defined(__ILP32__) #include <asm-generic/shmbuf.h> #else + +#include <asm/ipcbuf.h> +#include <asm/posix_types.h> + /* * The shmid64_ds structure for x86 architecture with x32 ABI. * * On x86-32 and x86-64 we can just use the generic definition, but - * x32 uses the same binary layout as x86_64, which is differnet + * x32 uses the same binary layout as x86_64, which is different * from other 32-bit architectures. */ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - __kernel_time_t shm_dtime; /* last detach time */ - __kernel_time_t shm_ctime; /* last change time */ + __kernel_size_t shm_segsz; /* size of segment (bytes) */ + __kernel_long_t shm_atime; /* last attach time */ + __kernel_long_t shm_dtime; /* last detach time */ + __kernel_long_t shm_ctime; /* last change time */ __kernel_pid_t shm_cpid; /* pid of creator */ __kernel_pid_t shm_lpid; /* pid of last operator */ __kernel_ulong_t shm_nattch; /* no. of current attaches */ diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 844d60eb1882..d0d9b331d3a1 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -139,7 +139,7 @@ struct _fpstate_32 { * The 64-bit FPU frame. (FXSAVE format and later) * * Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is - * larger: 'struct _xstate'. Note that 'struct _xstate' embedds + * larger: 'struct _xstate'. Note that 'struct _xstate' embeds * 'struct _fpstate' so that you can always assume the _fpstate portion * exists so that you can check the magic value. 
* diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h index 6b18e88de8a6..7114801d0499 100644 --- a/arch/x86/include/uapi/asm/sigcontext32.h +++ b/arch/x86/include/uapi/asm/sigcontext32.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index e5745d593dc7..1067efabf18b 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -2,9 +2,8 @@ #ifndef _UAPI_ASM_X86_SIGNAL_H #define _UAPI_ASM_X86_SIGNAL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> -#include <linux/time.h> #include <linux/compiler.h> /* Avoid too many header ordering problems. */ @@ -17,7 +16,7 @@ struct siginfo; typedef unsigned long sigset_t; #endif /* __KERNEL__ */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define SIGHUP 1 @@ -62,30 +61,6 @@ typedef unsigned long sigset_t; #define SIGRTMIN 32 #define SIGRTMAX _NSIG -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ -#define SA_NOCLDSTOP 0x00000001u -#define SA_NOCLDWAIT 0x00000002u -#define SA_SIGINFO 0x00000004u -#define SA_ONSTACK 0x08000000u -#define SA_RESTART 0x10000000u -#define SA_NODEFER 0x40000000u -#define SA_RESETHAND 0x80000000u - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - #define SA_RESTORER 0x04000000 #define MINSIGSTKSZ 2048 @@ -93,7 +68,7 @@ typedef unsigned long sigset_t; #include <asm-generic/signal-defs.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # ifndef __KERNEL__ @@ -128,9 +103,9 @@ struct sigaction { typedef struct sigaltstack { void __user *ss_sp; int ss_flags; - size_t ss_size; + __kernel_size_t ss_size; } stack_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_ASM_X86_SIGNAL_H */ diff --git a/arch/x86/include/uapi/asm/socket.h b/arch/x86/include/uapi/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/x86/include/uapi/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/x86/include/uapi/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..650e3256ea7d 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -75,9 +76,63 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f 
+#define SVM_EXIT_CR0_WRITE_TRAP 0x090 +#define SVM_EXIT_CR1_WRITE_TRAP 0x091 +#define SVM_EXIT_CR2_WRITE_TRAP 0x092 +#define SVM_EXIT_CR3_WRITE_TRAP 0x093 +#define SVM_EXIT_CR4_WRITE_TRAP 0x094 +#define SVM_EXIT_CR5_WRITE_TRAP 0x095 +#define SVM_EXIT_CR6_WRITE_TRAP 0x096 +#define SVM_EXIT_CR7_WRITE_TRAP 0x097 +#define SVM_EXIT_CR8_WRITE_TRAP 0x098 +#define SVM_EXIT_CR9_WRITE_TRAP 0x099 +#define SVM_EXIT_CR10_WRITE_TRAP 0x09a +#define SVM_EXIT_CR11_WRITE_TRAP 0x09b +#define SVM_EXIT_CR12_WRITE_TRAP 0x09c +#define SVM_EXIT_CR13_WRITE_TRAP 0x09d +#define SVM_EXIT_CR14_WRITE_TRAP 0x09e +#define SVM_EXIT_CR15_WRITE_TRAP 0x09f +#define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +#define SVM_EXIT_VMGEXIT 0x403 + +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_PSC 0x80000010 +#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011 +#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012 +#define SVM_VMGEXIT_AP_CREATION 0x80000013 +#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 +#define SVM_VMGEXIT_AP_CREATE 1 +#define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SAVIC 0x8000001a +#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 +#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 +#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL +#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd +#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe +#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ + /* SW_EXITINFO1[3:0] */ \ + (((((u64)reason_set) & 0xf)) | \ + /* SW_EXITINFO1[11:4] */ \ + ((((u64)reason_code) & 0xff) << 4)) +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + +/* Exit code reserved for hypervisor/software use */ +#define SVM_EXIT_SW 0xf0000000 #define SVM_EXIT_ERR -1 @@ -170,9 +225,27 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \ + { SVM_EXIT_CR0_WRITE_TRAP, "write_cr0_trap" }, \ + { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ + { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ + { SVM_EXIT_VMGEXIT, "vmgexit" }, \ + { SVM_VMGEXIT_MMIO_READ, "vmgexit_mmio_read" }, \ + { SVM_VMGEXIT_MMIO_WRITE, "vmgexit_mmio_write" }, \ + { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ + { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ + { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ + { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ + { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ + { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ + { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/include/uapi/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h deleted file mode 100644 index 
3935b106de79..000000000000 --- a/arch/x86/include/uapi/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/x86/include/uapi/asm/termios.h b/arch/x86/include/uapi/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/x86/include/uapi/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h deleted file mode 100644 index df55e1ddb0c9..000000000000 --- a/arch/x86/include/uapi/asm/types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H - -#include <asm-generic/types.h> - -#endif /* _ASM_X86_TYPES_H */ diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..be5e2e747f50 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -2,7 +2,14 @@ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H -/* x32 syscall flag bit */ +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ #define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ diff --git a/arch/x86/include/uapi/asm/vm86.h b/arch/x86/include/uapi/asm/vm86.h index d2ee4e307ef8..18909b8050bc 100644 --- a/arch/x86/include/uapi/asm/vm86.h +++ b/arch/x86/include/uapi/asm/vm86.h @@ -97,7 +97,7 @@ struct revectored_struct { struct vm86_struct { struct vm86_regs regs; unsigned long flags; - unsigned long screen_bitmap; + unsigned long screen_bitmap; /* unused, preserved by vm86() */ unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; @@ -106,7 +106,7 @@ struct vm86_struct { /* * flags masks */ -#define VM86_SCREEN_BITMAP 0x0001 +#define VM86_SCREEN_BITMAP 0x0001 /* no longer supported */ struct vm86plus_info_struct { unsigned long force_return_for_pic:1; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..1baa86dfe029 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -27,12 +27,16 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 +#define EXIT_REASON_OTHER_SMI 6 -#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 @@ -85,12 +89,22 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 +#define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 +#define EXIT_REASON_TDCALL 77 +#define EXIT_REASON_MSR_READ_IMM 84 +#define EXIT_REASON_MSR_WRITE_IMM 85 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { 
EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \
+	{ EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \
 	{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
 	{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
 	{ EXIT_REASON_CPUID, "CPUID" }, \
@@ -142,7 +156,17 @@
 	{ EXIT_REASON_RDSEED, "RDSEED" }, \
 	{ EXIT_REASON_PML_FULL, "PML_FULL" }, \
 	{ EXIT_REASON_XSAVES, "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS, "XRSTORS" }
+	{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
+	{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
+	{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
+	{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
+	{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
+	{ EXIT_REASON_TDCALL, "TDCALL" }, \
+	{ EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+	{ EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
+
+#define VMX_EXIT_REASON_FLAGS \
+	{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
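The sembuf.h hunk above shows the y2038 pattern used throughout the 32-bit UAPI: each 64-bit timestamp is split into an unsigned long low half plus a matching *_high half. A minimal sketch of how a consumer reassembles the value (the helper name is illustrative, not part of the header):

#include <stdint.h>

/* Rebuild a 64-bit timestamp from the split low/high unsigned longs of
 * the 32-bit semid64_ds layout (illustrative helper, not a kernel API). */
static inline int64_t semid64_time(unsigned long low, unsigned long high)
{
	return ((int64_t)high << 32) | (uint32_t)low;
}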
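struct setup_data in the new setup_data.h is a physically linked list: ->next holds the physical address of the following node, and a SETUP_INDIRECT node's payload is a struct setup_indirect describing the real data elsewhere. A hedged sketch of a walker; map_phys() and visit() are hypothetical callbacks standing in for the caller's mapping primitive (the kernel itself uses early_memremap() here) and consumer:

#include <asm/setup_data.h>	/* struct setup_data, struct setup_indirect */

typedef const void *(*map_phys_t)(__u64 paddr, __u64 len);	/* hypothetical */
typedef void (*visit_t)(__u32 type, const void *data, __u64 len);

static void walk_setup_data(__u64 pa_data, map_phys_t map_phys, visit_t visit)
{
	while (pa_data) {
		/* A real walker maps the header first, then remaps once
		 * sd->len is known; one call keeps the sketch short. */
		const struct setup_data *sd = map_phys(pa_data, sizeof(*sd));
		__u32 type = sd->type;
		const void *data = sd->data;
		__u64 len = sd->len;

		if (type == SETUP_INDIRECT) {
			const struct setup_indirect *ind = (const void *)sd->data;

			/* The indirect node points at the real payload. */
			type = ind->type;
			len  = ind->len;
			data = map_phys(ind->addr, ind->len);
		}
		visit(type, data, len);
		pa_data = sd->next;	/* physical address; 0 ends the list */
	}
}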
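Taken together, the first three sgx.h ioctls implement the enclave build flow: ECREATE the SECS, EADD/EEXTEND the initial pages, then EINIT with the SIGSTRUCT. A hedged user-space sketch, assuming fd is an open /dev/sgx_enclave descriptor and that secs, pages, secinfo and sigstruct were prepared by the caller:

#include <stdint.h>
#include <sys/ioctl.h>
#include <asm/sgx.h>

static int build_enclave(int fd, void *secs, void *pages, uint64_t npages,
			 void *secinfo, void *sigstruct)
{
	struct sgx_enclave_create create = {
		.src = (uintptr_t)secs,
	};
	struct sgx_enclave_add_pages add = {
		.src     = (uintptr_t)pages,
		.offset  = 0,			/* page offset inside the enclave */
		.length  = npages * 4096,	/* must be a page-size multiple */
		.secinfo = (uintptr_t)secinfo,
		.flags   = SGX_PAGE_MEASURE,	/* EEXTEND into the measurement */
	};
	struct sgx_enclave_init init = {
		.sigstruct = (uintptr_t)sigstruct,
	};

	if (ioctl(fd, SGX_IOC_ENCLAVE_CREATE, &create) ||
	    ioctl(fd, SGX_IOC_ENCLAVE_ADD_PAGES, &add))
		return -1;
	/* add.count now reports how many bytes were actually added. */
	return ioctl(fd, SGX_IOC_ENCLAVE_INIT, &init);
}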
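The sgx_enclave_remove_pages comment above describes a three-step SGX2 protocol. A sketch of the untrusted side; PT_TRIM's numeric value (4) comes from the SDM rather than this header, and eaccept_trimmed() stands in for the enclave running ENCLU[EACCEPT] on each trimmed page — both are assumptions, not part of the UAPI:

#include <sys/ioctl.h>
#include <asm/sgx.h>

#define PT_TRIM	4	/* architectural page type; not in the UAPI header */

int eaccept_trimmed(__u64 offset, __u64 length);	/* hypothetical, in-enclave */

static int trim_and_remove(int fd, __u64 offset, __u64 length)
{
	struct sgx_enclave_modify_types mt = {
		.offset = offset,
		.length = length,
		.page_type = PT_TRIM,
	};
	struct sgx_enclave_remove_pages rm = {
		.offset = offset,
		.length = length,
	};

	/* Step 1: EMODT the pages to PT_TRIM. */
	if (ioctl(fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &mt) || mt.result)
		return -1;
	/* Step 2: the enclave must EACCEPT every trimmed page. */
	if (eaccept_trimmed(offset, length))
		return -1;
	/* Step 3: complete the removal. */
	return ioctl(fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &rm);
}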
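The vdso_sgx_enter_enclave_t prototype at the end of sgx.h is meant to be applied to the __vdso_sgx_enter_enclave symbol. A hedged sketch of a minimal entry, assuming vdso_sym() is the caller's own ELF lookup helper (resolving symbols from getauxval(AT_SYSINFO_EHDR) is out of scope here); 2 is the architectural ENCLU leaf number for EENTER:

#include <string.h>
#include <asm/sgx.h>

#define EENTER	2	/* ENCLU leaf; architectural constant, not in this header */

void *vdso_sym(const char *name);	/* hypothetical vDSO symbol lookup */

static int enter_enclave(__u64 tcs)
{
	vdso_sgx_enter_enclave_t enclave_entry =
		(vdso_sgx_enter_enclave_t)vdso_sym("__vdso_sgx_enter_enclave");
	struct sgx_enclave_run run;

	memset(&run, 0, sizeof(run));
	run.tcs = tcs;	/* user_handler == 0: exceptions surface as -EFAULT */

	return enclave_entry(0, 0, 0, EENTER, 0, 0, &run);
}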
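SVM_VMGEXIT_TERM_REASON in svm.h packs a 4-bit reason set into SW_EXITINFO1[3:0] and an 8-bit reason code into SW_EXITINFO1[11:4]. A worked check of the encoding; the u64 typedef is an assumption here, since the macro body uses that kernel-internal type:

typedef unsigned long long u64;	/* assumed: kernel type, not defined in UAPI */
#include <asm/svm.h>

/* reason_set 1, reason_code 0x2a:
 *   (1 & 0xf) | ((0x2a & 0xff) << 4) = 0x001 | 0x2a0 = 0x2a1
 */
_Static_assert(SVM_VMGEXIT_TERM_REASON(1, 0x2a) == 0x2a1,
	       "termination reason encoding");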
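The reworded __X32_SYSCALL_BIT comment in unistd.h is easy to demonstrate: the flag tests and strips cleanly whether the syscall number is held in an int or an unsigned long. A small sketch with illustrative helper names:

#include <asm/unistd.h>

static inline int syscall_is_x32(unsigned long nr)
{
	return (nr & __X32_SYSCALL_BIT) != 0;
}

static inline unsigned long syscall_base_nr(unsigned long nr)
{
	/* (nr & ~__X32_SYSCALL_BIT) is well-defined for both int and
	 * unsigned long operands; the cast just makes that explicit. */
	return nr & ~(unsigned long)__X32_SYSCALL_BIT;
}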
