summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/Kbuild2
-rw-r--r--arch/x86/include/asm/acpi.h17
-rw-r--r--arch/x86/include/asm/alternative.h295
-rw-r--r--arch/x86/include/asm/amd_nb.h57
-rw-r--r--arch/x86/include/asm/amd_node.h36
-rw-r--r--arch/x86/include/asm/apic.h81
-rw-r--r--arch/x86/include/asm/asm-prototypes.h5
-rw-r--r--arch/x86/include/asm/asm.h17
-rw-r--r--arch/x86/include/asm/atomic.h12
-rw-r--r--arch/x86/include/asm/atomic64_32.h90
-rw-r--r--arch/x86/include/asm/atomic64_64.h12
-rw-r--r--arch/x86/include/asm/barrier.h27
-rw-r--r--arch/x86/include/asm/bitops.h10
-rw-r--r--arch/x86/include/asm/boot.h6
-rw-r--r--arch/x86/include/asm/bug.h14
-rw-r--r--arch/x86/include/asm/cfi.h2
-rw-r--r--arch/x86/include/asm/cmdline.h4
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h203
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h8
-rw-r--r--arch/x86/include/asm/coco.h12
-rw-r--r--arch/x86/include/asm/cpu.h27
-rw-r--r--arch/x86/include/asm/cpu_device_id.h129
-rw-r--r--arch/x86/include/asm/cpufeature.h42
-rw-r--r--arch/x86/include/asm/cpufeatures.h813
-rw-r--r--arch/x86/include/asm/cpuid.h59
-rw-r--r--arch/x86/include/asm/crash_reserve.h (renamed from arch/x86/include/asm/crash_core.h)8
-rw-r--r--arch/x86/include/asm/current.h9
-rw-r--r--arch/x86/include/asm/debugreg.h24
-rw-r--r--arch/x86/include/asm/desc.h3
-rw-r--r--arch/x86/include/asm/disabled-features.h29
-rw-r--r--arch/x86/include/asm/e820/api.h1
-rw-r--r--arch/x86/include/asm/efi.h40
-rw-r--r--arch/x86/include/asm/elf.h1
-rw-r--r--arch/x86/include/asm/entry-common.h28
-rw-r--r--arch/x86/include/asm/extable.h1
-rw-r--r--arch/x86/include/asm/extable_fixup_types.h6
-rw-r--r--arch/x86/include/asm/fb.h19
-rw-r--r--arch/x86/include/asm/fpu.h13
-rw-r--r--arch/x86/include/asm/fpu/api.h3
-rw-r--r--arch/x86/include/asm/fpu/sched.h10
-rw-r--r--arch/x86/include/asm/fpu/signal.h2
-rw-r--r--arch/x86/include/asm/fpu/types.h13
-rw-r--r--arch/x86/include/asm/fpu/xstate.h4
-rw-r--r--arch/x86/include/asm/fred.h118
-rw-r--r--arch/x86/include/asm/fsgsbase.h2
-rw-r--r--arch/x86/include/asm/ftrace.h88
-rw-r--r--arch/x86/include/asm/futex.h8
-rw-r--r--arch/x86/include/asm/hardirq.h16
-rw-r--r--arch/x86/include/asm/hpet.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h804
-rw-r--r--arch/x86/include/asm/ia32.h15
-rw-r--r--arch/x86/include/asm/ia32_unistd.h12
-rw-r--r--arch/x86/include/asm/idtentry.h98
-rw-r--r--arch/x86/include/asm/inat.h17
-rw-r--r--arch/x86/include/asm/init.h5
-rw-r--r--arch/x86/include/asm/insn.h32
-rw-r--r--arch/x86/include/asm/intel-family.h180
-rw-r--r--arch/x86/include/asm/intel_ds.h1
-rw-r--r--arch/x86/include/asm/intel_pconfig.h65
-rw-r--r--arch/x86/include/asm/intel_punit_ipc.h7
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h68
-rw-r--r--arch/x86/include/asm/intel_telemetry.h2
-rw-r--r--arch/x86/include/asm/io.h25
-rw-r--r--arch/x86/include/asm/io_apic.h1
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/irq_remapping.h7
-rw-r--r--arch/x86/include/asm/irq_stack.h4
-rw-r--r--arch/x86/include/asm/irq_vectors.h14
-rw-r--r--arch/x86/include/asm/irqflags.h20
-rw-r--r--arch/x86/include/asm/jump_label.h35
-rw-r--r--arch/x86/include/asm/kexec.h68
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h25
-rw-r--r--arch/x86/include/asm/kvm-x86-pmu-ops.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h334
-rw-r--r--arch/x86/include/asm/kvmclock.h2
-rw-r--r--arch/x86/include/asm/linkage.h16
-rw-r--r--arch/x86/include/asm/local.h16
-rw-r--r--arch/x86/include/asm/mce.h47
-rw-r--r--arch/x86/include/asm/mem_encrypt.h23
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h14
-rw-r--r--arch/x86/include/asm/mmzone.h6
-rw-r--r--arch/x86/include/asm/mmzone_32.h17
-rw-r--r--arch/x86/include/asm/mmzone_64.h18
-rw-r--r--arch/x86/include/asm/mpspec.h72
-rw-r--r--arch/x86/include/asm/mshyperv.h34
-rw-r--r--arch/x86/include/asm/msr-index.h160
-rw-r--r--arch/x86/include/asm/msr.h41
-rw-r--r--arch/x86/include/asm/mtrr.h12
-rw-r--r--arch/x86/include/asm/mwait.h1
-rw-r--r--arch/x86/include/asm/nmi.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h154
-rw-r--r--arch/x86/include/asm/numa.h26
-rw-r--r--arch/x86/include/asm/page.h8
-rw-r--r--arch/x86/include/asm/page_64.h3
-rw-r--r--arch/x86/include/asm/page_types.h13
-rw-r--r--arch/x86/include/asm/paravirt.h7
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/pci.h13
-rw-r--r--arch/x86/include/asm/percpu.h612
-rw-r--r--arch/x86/include/asm/perf_event.h65
-rw-r--r--arch/x86/include/asm/perf_event_p4.h4
-rw-r--r--arch/x86/include/asm/pgalloc.h20
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h8
-rw-r--r--arch/x86/include/asm/pgtable-3level.h2
-rw-r--r--arch/x86/include/asm/pgtable.h267
-rw-r--r--arch/x86/include/asm/pgtable_64.h26
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h6
-rw-r--r--arch/x86/include/asm/pgtable_types.h16
-rw-r--r--arch/x86/include/asm/posted_intr.h118
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/processor-flags.h2
-rw-r--r--arch/x86/include/asm/processor.h112
-rw-r--r--arch/x86/include/asm/prom.h9
-rw-r--r--arch/x86/include/asm/pti.h2
-rw-r--r--arch/x86/include/asm/ptrace.h104
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/qspinlock.h25
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h7
-rw-r--r--arch/x86/include/asm/reboot.h6
-rw-r--r--arch/x86/include/asm/required-features.h3
-rw-r--r--arch/x86/include/asm/resctrl.h84
-rw-r--r--arch/x86/include/asm/runtime-const.h61
-rw-r--r--arch/x86/include/asm/seccomp.h2
-rw-r--r--arch/x86/include/asm/sections.h1
-rw-r--r--arch/x86/include/asm/set_memory.h4
-rw-r--r--arch/x86/include/asm/setup.h12
-rw-r--r--arch/x86/include/asm/setup_data.h32
-rw-r--r--arch/x86/include/asm/sev-common.h80
-rw-r--r--arch/x86/include/asm/sev.h346
-rw-r--r--arch/x86/include/asm/shared/tdx.h47
-rw-r--r--arch/x86/include/asm/shstk.h4
-rw-r--r--arch/x86/include/asm/smap.h18
-rw-r--r--arch/x86/include/asm/smp.h12
-rw-r--r--arch/x86/include/asm/sparsemem.h11
-rw-r--r--arch/x86/include/asm/spec-ctrl.h2
-rw-r--r--arch/x86/include/asm/special_insns.h31
-rw-r--r--arch/x86/include/asm/static_call.h17
-rw-r--r--arch/x86/include/asm/string_64.h45
-rw-r--r--arch/x86/include/asm/suspend_32.h10
-rw-r--r--arch/x86/include/asm/svm.h45
-rw-r--r--arch/x86/include/asm/switch_to.h4
-rw-r--r--arch/x86/include/asm/sync_core.h6
-rw-r--r--arch/x86/include/asm/syscall.h18
-rw-r--r--arch/x86/include/asm/tdx.h3
-rw-r--r--arch/x86/include/asm/text-patching.h3
-rw-r--r--arch/x86/include/asm/thread_info.h18
-rw-r--r--arch/x86/include/asm/timer.h2
-rw-r--r--arch/x86/include/asm/tlb.h13
-rw-r--r--arch/x86/include/asm/tlbflush.h10
-rw-r--r--arch/x86/include/asm/topology.h122
-rw-r--r--arch/x86/include/asm/trap_pf.h20
-rw-r--r--arch/x86/include/asm/trapnr.h12
-rw-r--r--arch/x86/include/asm/tsc.h6
-rw-r--r--arch/x86/include/asm/uaccess.h6
-rw-r--r--arch/x86/include/asm/uaccess_64.h64
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/uv/uv_irq.h1
-rw-r--r--arch/x86/include/asm/vdso/getrandom.h42
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h55
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h22
-rw-r--r--arch/x86/include/asm/vgtod.h5
-rw-r--r--arch/x86/include/asm/video.h21
-rw-r--r--arch/x86/include/asm/vm86.h2
-rw-r--r--arch/x86/include/asm/vmware.h336
-rw-r--r--arch/x86/include/asm/vmx.h70
-rw-r--r--arch/x86/include/asm/vmxfeatures.h109
-rw-r--r--arch/x86/include/asm/vvar.h55
-rw-r--r--arch/x86/include/asm/word-at-a-time.h60
-rw-r--r--arch/x86/include/asm/x86_init.h29
-rw-r--r--arch/x86/include/asm/xen/hypercall.h36
-rw-r--r--arch/x86/include/uapi/asm/amd_hsmp.h67
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h73
-rw-r--r--arch/x86/include/uapi/asm/elf.h16
-rw-r--r--arch/x86/include/uapi/asm/kvm.h382
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h3
-rw-r--r--arch/x86/include/uapi/asm/mce.h3
-rw-r--r--arch/x86/include/uapi/asm/mman.h3
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h7
-rw-r--r--arch/x86/include/uapi/asm/setup_data.h83
-rw-r--r--arch/x86/include/uapi/asm/svm.h1
183 files changed, 5369 insertions, 3691 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a192bdea69e2..58f4ddecc5fa 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,4 +10,6 @@ generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
generic-y += early_ioremap.h
+generic-y += fprobe.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index f896eed4516c..5ab1a4598d00 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -56,6 +56,8 @@ static inline void disable_acpi(void)
extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
+extern int acpi_blacklisted(void);
+
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
static inline void acpi_disable_pci(void)
{
@@ -76,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void)
#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
+union acpi_subtable_headers;
+
+int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+ const unsigned long end);
+
+void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
+
/*
* Check if the CPU can handle C2 and deeper
*/
@@ -165,6 +174,14 @@ void acpi_generic_reduced_hw_init(void);
void x86_default_set_root_pointer(u64 addr);
u64 x86_default_get_root_pointer(void);
+#ifdef CONFIG_XEN_PV
+/* A Xen PV domain needs a special acpi_os_ioremap() handling. */
+extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys,
+ acpi_size size);
+void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_ioremap acpi_os_ioremap
+#endif
+
#else /* !CONFIG_ACPI */
#define acpi_lapic 0
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f9..e3903b731305 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/stringify.h>
+#include <linux/objtool.h>
#include <asm/asm.h>
#define ALT_FLAGS_SHIFT 16
@@ -55,16 +56,6 @@
#endif
/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-#define ANNOTATE_IGNORE_ALTERNATIVE \
- "999:\n\t" \
- ".pushsection .discard.ignore_alts\n\t" \
- ".long 999b\n\t" \
- ".popsection\n\t"
-
-/*
* The patching flags are part of the upper bits of the @ft_flags parameter when
* specifying them. The split is currently like this:
*
@@ -96,16 +87,16 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
* instructions were patched in already:
*/
extern int alternatives_patched;
+struct module;
extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-extern void apply_retpolines(s32 *start, s32 *end);
-extern void apply_returns(s32 *start, s32 *end);
-extern void apply_seal_endbr(s32 *start, s32 *end);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
+ struct module *mod);
+extern void apply_retpolines(s32 *start, s32 *end, struct module *mod);
+extern void apply_returns(s32 *start, s32 *end, struct module *mod);
+extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod);
extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
- s32 *start_cfi, s32 *end_cfi);
-
-struct module;
+ s32 *start_cfi, s32 *end_cfi, struct module *mod);
struct callthunk_sites {
s32 *call_start, *call_end;
@@ -117,7 +108,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@@ -128,7 +119,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
@@ -156,102 +147,50 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALT_CALL_INSTR "call BUG_func"
-#define b_replacement(num) "664"#num
-#define e_replacement(num) "665"#num
-
-#define alt_end_marker "663"
-#define alt_slen "662b-661b"
-#define alt_total_slen alt_end_marker"b-661b"
-#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
+#define alt_slen "772b-771b"
+#define alt_total_slen "773b-771b"
+#define alt_rlen "775f-774f"
-#define OLDINSTR(oldinstr, num) \
- "# ALT: oldnstr\n" \
- "661:\n\t" oldinstr "\n662:\n" \
+#define OLDINSTR(oldinstr) \
+ "# ALT: oldinstr\n" \
+ "771:\n\t" oldinstr "\n772:\n" \
"# ALT: padding\n" \
- ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
- alt_end_marker ":\n"
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
-
-/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
- */
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "# ALT: oldinstr2\n" \
- "661:\n\t" oldinstr "\n662:\n" \
- "# ALT: padding2\n" \
- ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define OLDINSTR_3(oldinsn, n1, n2, n3) \
- "# ALT: oldinstr3\n" \
- "661:\n\t" oldinsn "\n662:\n" \
- "# ALT: padding3\n" \
- ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define ALTINSTR_ENTRY(ft_flags, num) \
- " .long 661b - .\n" /* label */ \
- " .long " b_replacement(num)"f - .\n" /* new instruction */ \
+ ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "773:\n"
+
+#define ALTINSTR_ENTRY(ft_flags) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ " .long 771b - .\n" /* label */ \
+ " .long 774f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */
+ " .byte " alt_rlen "\n" /* replacement len */ \
+ ".popsection\n"
-#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
- "# ALT: replacement " #num "\n" \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
+#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ "# ALT: replacement\n" \
+ "774:\n\t" newinstr "\n775:\n" \
+ ".popsection\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr, 1) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags, 1) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, 1) \
- ".popsection\n"
+ OLDINSTR(oldinstr) \
+ ALTINSTR_ENTRY(ft_flags) \
+ ALTINSTR_REPLACEMENT(newinstr)
#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
- OLDINSTR_2(oldinstr, 1, 2) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, 2) \
- ".popsection\n"
+ ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2)
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
- ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
- newinstr_yes, ft_flags)
-
-#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \
- newinsn3, ft_flags3) \
- OLDINSTR_3(oldinsn, 1, 2, 3) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ALTINSTR_ENTRY(ft_flags3, 3) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, 3) \
- ".popsection\n"
+ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags)
+
+#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \
+ newinstr3, ft_flags3)
/*
* Alternative instructions for different CPU types or capabilities.
@@ -266,14 +205,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, ft_flags) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory")
-#define alternative_ternary(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
- asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) ::: "memory")
-
/*
* Alternative inline assembly with input.
*
@@ -283,32 +219,28 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Leaving an unused argument 0 to keep API compatibility.
*/
#define alternative_input(oldinstr, newinstr, ft_flags, input...) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- : : "i" (0), ## input)
-
-/*
- * This is similar to alternative_input. But it has two features and
- * respective instructions.
- *
- * If CPU has feature2, newinstr2 is used.
- * Otherwise, if CPU has feature1, newinstr1 is used.
- * Otherwise, oldinstr is used.
- */
-#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \
- ft_flags2, input...) \
- asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \
- newinstr2, ft_flags2) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: output : "i" (0), ## input)
-/* Like alternative_io, but for replacing a direct call with another one. */
-#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \
- : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+/*
+ * Like alternative_io, but for replacing a direct call with another one.
+ *
+ * Use the %c operand modifier which is the generic way to print a bare
+ * constant expression with all syntax-specific punctuation omitted. %P
+ * is the x86-specific variant which can handle constants too, for
+ * historical reasons, but it should be used primarily for PIC
+ * references: i.e., if used for a function, it would add the PLT
+ * suffix.
+ */
+#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
+ asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
/*
* Like alternative_call, but there are two features and respective functions.
@@ -316,12 +248,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, function1 is used.
* Otherwise, old function is used.
*/
-#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
- output, input...) \
- asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\
- "call %P[new2]", ft_flags2) \
- : output, ASM_CALL_CONSTRAINT \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
+ output, input...) \
+ asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
+ "call %c[new2]", ft_flags2) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input)
/*
@@ -336,6 +268,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__
+
/* Macro for creating assembler functions avoiding any C magic. */
#define DEFINE_ASM_FUNC(func, instr, sec) \
asm (".pushsection " #sec ", \"ax\"\n" \
@@ -368,17 +302,6 @@ void nop_func(void);
#endif
/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.ignore_alts
- .long .Lannotate_\@
- .popsection
-.endm
-
-/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
@@ -402,22 +325,23 @@ void nop_func(void);
* @newinstr. ".skip" directive takes care of proper instruction padding
* in case @newinstr is longer than @oldinstr.
*/
-.macro ALTERNATIVE oldinstr, newinstr, ft_flags
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f
- .popsection
+#define __ALTERNATIVE(oldinst, newinst, flag) \
+740: \
+ oldinst ; \
+741: \
+ .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\
+742: \
+ .pushsection .altinstructions,"a" ; \
+ altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \
+ .popsection ; \
+ .pushsection .altinstr_replacement,"ax" ; \
+743: \
+ newinst ; \
+744: \
+ .popsection ;
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
+.macro ALTERNATIVE oldinstr, newinstr, ft_flags
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
.endm
#define old_len 141b-140b
@@ -426,65 +350,18 @@ void nop_func(void);
#define new_len3 146f-145f
/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
-
-
-/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
* @feature2, it replaces @oldinstr with @feature2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
-140:
- \oldinstr
-141:
- .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_2(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
+ \newinstr2, \ft_flags2)
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
-140:
- \oldinstr
-141:
- .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
- (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- \newinstr3
-146:
- .popsection
+ __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2),
+ \newinstr3, \ft_flags3)
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 5c37944c8a5e..4c4efb93045e 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -4,7 +4,7 @@
#include <linux/ioport.h>
#include <linux/pci.h>
-#include <linux/refcount.h>
+#include <asm/amd_node.h>
struct amd_nb_bus_dev_range {
u8 bus;
@@ -21,49 +21,16 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long);
-extern int amd_smn_read(u16 node, u32 address, u32 *value);
-extern int amd_smn_write(u16 node, u32 address, u32 value);
-
struct amd_l3_cache {
unsigned indices;
u8 subcaches[4];
};
-struct threshold_block {
- unsigned int block; /* Number within bank */
- unsigned int bank; /* MCA bank the block belongs to */
- unsigned int cpu; /* CPU which controls MCA bank */
- u32 address; /* MSR address for the block */
- u16 interrupt_enable; /* Enable/Disable APIC interrupt */
- bool interrupt_capable; /* Bank can generate an interrupt. */
-
- u16 threshold_limit; /*
- * Value upon which threshold
- * interrupt is generated.
- */
-
- struct kobject kobj; /* sysfs object */
- struct list_head miscj; /*
- * List of threshold blocks
- * within a bank.
- */
-};
-
-struct threshold_bank {
- struct kobject *kobj;
- struct threshold_block *blocks;
-
- /* initialized to the number of CPUs on the node sharing this bank */
- refcount_t cpus;
- unsigned int shared;
-};
-
struct amd_northbridge {
struct pci_dev *root;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
- struct threshold_bank *bank4;
};
struct amd_northbridge_info {
@@ -82,23 +49,6 @@ u16 amd_nb_num(void);
bool amd_nb_has_feature(unsigned int feature);
struct amd_northbridge *node_to_amd_nb(int node);
-static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
-{
- struct pci_dev *misc;
- int i;
-
- for (i = 0; i != amd_nb_num(); i++) {
- misc = node_to_amd_nb(i)->misc;
-
- if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
- PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
- return i;
- }
-
- WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
- return 0;
-}
-
static inline bool amd_gart_present(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
@@ -116,7 +66,10 @@ static inline bool amd_gart_present(void)
#define amd_nb_num(x) 0
#define amd_nb_has_feature(x) false
-#define node_to_amd_nb(x) NULL
+static inline struct amd_northbridge *node_to_amd_nb(int node)
+{
+ return NULL;
+}
#define amd_gart_present(x) false
#endif
diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h
new file mode 100644
index 000000000000..113ad3e8ee40
--- /dev/null
+++ b/arch/x86/include/asm/amd_node.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Node helper functions and common defines
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ *
+ * Note:
+ * Items in this file may only be used in a single place.
+ * However, it's prudent to keep all AMD Node functionality
+ * in a unified place rather than spreading throughout the
+ * kernel.
+ */
+
+#ifndef _ASM_X86_AMD_NODE_H_
+#define _ASM_X86_AMD_NODE_H_
+
+#include <linux/pci.h>
+
+#define MAX_AMD_NUM_NODES 8
+#define AMD_NODE0_PCI_SLOT 0x18
+
+struct pci_dev *amd_node_get_func(u16 node, u8 func);
+struct pci_dev *amd_node_get_root(u16 node);
+
+static inline u16 amd_num_nodes(void)
+{
+ return topology_amd_nodes_per_pkg() * topology_max_packages();
+}
+
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
+int __must_check amd_smn_write(u16 node, u32 address, u32 value);
+
+#endif /*_ASM_X86_AMD_NODE_H_*/
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9d159b771dc8..f21ff1932699 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,9 +13,16 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
+#include <asm/posted_intr.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP 0 /* Default */
+#define APIC_EXTNMI_ALL 1
+#define APIC_EXTNMI_NONE 2
+
/*
* Debugging macros
*/
@@ -23,22 +30,22 @@
#define APIC_VERBOSE 1
#define APIC_DEBUG 2
-/* Macros for apic_extnmi which controls external NMI masking */
-#define APIC_EXTNMI_BSP 0 /* Default */
-#define APIC_EXTNMI_ALL 1
-#define APIC_EXTNMI_NONE 2
-
/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
+ * Define the default level of output to be very little This can be turned
+ * up by using apic=verbose for more information and apic=debug for _lots_
+ * of information. apic_verbosity is defined in apic.c
*/
-#define apic_printk(v, s, a...) do { \
- if ((v) <= apic_verbosity) \
- printk(s, ##a); \
- } while (0)
-
+#define apic_printk(v, s, a...) \
+do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+} while (0)
+
+#define apic_pr_verbose(s, a...) apic_printk(APIC_VERBOSE, KERN_INFO s, ##a)
+#define apic_pr_debug(s, a...) apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a)
+#define apic_pr_debug_cont(s, a...) apic_printk(APIC_DEBUG, KERN_CONT s, ##a)
+/* Unconditional debug prints for code which is guarded by apic_verbosity already */
+#define apic_dbg(s, a...) printk(KERN_DEBUG s, ##a)
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
extern void x86_32_probe_apic(void);
@@ -46,6 +53,10 @@ extern void x86_32_probe_apic(void);
static inline void x86_32_probe_apic(void) { }
#endif
+extern u32 cpuid_to_apicid[];
+
+#define CPU_ACPIID_INVALID U32_MAX
+
#ifdef CONFIG_X86_LOCAL_APIC
extern int apic_verbosity;
@@ -54,8 +65,6 @@ extern int local_apic_timer_c2_ok;
extern bool apic_is_disabled;
extern unsigned int lapic_timer_period;
-extern u32 cpuid_to_apicid[];
-
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
APIC_PIC,
@@ -89,14 +98,14 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
- alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
+ alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
ASM_OUTPUT2("=r" (v), "=m" (*addr)),
ASM_OUTPUT2("0" (v), "m" (*addr)));
}
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
@@ -118,8 +127,6 @@ static inline bool apic_is_x2apic_enabled(void)
extern void enable_IR_x2apic(void);
-extern int get_physical_broadcast(void);
-
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
@@ -169,6 +176,14 @@ extern bool apic_needs_pit(void);
extern void apic_send_IPI_allbutself(unsigned int vector);
+extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present);
+extern void topology_register_boot_apic(u32 apic_id);
+extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id);
+extern void topology_hotunplug_apic(unsigned int cpu);
+extern void topology_apply_cmdline_limits_early(void);
+extern void topology_init_possible_cpus(void);
+extern void topology_reset_possible_cpus_up(void);
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
@@ -183,6 +198,8 @@ static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
static inline bool apic_needs_pit(void) { return true; }
+static inline void topology_apply_cmdline_limits_early(void) { }
+static inline void topology_init_possible_cpus(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
@@ -289,16 +306,11 @@ struct apic {
/* Probe, setup and smpboot functions */
int (*probe)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
- bool (*apic_id_registered)(void);
- bool (*check_apicid_used)(physid_mask_t *map, u32 apicid);
void (*init_apic_ldr)(void);
- void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
u32 (*cpu_present_to_apicid)(int mps_cpu);
- u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb);
u32 (*get_apic_id)(u32 id);
- u32 (*set_apic_id)(u32 apicid);
/* wakeup_secondary_cpu */
int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip);
@@ -336,20 +348,12 @@ extern struct apic *apic;
* APIC drivers are probed based on how they are listed in the .apicdrivers
* section. So the order is important and enforced by the ordering
* of different apic driver files in the Makefile.
- *
- * For the files having two apic drivers, we use apic_drivers()
- * to enforce the order with in them.
*/
#define apic_driver(sym) \
static const struct apic *__apicdrivers_##sym __used \
__aligned(sizeof(struct apic *)) \
__section(".apicdrivers") = { &sym }
-#define apic_drivers(sym1, sym2) \
- static struct apic *__apicdrivers_##sym1##sym2[2] __used \
- __aligned(sizeof(struct apic *)) \
- __section(".apicdrivers") = { &sym1, &sym2 }
-
extern struct apic *__apicdrivers[], *__apicdrivers_end[];
/*
@@ -475,7 +479,6 @@ static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
-static inline void apic_set_eoi_cb(void (*eoi)(void)) {}
static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
static inline void apic_setup_apic_calls(void) { }
@@ -492,14 +495,17 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector)
return !!(irr & (1U << (vector % 32)));
}
+static inline bool is_vector_pending(unsigned int vector)
+{
+ return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector);
+}
+
/*
* Warm reset vector position:
*/
#define TRAMPOLINE_PHYS_LOW 0x467
#define TRAMPOLINE_PHYS_HIGH 0x469
-extern void generic_bigsmp_probe(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
@@ -522,12 +528,9 @@ static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; }
static inline void x86_64_probe_apic(void) { }
#endif
-extern int default_apic_id_valid(u32 apicid);
-
extern u32 apic_default_calc_apicid(unsigned int cpu);
extern u32 apic_flat_calc_apicid(unsigned int cpu);
-extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
extern u32 default_cpu_present_to_apicid(int mps_cpu);
void apic_send_nmi_to_offline_cpu(unsigned int cpu);
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index b1a98fa38828..3674006e3974 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -12,9 +12,14 @@
#include <asm/special_insns.h>
#include <asm/preempt.h>
#include <asm/asm.h>
+#include <asm/fred.h>
#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#endif
+#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR)
+extern unsigned long __ref_stack_chk_guard;
+#endif
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index fbcfec4dc4cc..2bec0c89a95c 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -113,6 +113,20 @@
#endif
+#ifndef __ASSEMBLY__
+#ifndef __pic__
+static __always_inline __pure void *rip_rel_ptr(void *p)
+{
+ asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
+
+ return p;
+}
+#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var)))
+#else
+#define RIP_REL_REF(var) (var)
+#endif
+#endif
+
/*
* Macros to generate condition code outputs from inline assembly,
* The output operand must be type "bool".
@@ -215,9 +229,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
#define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS)
-#define _ASM_EXTABLE_CPY(from, to) \
- _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY)
-
#define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 55a55ec04350..55b4d24356ea 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
}
#define arch_atomic_add_return arch_atomic_add_return
-static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
-{
- return arch_atomic_add_return(-i, v);
-}
-#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
@@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 3486d91b8595..6c6e9b9f98a4 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,6 +14,32 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
+/*
+ * Read an atomic64_t non-atomically.
+ *
+ * This is intended to be used in cases where a subsequent atomic operation
+ * will handle the torn value, and can be used to prime the first iteration
+ * of unconditional try_cmpxchg() loops, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i);
+ *
+ * This is NOT safe to use where the value is not always checked by a
+ * subsequent atomic operation, such as in conditional try_cmpxchg() loops
+ * that can break before the atomic operation, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do {
+ * if (condition(val))
+ * break;
+ * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i);
+ */
+static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
+{
+ /* See comment in arch_atomic_read(). */
+ return __READ_ONCE(v->counter);
+}
+
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
@@ -24,8 +50,9 @@ typedef struct {
#ifdef CONFIG_X86_CMPXCHG64
#define __alternative_atomic64(f, g, out, in...) \
- asm volatile("call %P[func]" \
- : out : [func] "i" (atomic64_##g##_cx8), ## in)
+ asm volatile("call %c[func]" \
+ : ALT_OUTPUT_SP(out) \
+ : [func] "i" (atomic64_##g##_cx8), ## in)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
@@ -61,12 +88,18 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return arch_cmpxchg64(&v->counter, o, n);
+ return arch_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+ return arch_try_cmpxchg64(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
@@ -131,20 +164,18 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
ASM_NO_INPUT_CLOBBER("memory"));
- return i;
}
-static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
ASM_NO_INPUT_CLOBBER("memory"));
- return i;
}
static __always_inline void arch_atomic64_inc(atomic64_t *v)
@@ -195,69 +226,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 3165c0feedf7..ae12acae5b06 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
-{
- return arch_atomic64_add_return(-i, v);
-}
-#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
@@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0216f63a366b..7b44b3c4cce1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -33,20 +33,16 @@
* Returns:
* 0 - (index < size)
*/
-static inline unsigned long array_index_mask_nospec(unsigned long index,
- unsigned long size)
-{
- unsigned long mask;
-
- asm volatile ("cmp %1,%2; sbb %0,%0;"
- :"=r" (mask)
- :"g"(size),"r" (index)
- :"cc");
- return mask;
-}
-
-/* Override the default implementation from linux/nospec.h. */
-#define array_index_mask_nospec array_index_mask_nospec
+#define array_index_mask_nospec(idx,sz) ({ \
+ typeof((idx)+(sz)) __idx = (idx); \
+ typeof(__idx) __sz = (sz); \
+ unsigned long __mask; \
+ asm volatile ("cmp %1,%2; sbb %0,%0" \
+ :"=r" (__mask) \
+ :ASM_INPUT_G (__sz), \
+ "r" (__idx) \
+ :"cc"); \
+ __mask; })
/* Prevent speculative execution past this barrier. */
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
@@ -79,6 +75,9 @@ do { \
#define __smp_mb__before_atomic() do { } while (0)
#define __smp_mb__after_atomic() do { } while (0)
+/* Writing to CR3 provides a full memory barrier in switch_mm(). */
+#define smp_mb__after_switch_mm() do { } while (0)
+
#include <asm-generic/barrier.h>
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 990eb686ca67..b96d45944c59 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
@@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word)
asm("bsr %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
@@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x)
*/
asm("bsfl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
@@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x)
*/
asm("bsrl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
@@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x)
*/
asm("bsrq %1,%q0"
: "+r" (bitpos)
- : "rm" (x));
+ : ASM_INPUT_RM (x));
return bitpos + 1;
}
#else
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index a38cc0afc90a..3e5b111e619d 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -6,11 +6,6 @@
#include <asm/pgtable_types.h>
#include <uapi/asm/boot.h>
-/* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
- + (CONFIG_PHYSICAL_ALIGN - 1)) \
- & ~(CONFIG_PHYSICAL_ALIGN - 1))
-
/* Minimum kernel alignment, as a power of two */
#ifdef CONFIG_X86_64
# define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
@@ -81,6 +76,7 @@
#ifndef __ASSEMBLY__
extern unsigned int output_len;
+extern const unsigned long kernel_text_size;
extern const unsigned long kernel_total_size;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..e85ac0c7c039 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,18 @@
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_ASOP 0x67
+#define OPCODE_ESCAPE 0x0f
+#define SECOND_BYTE_OPCODE_UD1 0xb9
+#define SECOND_BYTE_OPCODE_UD2 0x0b
+
+#define BUG_NONE 0xffff
+#define BUG_UD1 0xfffe
+#define BUG_UD2 0xfffd
+
#ifdef CONFIG_GENERIC_BUG
#ifdef CONFIG_X86_32
@@ -80,7 +92,7 @@ do { \
do { \
__auto_type __flags = BUGFLAG_WARNING|(flags); \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \
+ _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE(1b)); \
instrumentation_end(); \
} while (0)
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 7cd752557905..31d19c815f99 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -93,7 +93,7 @@
*
*/
enum cfi_mode {
- CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */
+ CFI_AUTO, /* FineIBT if hardware has IBT, otherwise kCFI */
CFI_OFF, /* Taditional / IBT depending on .config */
CFI_KCFI, /* Optionally CALL_PADDING, IBT, RETPOLINE */
CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index 6faaf27e8899..6cbd9ae58b21 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,6 +2,10 @@
#ifndef _ASM_X86_CMDLINE_H
#define _ASM_X86_CMDLINE_H
+#include <asm/setup.h>
+
+extern char builtin_cmdline[COMMAND_LINE_SIZE];
+
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
int cmdline_find_option(const char *cmdline_ptr, const char *option,
char *buffer, int bufsize);
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index b5731c51f0f4..fd1282a783dd 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -3,103 +3,148 @@
#define _ASM_X86_CMPXCHG_32_H
/*
- * Note: if you use set64_bit(), __cmpxchg64(), or their variants,
+ * Note: if you use __cmpxchg64(), or their variants,
* you need to test for the feature in boot_cpu_data.
*/
-#ifdef CONFIG_X86_CMPXCHG64
-#define arch_cmpxchg64(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
- (unsigned long long)(n)))
-#define arch_cmpxchg64_local(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
- (unsigned long long)(n)))
-#define arch_try_cmpxchg64(ptr, po, n) \
- __try_cmpxchg64((ptr), (unsigned long long *)(po), \
- (unsigned long long)(n))
-#endif
+union __u64_halves {
+ u64 full;
+ struct {
+ u32 low, high;
+ };
+};
+
+#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \
+({ \
+ union __u64_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm volatile(_lock "cmpxchg8b %[ptr]" \
+ : [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+
+static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX);
+}
-static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
{
- u64 prev;
- asm volatile(LOCK_PREFIX "cmpxchg8b %1"
- : "=A" (prev),
- "+m" (*ptr)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32)),
- "0" (old)
- : "memory");
- return prev;
+ return __arch_cmpxchg64(ptr, old, new,);
}
-static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \
+({ \
+ union __u64_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm volatile(_lock "cmpxchg8b %[ptr]" \
+ CC_SET(e) \
+ : CC_OUT(e) (ret), \
+ [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
- u64 prev;
- asm volatile("cmpxchg8b %1"
- : "=A" (prev),
- "+m" (*ptr)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32)),
- "0" (old)
- : "memory");
- return prev;
+ return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
}
-static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
- bool success;
- u64 old = *pold;
- asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
- CC_SET(z)
- : CC_OUT(z) (success),
- [ptr] "+m" (*ptr),
- "+A" (old)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32))
- : "memory");
-
- if (unlikely(!success))
- *pold = old;
- return success;
+ return __arch_try_cmpxchg64(ptr, oldp, new,);
}
-#ifndef CONFIG_X86_CMPXCHG64
+#ifdef CONFIG_X86_CMPXCHG64
+
+#define arch_cmpxchg64 __cmpxchg64
+
+#define arch_cmpxchg64_local __cmpxchg64_local
+
+#define arch_try_cmpxchg64 __try_cmpxchg64
+
+#define arch_try_cmpxchg64_local __try_cmpxchg64_local
+
+#else
+
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
* to simulate the cmpxchg8b on the 80386 and 80486 CPU.
*/
-#define arch_cmpxchg64(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __typeof__(*(ptr)) __old = (o); \
- __typeof__(*(ptr)) __new = (n); \
- alternative_io(LOCK_PREFIX_HERE \
- "call cmpxchg8b_emu", \
- "lock; cmpxchg8b (%%esi)" , \
- X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "0" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
- __ret; })
-
-
-#define arch_cmpxchg64_local(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __typeof__(*(ptr)) __old = (o); \
- __typeof__(*(ptr)) __new = (n); \
- alternative_io("call cmpxchg8b_emu", \
- "cmpxchg8b (%%esi)" , \
- X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "0" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
- __ret; })
+#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \
+({ \
+ union __u64_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm volatile(ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; ");
+}
+#define arch_cmpxchg64 arch_cmpxchg64
+
+static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64_emu(ptr, old, new, ,);
+}
+#define arch_cmpxchg64_local arch_cmpxchg64_local
+
+#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \
+({ \
+ union __u64_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm volatile(ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ CC_SET(e) \
+ : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
+ "+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; ");
+}
+#define arch_try_cmpxchg64 arch_try_cmpxchg64
+
+static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,);
+}
+#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
#endif
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 44b08b53ab32..5e241306db26 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,6 +20,12 @@
arch_try_cmpxchg((ptr), (po), (n)); \
})
+#define arch_try_cmpxchg64_local(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg_local((ptr), (po), (n)); \
+})
+
union __u128_halves {
u128 full;
struct {
@@ -62,7 +68,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
asm volatile(_lock "cmpxchg16b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
- [ptr] "+m" (*ptr), \
+ [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 76c310b19b11..aa6c8f8ca958 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_COCO_H
#define _ASM_X86_COCO_H
+#include <asm/asm.h>
#include <asm/types.h>
enum cc_vendor {
@@ -12,11 +13,19 @@ enum cc_vendor {
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
extern enum cc_vendor cc_vendor;
-void cc_set_mask(u64 mask);
+extern u64 cc_mask;
+
+static inline void cc_set_mask(u64 mask)
+{
+ RIP_REL_REF(cc_mask) = mask;
+}
+
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
+static const u64 cc_mask = 0;
static inline u64 cc_mkenc(u64 val)
{
@@ -27,6 +36,7 @@ static inline u64 cc_mkdec(u64 val)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif
#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index f8f9a9b79395..98eced5084ca 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -9,18 +9,10 @@
#include <linux/percpu.h>
#include <asm/ibt.h>
-#ifdef CONFIG_SMP
-
-extern void prefill_possible_map(void);
-
-#else /* CONFIG_SMP */
-
-static inline void prefill_possible_map(void) {}
-
+#ifndef CONFIG_SMP
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
#define cpu_acpi_id(cpu) 0
#define safe_smp_processor_id() 0
-
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
@@ -34,12 +26,13 @@ int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
-#ifdef CONFIG_CPU_SUP_INTEL
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
extern void __init sld_setup(struct cpuinfo_x86 *c);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs);
-u8 get_this_hybrid_cpu_type(void);
+void split_lock_init(void);
+void bus_lock_init(void);
#else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
@@ -53,11 +46,23 @@ static inline bool handle_guest_split_lock(unsigned long ip)
}
static inline void handle_bus_lock(struct pt_regs *regs) {}
+static inline void split_lock_init(void) {}
+static inline void bus_lock_init(void) {}
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+u8 get_this_hybrid_cpu_type(void);
+u32 get_this_hybrid_cpu_native_id(void);
+#else
static inline u8 get_this_hybrid_cpu_type(void)
{
return 0;
}
+
+static inline u32 get_this_hybrid_cpu_native_id(void)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index eb8fcede9e3b..ba32e0f44cba 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -3,6 +3,39 @@
#define _ASM_X86_CPU_DEVICE_ID
/*
+ * Can't use <linux/bitfield.h> because it generates expressions that
+ * cannot be used in structure initializers. Bitfield construction
+ * here must match the union in struct cpuinfo_86:
+ * union {
+ * struct {
+ * __u8 x86_model;
+ * __u8 x86;
+ * __u8 x86_vendor;
+ * __u8 x86_reserved;
+ * };
+ * __u32 x86_vfm;
+ * };
+ */
+#define VFM_MODEL_BIT 0
+#define VFM_FAMILY_BIT 8
+#define VFM_VENDOR_BIT 16
+#define VFM_RSVD_BIT 24
+
+#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
+#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
+#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)
+
+#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
+#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
+#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)
+
+#define VFM_MAKE(_vendor, _family, _model) ( \
+ ((_model) << VFM_MODEL_BIT) | \
+ ((_family) << VFM_FAMILY_BIT) | \
+ ((_vendor) << VFM_VENDOR_BIT) \
+)
+
+/*
* Declare drivers belonging to specific x86 CPUs
* Similar in spirit to pci_device_id and related PCI functions
*
@@ -20,7 +53,9 @@
#define X86_CENTAUR_FAM6_C7_D 0xd
#define X86_CENTAUR_FAM6_NANO 0xf
-#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+/* x86_cpu_id::flags */
+#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0)
+
/**
* X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
@@ -46,6 +81,18 @@
.model = _model, \
.steppings = _steppings, \
.feature = _feature, \
+ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \
+ .driver_data = (unsigned long) _data \
+}
+
+#define X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
+ _steppings, _feature, _data) { \
+ .vendor = _vendor, \
+ .family = _family, \
+ .model = _model, \
+ .steppings = _steppings, \
+ .feature = _feature, \
+ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \
.driver_data = (unsigned long) _data \
}
@@ -145,52 +192,58 @@
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
/**
- * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
- * @model: The model name without the INTEL_FAM6_ prefix or ANY
- * The model name is expanded to INTEL_FAM6_@model internally
+ * X86_MATCH_VFM - Match encoded vendor/family/model
+ * @vfm: Encoded 8-bits each for vendor, family, model
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
- * etc. is casted to unsigned long internally.
+ * etc. is cast to unsigned long internally.
*
- * The vendor is set to INTEL, the family to 6 and all other missing
- * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
+ * Stepping and feature are set to wildcards
+ */
+#define X86_MATCH_VFM(vfm, data) \
+ X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \
+ VFM_VENDOR(vfm), \
+ VFM_FAMILY(vfm), \
+ VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, data)
+
+#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+/**
+ * X86_MATCH_VFM_STEPPINGS - Match encoded vendor/family/model/stepping
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @steppings: Bitmask of steppings to match
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
*
- * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
+ * feature is set to wildcard
*/
-#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
- X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
-
-#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \
- X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
- steppings, X86_FEATURE_ANY, data)
+#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \
+ X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \
+ VFM_VENDOR(vfm), \
+ VFM_FAMILY(vfm), \
+ VFM_MODEL(vfm), \
+ __X86_STEPPINGS(min_step, max_step), \
+ X86_FEATURE_ANY, data)
-/*
- * Match specific microcode revisions.
- *
- * vendor/family/model/stepping must be all set.
+/**
+ * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
*
- * Only checks against the boot CPU. When mixed-stepping configs are
- * valid for a CPU model, add a quirk for every valid stepping and
- * do the fine-tuning in the quirk handler.
+ * Steppings is set to wildcard
*/
-
-struct x86_cpu_desc {
- u8 x86_family;
- u8 x86_vendor;
- u8 x86_model;
- u8 x86_stepping;
- u32 x86_microcode_rev;
-};
-
-#define INTEL_CPU_DESC(model, stepping, revision) { \
- .x86_family = 6, \
- .x86_vendor = X86_VENDOR_INTEL, \
- .x86_model = (model), \
- .x86_stepping = (stepping), \
- .x86_microcode_rev = (revision), \
-}
+#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \
+ X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \
+ VFM_VENDOR(vfm), \
+ VFM_FAMILY(vfm), \
+ VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, feature, data)
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
-extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
+extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table);
#endif /* _ASM_X86_CPU_DEVICE_ID */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a1273698fc43..de1ad09fe8d7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
#define X86_CAP_FMT_NUM "%d:%d"
@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
@@ -125,15 +129,15 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
- x86_this_cpu_test_bit(bit, \
- (unsigned long __percpu *)&cpu_info.x86_capability))
+ x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
/*
- * This macro is for detection of features which need kernel
- * infrastructure to be used. It may *not* directly test the CPU
- * itself. Use the cpu_has() family if you want true runtime
- * testing of CPU features, like in hypervisor code where you are
- * supporting a possible guest feature where host support for it
+ * This is the default CPU features testing macro to use in code.
+ *
+ * It is for detection of features which need kernel infrastructure to be
+ * used. It may *not* directly test the CPU itself. Use the cpu_has() family
+ * if you want true runtime testing of CPU features, like in hypervisor code
+ * where you are supporting a possible guest feature where host support for it
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
@@ -146,21 +150,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
extern void setup_clear_cpu_cap(unsigned int bit);
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
-#define setup_force_cpu_cap(bit) do { \
- set_cpu_cap(&boot_cpu_data, bit); \
+#define setup_force_cpu_cap(bit) do { \
+ \
+ if (!boot_cpu_has(bit)) \
+ WARN_ON(alternatives_patched); \
+ \
+ set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
/*
- * Static testing of CPU features. Used the same as boot_cpu_has(). It
- * statically patches the target code for additional performance. Use
- * static_cpu_has() only in fast paths, where every cycle counts. Which
- * means that the boot_cpu_has() variant is already fast enough for the
- * majority of cases and you should stick to using it as it is generally
- * only two instructions: a RIP-relative MOV and a TEST.
- *
* Do not use an "m" constraint for [cap_byte] here: gcc doesn't know
* that this is only used on a fallback path and will sometimes cause
* it to manifest the address of boot_cpu_data in a register, fouling
@@ -168,11 +169,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm goto(
- ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+ asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
- " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n"
+ " testb %[bitnum], %a[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".popsection\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2b62cdd8dd12..508c0dad116b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,175 +13,175 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature
+ * bit is not displayed in /proc/cpuinfo at all.
*
* When adding new features here that depend on other features,
* please update the table in kernel/cpu/cpuid-deps.c as well.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
-#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
+/* Free ( 3*32+ 6) */
+/* Free ( 3*32+ 7) */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */
+#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
-#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -189,93 +189,93 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
-#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
-#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
-#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
-#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
-#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
-#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
-#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
-#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
-#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
-#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */
+#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */
+#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
+#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
-#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
-#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
-#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
-#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
+#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
-#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */
+#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -283,224 +283,255 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
-#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
-#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
-#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
-#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
-#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
-#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
-#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
-#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
-#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
-#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
-#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
-#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
-#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
-#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
-#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
-#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
-#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
-#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
-#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
-#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
-#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
-#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
-#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
-#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
-#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */
+#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */
+#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */
+#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
-#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
-#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
-#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
-#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
-#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
-#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
-#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
-#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
-#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
-#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
+#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */
+#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */
+#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */
+#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
+#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
+#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
+#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
-#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
-#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
-#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
-#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
-#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
-#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
-#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
-#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
-#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
-#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
-#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
+#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
+#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */
+#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */
+#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */
+#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
-#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
-#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
-#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
-#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */
+#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */
+#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */
-#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
-#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
-#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
-#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
-#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
-#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
-#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
-#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
-#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */
+#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */
+#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
-#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
-#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
-#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
-#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
-#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */
-#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
-#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
-#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
-#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
-#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
-#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
-#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
-#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
-#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */
+#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */
+#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */
+#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
-#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
-#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
-#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
-#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
-#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
-#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
+#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
+#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
-#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
-#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
-#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
-#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
-#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
-#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
-#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
-#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
-#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
+#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */
+
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
-#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
-#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
-#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
-#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
-#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
-#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
-#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
-#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
-#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
-#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
-#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
-#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
-#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
-#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
-#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
+#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */
+#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */
+#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */
+#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
index 9bee3e7bf973..b2b9b4ef3dae 100644
--- a/arch/x86/include/asm/cpuid.h
+++ b/arch/x86/include/asm/cpuid.h
@@ -6,6 +6,8 @@
#ifndef _ASM_X86_CPUID_H
#define _ASM_X86_CPUID_H
+#include <linux/types.h>
+
#include <asm/string.h>
struct cpuid_regs {
@@ -19,12 +21,19 @@ enum cpuid_regs_idx {
CPUID_EDX,
};
+#define CPUID_LEAF_MWAIT 0x5
+#define CPUID_LEAF_DCA 0x9
+#define CPUID_LEAF_XSTATE 0x0d
+#define CPUID_LEAF_TSC 0x15
+#define CPUID_LEAF_FREQ 0x16
+#define CPUID_LEAF_TILE 0x1d
+
#ifdef CONFIG_X86_32
-extern int have_cpuid_p(void);
+bool have_cpuid_p(void);
#else
-static inline int have_cpuid_p(void)
+static inline bool have_cpuid_p(void)
{
- return 1;
+ return true;
}
#endif
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
@@ -127,6 +136,42 @@ static inline unsigned int cpuid_edx(unsigned int op)
return edx;
}
+static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs)
+{
+ regs[CPUID_EAX] = leaf;
+ regs[CPUID_ECX] = subleaf;
+ __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
+}
+
+#define cpuid_subleaf(leaf, subleaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
+}
+
+#define cpuid_leaf(leaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, 0, (u32 *)(regs)); \
+}
+
+static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf,
+ enum cpuid_regs_idx regidx, u32 *reg)
+{
+ u32 regs[4];
+
+ __cpuid_read(leaf, subleaf, regs);
+ *reg = regs[regidx];
+}
+
+#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
+}
+
+#define cpuid_leaf_reg(leaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
+}
+
static __always_inline bool cpuid_function_is_indexed(u32 function)
{
switch (function) {
@@ -143,6 +188,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
case 0x1d:
case 0x1e:
case 0x1f:
+ case 0x24:
case 0x8000001d:
return true;
}
@@ -160,7 +206,12 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
for_each_possible_hypervisor_cpuid_base(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
- if (!memcmp(sig, signature, 12) &&
+ /*
+ * This must not compile to "call memcmp" because it's called
+ * from PVH early boot code before instrumentation is set up
+ * and memcmp() itself may be instrumented.
+ */
+ if (!__builtin_memcmp(sig, signature, 12) &&
(leaves == 0 || ((eax - base) >= leaves)))
return base;
}
diff --git a/arch/x86/include/asm/crash_core.h b/arch/x86/include/asm/crash_reserve.h
index 76af98f4e801..7835b2cdff04 100644
--- a/arch/x86/include/asm/crash_core.h
+++ b/arch/x86/include/asm/crash_reserve.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _X86_CRASH_CORE_H
-#define _X86_CRASH_CORE_H
+#ifndef _X86_CRASH_RESERVE_H
+#define _X86_CRASH_RESERVE_H
/* 16M alignment for crash kernel regions */
#define CRASH_ALIGN SZ_16M
@@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void)
#endif
}
-#endif /* _X86_CRASH_CORE_H */
+#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+
+#endif /* _X86_CRASH_RESERVE_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index dd4b67101bb7..bf5953883ec3 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -18,7 +18,7 @@ struct pcpu_hot {
struct task_struct *current_task;
int preempt_count;
int cpu_number;
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
u64 call_depth;
#endif
unsigned long top_of_stack;
@@ -37,8 +37,15 @@ static_assert(sizeof(struct pcpu_hot) == 64);
DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
+/* const-qualified alias to pcpu_hot, aliased by linker. */
+DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
+ const_pcpu_hot);
+
static __always_inline struct task_struct *get_current(void)
{
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_pcpu_hot.current_task);
+
return this_cpu_read_stable(pcpu_hot.current_task);
}
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 0cec92c430cc..fdbbbfec745a 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -5,7 +5,9 @@
#include <linux/bug.h>
#include <linux/percpu.h>
#include <uapi/asm/debugreg.h>
+
#include <asm/cpufeature.h>
+#include <asm/msr.h>
DECLARE_PER_CPU(unsigned long, cpu_dr7);
@@ -159,4 +161,26 @@ static inline unsigned long amd_get_dr_addr_mask(unsigned int dr)
}
#endif
+static inline unsigned long get_debugctlmsr(void)
+{
+ unsigned long debugctlmsr = 0;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return 0;
+#endif
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+
+ return debugctlmsr;
+}
+
+static inline void update_debugctlmsr(unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return;
+#endif
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+}
+
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ab97b22ac04a..62dc9f59ea76 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -46,6 +46,7 @@ struct gdt_page {
} __attribute__((aligned(PAGE_SIZE)));
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+DECLARE_INIT_PER_CPU(gdt_page);
/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
@@ -402,8 +403,6 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
desc->limit1 = (limit >> 16) & 0xf;
}
-void alloc_intr_gate(unsigned int n, const void *addr);
-
static inline void init_idt_data(struct idt_data *data, unsigned int n,
const void *addr)
{
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 702d93fdd10e..c492bdc97b05 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -44,32 +44,32 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define DISABLE_PTI 0
#else
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
# define DISABLE_RETPOLINE 0
#else
# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
(1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
#endif
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
# define DISABLE_RETHUNK 0
#else
# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
#endif
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
# define DISABLE_UNRET 0
#else
# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
#endif
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
# define DISABLE_CALL_DEPTH_TRACKING 0
#else
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
@@ -117,6 +117,18 @@
#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
#endif
+#ifdef CONFIG_X86_FRED
+# define DISABLE_FRED 0
+#else
+# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
+#endif
+
+#ifdef CONFIG_KVM_AMD_SEV
+#define DISABLE_SEV_SNP 0
+#else
+#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -133,7 +145,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_LAM)
+#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
@@ -141,8 +153,9 @@
DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 0
+#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index e8f58ddd06d9..2e74a7f0e935 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index c4555b269a1b..f227a70ac91f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -143,15 +143,6 @@ extern void efi_free_boot_services(void);
void arch_efi_call_virt_setup(void);
void arch_efi_call_virt_teardown(void);
-/* kexec external ABI */
-struct efi_setup_data {
- u64 fw_vendor;
- u64 __unused;
- u64 tables;
- u64 smbios;
- u64 reserved[8];
-};
-
extern u64 efi_setup;
#ifdef CONFIG_EFI
@@ -238,7 +229,8 @@ static inline bool efi_is_native(void)
static inline void *efi64_zero_upper(void *p)
{
- ((u32 *)p)[1] = 0;
+ if (p)
+ ((u32 *)p)[1] = 0;
return p;
}
@@ -258,6 +250,9 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_allocate_pool(type, size, buffer) \
((type), (size), efi64_zero_upper(buffer))
+#define __efi64_argmap_locate_handle_buffer(type, proto, key, num, buf) \
+ ((type), (proto), (key), efi64_zero_upper(num), efi64_zero_upper(buf))
+
#define __efi64_argmap_create_event(type, tpl, f, c, event) \
((type), (tpl), (f), (c), efi64_zero_upper(event))
@@ -324,6 +319,10 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \
((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+/* EFI SMBIOS protocol */
+#define __efi64_argmap_get_next(protocol, smbioshandle, type, record, phandle) \
+ ((protocol), (smbioshandle), (type), efi64_zero_upper(record), \
+ efi64_zero_upper(phandle))
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
@@ -393,24 +392,8 @@ static inline void efi_reserve_boot_services(void)
}
#endif /* CONFIG_EFI */
-#ifdef CONFIG_EFI_FAKE_MEMMAP
-extern void __init efi_fake_memmap_early(void);
-extern void __init efi_fake_memmap(void);
-#else
-static inline void efi_fake_memmap_early(void)
-{
-}
-
-static inline void efi_fake_memmap(void)
-{
-}
-#endif
-
extern int __init efi_memmap_alloc(unsigned int num_entries,
struct efi_memory_map_data *data);
-extern void __efi_memmap_free(u64 phys, unsigned long size,
- unsigned long flags);
-#define __efi_memmap_free __efi_memmap_free
extern int __init efi_memmap_install(struct efi_memory_map_data *data);
extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
@@ -418,8 +401,9 @@ extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
void *buf, struct efi_mem_range *mem);
-#define arch_ima_efi_boot_mode \
- ({ extern struct boot_params boot_params; boot_params.secure_boot; })
+extern enum efi_secureboot_mode __x86_ima_efi_boot_mode(void);
+
+#define arch_ima_efi_boot_mode __x86_ima_efi_boot_mode()
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_get_runtime_map_size(void);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1e16bd5ac781..1fb83d47711f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -392,5 +392,4 @@ struct va_alignment {
} ____cacheline_aligned;
extern struct va_alignment va_align;
-extern unsigned long align_vdso_addr(unsigned long);
#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..77d20555e04d 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -8,6 +8,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/fpu/api.h>
+#include <asm/fred.h>
/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
}
#define arch_enter_from_user_mode arch_enter_from_user_mode
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
- unsigned long ti_work)
+static inline void arch_exit_work(unsigned long ti_work)
{
if (ti_work & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
fpregs_assert_state_consistent();
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
+}
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+ arch_exit_work(ti_work);
+
+ fred_update_rsp0();
#ifdef CONFIG_COMPAT
/*
@@ -73,19 +82,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#endif
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for x86 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 8 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints (see cc_stack_align4/8 in
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints (see cc_stack_align4/8 in
* arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
* low bits from any entropy chosen here.
*
- * Therefore, final stack offset entropy will be 5 (x86_64) or
- * 6 (ia32) bits.
+ * Therefore, final stack offset entropy will be 7 (x86_64) or
+ * 8 (ia32) bits.
*/
- choose_random_kstack_offset(rdtsc() & 0xFF);
+ choose_random_kstack_offset(rdtsc());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index eeed395c3177..a0e0c6b50155 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -37,7 +37,6 @@ struct pt_regs;
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
-extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern int ex_get_fixup_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index fe6312045042..906b0d5541e8 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -36,7 +36,7 @@
#define EX_TYPE_DEFAULT 1
#define EX_TYPE_FAULT 2
#define EX_TYPE_UACCESS 3
-#define EX_TYPE_COPY 4
+/* unused, was: #define EX_TYPE_COPY 4 */
#define EX_TYPE_CLEAR_FS 5
#define EX_TYPE_FPU_RESTORE 6
#define EX_TYPE_BPF 7
@@ -64,6 +64,8 @@
#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4))
#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8))
-#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
+#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
+
+#define EX_TYPE_ERETU 21
#endif
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
deleted file mode 100644
index c3b9582de7ef..000000000000
--- a/arch/x86/include/asm/fb.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_FB_H
-#define _ASM_X86_FB_H
-
-#include <asm/page.h>
-
-struct fb_info;
-
-pgprot_t pgprot_framebuffer(pgprot_t prot,
- unsigned long vm_start, unsigned long vm_end,
- unsigned long offset);
-#define pgprot_framebuffer pgprot_framebuffer
-
-int fb_is_primary_device(struct fb_info *info);
-#define fb_is_primary_device fb_is_primary_device
-
-#include <asm-generic/fb.h>
-
-#endif /* _ASM_X86_FB_H */
diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h
new file mode 100644
index 000000000000..b2743fe19339
--- /dev/null
+++ b/arch/x86/include/asm/fpu.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+#include <asm/fpu/api.h>
+
+#define kernel_fpu_available() true
+
+#endif /* ! _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index a2be3aefff9f..f86ad3335529 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -143,6 +143,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe
extern u64 xstate_get_guest_group_perm(void);
+extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+
+
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index ca6e5e5f16b2..c485f1944c5f 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -37,10 +37,12 @@ extern void fpu_flush_thread(void);
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
-static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
- !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ !(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct fpu *old_fpu = &old->thread.fpu;
+
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
@@ -60,10 +62,10 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
* Delay loading of the complete FPU state until the return to userland.
* PKRU is handled separately.
*/
-static inline void switch_fpu_finish(void)
+static inline void switch_fpu_finish(struct task_struct *new)
{
if (cpu_feature_enabled(X86_FEATURE_FPU))
- set_thread_flag(TIF_NEED_FPU_LOAD);
+ set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
}
#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 611fa41711af..eccc75bc9c4f 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -29,7 +29,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long fpu__get_fpstate_size(void);
-extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
+extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru);
extern void fpu__clear_user_states(struct fpu *fpu);
extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index ace9aa3b78a3..de16862bf230 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -2,8 +2,8 @@
/*
* FPU data structures:
*/
-#ifndef _ASM_X86_FPU_H
-#define _ASM_X86_FPU_H
+#ifndef _ASM_X86_FPU_TYPES_H
+#define _ASM_X86_FPU_TYPES_H
#include <asm/page_types.h>
@@ -591,9 +591,16 @@ struct fpu_state_config {
* even without XSAVE support, i.e. legacy features FP + SSE
*/
u64 legacy_features;
+ /*
+ * @independent_features:
+ *
+ * Features that are supported by XSAVES, but not managed as part of
+ * the FPU core, such as LBR
+ */
+ u64 independent_features;
};
/* FPU state configuration information */
extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
-#endif /* _ASM_X86_FPU_H */
+#endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index d4427b88ee12..7f39fe7980c5 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -12,10 +12,6 @@
/* Bit 63 of XCR0 is reserved for future expansion */
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
-#define XSTATE_CPUID 0x0000000d
-
-#define TILE_CPUID 0x0000001d
-
#define FXSAVE_SIZE 512
#define XSAVE_HDR_SIZE 64
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
new file mode 100644
index 000000000000..25ca00bd70e8
--- /dev/null
+++ b/arch/x86/include/asm/fred.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Macros for Flexible Return and Event Delivery (FRED)
+ */
+
+#ifndef ASM_X86_FRED_H
+#define ASM_X86_FRED_H
+
+#include <linux/const.h>
+
+#include <asm/asm.h>
+#include <asm/trapnr.h>
+
+/*
+ * FRED event return instruction opcodes for ERET{S,U}; supported in
+ * binutils >= 2.41.
+ */
+#define ERETS _ASM_BYTES(0xf2,0x0f,0x01,0xca)
+#define ERETU _ASM_BYTES(0xf3,0x0f,0x01,0xca)
+
+/*
+ * RSP is aligned to a 64-byte boundary before used to push a new stack frame
+ */
+#define FRED_STACK_FRAME_RSP_MASK _AT(unsigned long, (~0x3f))
+
+/*
+ * Used for the return address for call emulation during code patching,
+ * and measured in 64-byte cache lines.
+ */
+#define FRED_CONFIG_REDZONE_AMOUNT 1
+#define FRED_CONFIG_REDZONE (_AT(unsigned long, FRED_CONFIG_REDZONE_AMOUNT) << 6)
+#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9)
+#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p))
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_X86_FRED
+#include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/ptrace.h>
+
+struct fred_info {
+ /* Event data: CR2, DR6, ... */
+ unsigned long edata;
+ unsigned long resv;
+};
+
+/* Full format of the FRED stack frame */
+struct fred_frame {
+ struct pt_regs regs;
+ struct fred_info info;
+};
+
+static __always_inline struct fred_info *fred_info(struct pt_regs *regs)
+{
+ return &container_of(regs, struct fred_frame, regs)->info;
+}
+
+static __always_inline unsigned long fred_event_data(struct pt_regs *regs)
+{
+ return fred_info(regs)->edata;
+}
+
+void asm_fred_entrypoint_user(void);
+void asm_fred_entrypoint_kernel(void);
+void asm_fred_entry_from_kvm(struct fred_ss);
+
+__visible void fred_entry_from_user(struct pt_regs *regs);
+__visible void fred_entry_from_kernel(struct pt_regs *regs);
+__visible void __fred_entry_from_kvm(struct pt_regs *regs);
+
+/* Can be called from noinstr code, thus __always_inline */
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector)
+{
+ struct fred_ss ss = {
+ .ss =__KERNEL_DS,
+ .type = type,
+ .vector = vector,
+ .nmi = type == EVENT_TYPE_NMI,
+ .lm = 1,
+ };
+
+ asm_fred_entry_from_kvm(ss);
+}
+
+void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
+void fred_complete_exception_setup(void);
+
+DECLARE_PER_CPU(unsigned long, fred_rsp0);
+
+static __always_inline void fred_sync_rsp0(unsigned long rsp0)
+{
+ __this_cpu_write(fred_rsp0, rsp0);
+}
+
+static __always_inline void fred_update_rsp0(void)
+{
+ unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
+
+ if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
+ wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+ __this_cpu_write(fred_rsp0, rsp0);
+ }
+}
+#else /* CONFIG_X86_FRED */
+static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
+static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
+static inline void fred_complete_exception_setup(void) { }
+static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_sync_rsp0(unsigned long rsp0) { }
+static inline void fred_update_rsp0(void) { }
+#endif /* CONFIG_X86_FRED */
+#endif /* !__ASSEMBLY__ */
+
+#endif /* ASM_X86_FRED_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 35cff5f2becf..9e7e8ca8e299 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -6,7 +6,7 @@
#ifdef CONFIG_X86_64
-#include <asm/msr-index.h>
+#include <asm/msr.h>
/*
* Read/write a task's FSBASE or GSBASE. This returns the value that
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 897cf02c20b1..f9cb4d07df58 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
+#include <asm/ptrace.h>
+
#ifdef CONFIG_FUNCTION_TRACER
#ifndef CC_USING_FENTRY
# error Compiler does not support fentry?
@@ -20,8 +22,6 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
#ifndef __ASSEMBLY__
extern void __fentry__(void);
@@ -34,38 +34,56 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+#ifdef CONFIG_X86_KERNEL_IBT
+ u32 instr;
+
+ /* We want to be extra safe in case entry ip is on the page edge,
+ * but otherwise we need to avoid get_kernel_nofault()'s overhead.
+ */
+ if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
+ if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
+ return fentry_ip;
+ } else {
+ instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
+ }
+ if (is_endbr(instr))
+ fentry_ip -= ENDBR_INSN_SIZE;
+#endif
+ return fentry_ip;
+}
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
+
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
-struct ftrace_regs {
- struct pt_regs regs;
-};
+
+#include <linux/ftrace_regs.h>
static __always_inline struct pt_regs *
arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
/* Only when FL_SAVE_REGS is set, cs will be non zero */
- if (!fregs->regs.cs)
+ if (!arch_ftrace_regs(fregs)->regs.cs)
return NULL;
- return &fregs->regs;
+ return &arch_ftrace_regs(fregs)->regs;
}
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \
+ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \
+ (_regs)->cs = __KERNEL_CS; \
+ (_regs)->flags = 0; \
+ } while (0)
+
#define ftrace_regs_set_instruction_pointer(fregs, _ip) \
- do { (fregs)->regs.ip = (_ip); } while (0)
-
-#define ftrace_regs_get_instruction_pointer(fregs) \
- ((fregs)->regs.ip)
-
-#define ftrace_regs_get_argument(fregs, n) \
- regs_get_kernel_argument(&(fregs)->regs, n)
-#define ftrace_regs_get_stack_pointer(fregs) \
- kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
- regs_return_value(&(fregs)->regs)
-#define ftrace_regs_set_return_value(fregs, ret) \
- regs_set_return_value(&(fregs)->regs, ret)
-#define ftrace_override_function_with_return(fregs) \
- override_function_with_return(&(fregs)->regs)
-#define ftrace_regs_query_register_offset(name) \
- regs_query_register_offset(name)
+ do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0)
+
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+ return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
struct ftrace_ops;
#define ftrace_graph_func ftrace_graph_func
@@ -90,7 +108,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
regs->orig_ax = addr;
}
#define arch_ftrace_set_direct_caller(fregs, addr) \
- __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+ __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -150,24 +168,4 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
#endif /* !COMPILE_OFFSETS */
#endif /* !__ASSEMBLY__ */
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
- unsigned long ax;
- unsigned long dx;
- unsigned long bp;
-};
-
-static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->ax;
-}
-
-static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->bp;
-}
-#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
-#endif
-
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 99d345b686fa..6e2458088800 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do { \
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
{
int ret = 0;
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 66837b8c67f1..6ffa8b75f4cd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -15,7 +15,7 @@ typedef struct {
unsigned int irq_spurious_count;
unsigned int icr_read_retry_count;
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
unsigned int kvm_posted_intr_ipis;
unsigned int kvm_posted_intr_wakeup_ipis;
unsigned int kvm_posted_intr_nested_ipis;
@@ -44,10 +44,16 @@ typedef struct {
unsigned int irq_hv_reenlightenment_count;
unsigned int hyperv_stimer0_count;
#endif
+#ifdef CONFIG_X86_POSTED_MSI
+ unsigned int posted_msi_notification_count;
+#endif
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+#ifdef CONFIG_X86_POSTED_MSI
+DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+#endif
#define __ARCH_IRQ_STAT
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
@@ -63,7 +69,11 @@ extern u64 arch_irq_stat(void);
#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
@@ -78,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index ab9f3dd87c80..ab0c78855ecb 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -84,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
unsigned char sec);
extern int hpet_set_periodic_freq(unsigned long freq);
-extern int hpet_rtc_dropped_irq(void);
extern int hpet_rtc_timer_init(void);
extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
extern int hpet_register_irq_handler(rtc_irq_handler handler);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b02c3cd3c0f6..edebf1020e04 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -16,8 +16,6 @@
#include <asm/irq_vectors.h>
-#define IRQ_MATRIX_BITS NR_VECTORS
-
#ifndef __ASSEMBLY__
#include <linux/percpu.h>
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
deleted file mode 100644
index 2ff26f53cd62..000000000000
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ /dev/null
@@ -1,804 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
- * Specification (TLFS):
- * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
- */
-
-#ifndef _ASM_X86_HYPERV_TLFS_H
-#define _ASM_X86_HYPERV_TLFS_H
-
-#include <linux/types.h>
-#include <asm/page.h>
-/*
- * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
- * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
- */
-#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
-#define HYPERV_CPUID_INTERFACE 0x40000001
-#define HYPERV_CPUID_VERSION 0x40000002
-#define HYPERV_CPUID_FEATURES 0x40000003
-#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
-#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
-#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
-#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
-#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C
-
-#define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081
-#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */
-
-#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082
-/* Support for the extended IOAPIC RTE format */
-#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2)
-
-#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000
-#define HYPERV_CPUID_MIN 0x40000005
-#define HYPERV_CPUID_MAX 0x4000ffff
-
-/*
- * Group D Features. The bit assignments are custom to each architecture.
- * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits.
- */
-/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
-#define HV_X64_MWAIT_AVAILABLE BIT(0)
-/* Guest debugging support is available */
-#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
-/* Performance Monitor support is available*/
-#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
-/* Support for physical CPU dynamic partitioning events is available*/
-#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
-/*
- * Support for passing hypercall input parameter block via XMM
- * registers is available
- */
-#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
-/* Support for a virtual guest idle state is available */
-#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
-/* Frequency MSRs available */
-#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
-/* Crash MSR available */
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
-/* Support for debug MSRs available */
-#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
-/* Support for extended gva ranges for flush hypercalls available */
-#define HV_FEATURE_EXT_GVA_RANGES_FLUSH BIT(14)
-/*
- * Support for returning hypercall output block via XMM
- * registers is available
- */
-#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15)
-/* stimer Direct Mode is available */
-#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
-
-/*
- * Implementation recommendations. Indicates which behaviors the hypervisor
- * recommends the OS implement for optimal performance.
- * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits.
- */
-/*
- * Recommend using hypercall for address space switches rather
- * than MOV to CR3 instruction
- */
-#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
-/* Recommend using hypercall for local TLB flushes rather
- * than INVLPG or MOV to CR3 instructions */
-#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
-/*
- * Recommend using hypercall for remote TLB flushes rather
- * than inter-processor interrupts
- */
-#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
-/*
- * Recommend using MSRs for accessing APIC registers
- * EOI, ICR and TPR rather than their memory-mapped counterparts
- */
-#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
-/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
-#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
-/*
- * Recommend using relaxed timing for this partition. If used,
- * the VM should disable any watchdog timeouts that rely on the
- * timely delivery of external interrupts
- */
-#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
-
-/*
- * Recommend not using Auto End-Of-Interrupt feature
- */
-#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
-
-/*
- * Recommend using cluster IPI hypercalls.
- */
-#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
-
-/* Recommend using the newer ExProcessorMasks interface */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
-
-/* Indicates that the hypervisor is nested within a Hyper-V partition. */
-#define HV_X64_HYPERV_NESTED BIT(12)
-
-/* Recommend using enlightened VMCS */
-#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
-
-/* Use hypercalls for MMIO config space access */
-#define HV_X64_USE_MMIO_HYPERCALLS BIT(21)
-
-/*
- * CPU management features identification.
- * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits.
- */
-#define HV_X64_START_LOGICAL_PROCESSOR BIT(0)
-#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1)
-#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2)
-#define HV_X64_RESERVED_IDENTITY_BIT BIT(31)
-
-/*
- * Virtual processor will never share a physical core with another virtual
- * processor, except for virtual processors that are reported as sibling SMT
- * threads.
- */
-#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
-
-/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
-#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
-#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
-#define HV_X64_NESTED_MSR_BITMAP BIT(19)
-
-/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */
-#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0)
-
-/*
- * This is specific to AMD and specifies that enlightened TLB flush is
- * supported. If guest opts in to this feature, ASID invalidations only
- * flushes gva -> hpa mapping entries. To flush the TLB entries derived
- * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace
- * or HvFlushGuestPhysicalAddressList).
- */
-#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22)
-
-/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */
-#define HV_PARAVISOR_PRESENT BIT(0)
-
-/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */
-#define HV_ISOLATION_TYPE GENMASK(3, 0)
-#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5)
-#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6)
-
-enum hv_isolation_type {
- HV_ISOLATION_TYPE_NONE = 0,
- HV_ISOLATION_TYPE_VBS = 1,
- HV_ISOLATION_TYPE_SNP = 2,
- HV_ISOLATION_TYPE_TDX = 3
-};
-
-/* Hyper-V specific model specific registers (MSRs) */
-
-/* MSR used to identify the guest OS. */
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-
-/* MSR used to setup pages used to communicate with the hypervisor. */
-#define HV_X64_MSR_HYPERCALL 0x40000001
-
-/* MSR used to provide vcpu index */
-#define HV_REGISTER_VP_INDEX 0x40000002
-
-/* MSR used to reset the guest OS. */
-#define HV_X64_MSR_RESET 0x40000003
-
-/* MSR used to provide vcpu runtime in 100ns units */
-#define HV_X64_MSR_VP_RUNTIME 0x40000010
-
-/* MSR used to read the per-partition time reference counter */
-#define HV_REGISTER_TIME_REF_COUNT 0x40000020
-
-/* A partition's reference time stamp counter (TSC) page */
-#define HV_REGISTER_REFERENCE_TSC 0x40000021
-
-/* MSR used to retrieve the TSC frequency */
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-
-/* MSR used to retrieve the local APIC timer frequency */
-#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
-
-/* Define the virtual APIC registers */
-#define HV_X64_MSR_EOI 0x40000070
-#define HV_X64_MSR_ICR 0x40000071
-#define HV_X64_MSR_TPR 0x40000072
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-
-/* Define synthetic interrupt controller model specific registers. */
-#define HV_REGISTER_SCONTROL 0x40000080
-#define HV_REGISTER_SVERSION 0x40000081
-#define HV_REGISTER_SIEFP 0x40000082
-#define HV_REGISTER_SIMP 0x40000083
-#define HV_REGISTER_EOM 0x40000084
-#define HV_REGISTER_SINT0 0x40000090
-#define HV_REGISTER_SINT1 0x40000091
-#define HV_REGISTER_SINT2 0x40000092
-#define HV_REGISTER_SINT3 0x40000093
-#define HV_REGISTER_SINT4 0x40000094
-#define HV_REGISTER_SINT5 0x40000095
-#define HV_REGISTER_SINT6 0x40000096
-#define HV_REGISTER_SINT7 0x40000097
-#define HV_REGISTER_SINT8 0x40000098
-#define HV_REGISTER_SINT9 0x40000099
-#define HV_REGISTER_SINT10 0x4000009A
-#define HV_REGISTER_SINT11 0x4000009B
-#define HV_REGISTER_SINT12 0x4000009C
-#define HV_REGISTER_SINT13 0x4000009D
-#define HV_REGISTER_SINT14 0x4000009E
-#define HV_REGISTER_SINT15 0x4000009F
-
-/*
- * Define synthetic interrupt controller model specific registers for
- * nested hypervisor.
- */
-#define HV_REGISTER_NESTED_SCONTROL 0x40001080
-#define HV_REGISTER_NESTED_SVERSION 0x40001081
-#define HV_REGISTER_NESTED_SIEFP 0x40001082
-#define HV_REGISTER_NESTED_SIMP 0x40001083
-#define HV_REGISTER_NESTED_EOM 0x40001084
-#define HV_REGISTER_NESTED_SINT0 0x40001090
-
-/*
- * Synthetic Timer MSRs. Four timers per vcpu.
- */
-#define HV_REGISTER_STIMER0_CONFIG 0x400000B0
-#define HV_REGISTER_STIMER0_COUNT 0x400000B1
-#define HV_REGISTER_STIMER1_CONFIG 0x400000B2
-#define HV_REGISTER_STIMER1_COUNT 0x400000B3
-#define HV_REGISTER_STIMER2_CONFIG 0x400000B4
-#define HV_REGISTER_STIMER2_COUNT 0x400000B5
-#define HV_REGISTER_STIMER3_CONFIG 0x400000B6
-#define HV_REGISTER_STIMER3_COUNT 0x400000B7
-
-/* Hyper-V guest idle MSR */
-#define HV_X64_MSR_GUEST_IDLE 0x400000F0
-
-/* Hyper-V guest crash notification MSR's */
-#define HV_REGISTER_CRASH_P0 0x40000100
-#define HV_REGISTER_CRASH_P1 0x40000101
-#define HV_REGISTER_CRASH_P2 0x40000102
-#define HV_REGISTER_CRASH_P3 0x40000103
-#define HV_REGISTER_CRASH_P4 0x40000104
-#define HV_REGISTER_CRASH_CTL 0x40000105
-
-/* TSC emulation after migration */
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
-
-/* TSC invariant control */
-#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
-
-/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
-#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0)
-
-/* Register name aliases for temporary compatibility */
-#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT
-#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG
-#define HV_X64_MSR_STIMER1_COUNT HV_REGISTER_STIMER1_COUNT
-#define HV_X64_MSR_STIMER1_CONFIG HV_REGISTER_STIMER1_CONFIG
-#define HV_X64_MSR_STIMER2_COUNT HV_REGISTER_STIMER2_COUNT
-#define HV_X64_MSR_STIMER2_CONFIG HV_REGISTER_STIMER2_CONFIG
-#define HV_X64_MSR_STIMER3_COUNT HV_REGISTER_STIMER3_COUNT
-#define HV_X64_MSR_STIMER3_CONFIG HV_REGISTER_STIMER3_CONFIG
-#define HV_X64_MSR_SCONTROL HV_REGISTER_SCONTROL
-#define HV_X64_MSR_SVERSION HV_REGISTER_SVERSION
-#define HV_X64_MSR_SIMP HV_REGISTER_SIMP
-#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP
-#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX
-#define HV_X64_MSR_EOM HV_REGISTER_EOM
-#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0
-#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15
-#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0
-#define HV_X64_MSR_CRASH_P1 HV_REGISTER_CRASH_P1
-#define HV_X64_MSR_CRASH_P2 HV_REGISTER_CRASH_P2
-#define HV_X64_MSR_CRASH_P3 HV_REGISTER_CRASH_P3
-#define HV_X64_MSR_CRASH_P4 HV_REGISTER_CRASH_P4
-#define HV_X64_MSR_CRASH_CTL HV_REGISTER_CRASH_CTL
-#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT
-#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC
-
-/*
- * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and
- * there is not associated MSR address.
- */
-#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003
-#define HV_X64_VTL_MASK GENMASK(3, 0)
-
-/* Hyper-V memory host visibility */
-enum hv_mem_host_visibility {
- VMBUS_PAGE_NOT_VISIBLE = 0,
- VMBUS_PAGE_VISIBLE_READ_ONLY = 1,
- VMBUS_PAGE_VISIBLE_READ_WRITE = 3
-};
-
-/* HvCallModifySparseGpaPageHostVisibility hypercall */
-#define HV_MAX_MODIFY_GPA_REP_COUNT ((PAGE_SIZE / sizeof(u64)) - 2)
-struct hv_gpa_range_for_visibility {
- u64 partition_id;
- u32 host_visibility:2;
- u32 reserved0:30;
- u32 reserved1;
- u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT];
-} __packed;
-
-/*
- * Declare the MSR used to setup pages used to communicate with the hypervisor.
- */
-union hv_x64_msr_hypercall_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 guest_physical_address:52;
- } __packed;
-};
-
-union hv_vp_assist_msr_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 pfn:52;
- } __packed;
-};
-
-struct hv_reenlightenment_control {
- __u64 vector:8;
- __u64 reserved1:8;
- __u64 enabled:1;
- __u64 reserved2:15;
- __u64 target_vp:32;
-} __packed;
-
-struct hv_tsc_emulation_control {
- __u64 enabled:1;
- __u64 reserved:63;
-} __packed;
-
-struct hv_tsc_emulation_status {
- __u64 inprogress:1;
- __u64 reserved:63;
-} __packed;
-
-#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-
-#define HV_X64_MSR_CRASH_PARAMS \
- (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
-
-#define HV_IPI_LOW_VECTOR 0x10
-#define HV_IPI_HIGH_VECTOR 0xff
-
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
-#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
-
-#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
-#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-
-/* Number of XMM registers used in hypercall input/output */
-#define HV_HYPERCALL_MAX_XMM_REGISTERS 6
-
-struct hv_nested_enlightenments_control {
- struct {
- __u32 directhypercall:1;
- __u32 reserved:31;
- } features;
- struct {
- __u32 inter_partition_comm:1;
- __u32 reserved:31;
- } hypercallControls;
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
- __u32 apic_assist;
- __u32 reserved1;
- __u32 vtl_entry_reason;
- __u32 vtl_reserved;
- __u64 vtl_ret_x64rax;
- __u64 vtl_ret_x64rcx;
- struct hv_nested_enlightenments_control nested_control;
- __u8 enlighten_vmentry;
- __u8 reserved2[7];
- __u64 current_nested_vmcs;
- __u8 synthetic_time_unhalted_timer_expired;
- __u8 reserved3[7];
- __u8 virtualization_fault_information[40];
- __u8 reserved4[8];
- __u8 intercept_message[256];
- __u8 vtl_ret_actions[256];
-} __packed;
-
-struct hv_enlightened_vmcs {
- u32 revision_id;
- u32 abort;
-
- u16 host_es_selector;
- u16 host_cs_selector;
- u16 host_ss_selector;
- u16 host_ds_selector;
- u16 host_fs_selector;
- u16 host_gs_selector;
- u16 host_tr_selector;
-
- u16 padding16_1;
-
- u64 host_ia32_pat;
- u64 host_ia32_efer;
-
- u64 host_cr0;
- u64 host_cr3;
- u64 host_cr4;
-
- u64 host_ia32_sysenter_esp;
- u64 host_ia32_sysenter_eip;
- u64 host_rip;
- u32 host_ia32_sysenter_cs;
-
- u32 pin_based_vm_exec_control;
- u32 vm_exit_controls;
- u32 secondary_vm_exec_control;
-
- u64 io_bitmap_a;
- u64 io_bitmap_b;
- u64 msr_bitmap;
-
- u16 guest_es_selector;
- u16 guest_cs_selector;
- u16 guest_ss_selector;
- u16 guest_ds_selector;
- u16 guest_fs_selector;
- u16 guest_gs_selector;
- u16 guest_ldtr_selector;
- u16 guest_tr_selector;
-
- u32 guest_es_limit;
- u32 guest_cs_limit;
- u32 guest_ss_limit;
- u32 guest_ds_limit;
- u32 guest_fs_limit;
- u32 guest_gs_limit;
- u32 guest_ldtr_limit;
- u32 guest_tr_limit;
- u32 guest_gdtr_limit;
- u32 guest_idtr_limit;
-
- u32 guest_es_ar_bytes;
- u32 guest_cs_ar_bytes;
- u32 guest_ss_ar_bytes;
- u32 guest_ds_ar_bytes;
- u32 guest_fs_ar_bytes;
- u32 guest_gs_ar_bytes;
- u32 guest_ldtr_ar_bytes;
- u32 guest_tr_ar_bytes;
-
- u64 guest_es_base;
- u64 guest_cs_base;
- u64 guest_ss_base;
- u64 guest_ds_base;
- u64 guest_fs_base;
- u64 guest_gs_base;
- u64 guest_ldtr_base;
- u64 guest_tr_base;
- u64 guest_gdtr_base;
- u64 guest_idtr_base;
-
- u64 padding64_1[3];
-
- u64 vm_exit_msr_store_addr;
- u64 vm_exit_msr_load_addr;
- u64 vm_entry_msr_load_addr;
-
- u64 cr3_target_value0;
- u64 cr3_target_value1;
- u64 cr3_target_value2;
- u64 cr3_target_value3;
-
- u32 page_fault_error_code_mask;
- u32 page_fault_error_code_match;
-
- u32 cr3_target_count;
- u32 vm_exit_msr_store_count;
- u32 vm_exit_msr_load_count;
- u32 vm_entry_msr_load_count;
-
- u64 tsc_offset;
- u64 virtual_apic_page_addr;
- u64 vmcs_link_pointer;
-
- u64 guest_ia32_debugctl;
- u64 guest_ia32_pat;
- u64 guest_ia32_efer;
-
- u64 guest_pdptr0;
- u64 guest_pdptr1;
- u64 guest_pdptr2;
- u64 guest_pdptr3;
-
- u64 guest_pending_dbg_exceptions;
- u64 guest_sysenter_esp;
- u64 guest_sysenter_eip;
-
- u32 guest_activity_state;
- u32 guest_sysenter_cs;
-
- u64 cr0_guest_host_mask;
- u64 cr4_guest_host_mask;
- u64 cr0_read_shadow;
- u64 cr4_read_shadow;
- u64 guest_cr0;
- u64 guest_cr3;
- u64 guest_cr4;
- u64 guest_dr7;
-
- u64 host_fs_base;
- u64 host_gs_base;
- u64 host_tr_base;
- u64 host_gdtr_base;
- u64 host_idtr_base;
- u64 host_rsp;
-
- u64 ept_pointer;
-
- u16 virtual_processor_id;
- u16 padding16_2[3];
-
- u64 padding64_2[5];
- u64 guest_physical_address;
-
- u32 vm_instruction_error;
- u32 vm_exit_reason;
- u32 vm_exit_intr_info;
- u32 vm_exit_intr_error_code;
- u32 idt_vectoring_info_field;
- u32 idt_vectoring_error_code;
- u32 vm_exit_instruction_len;
- u32 vmx_instruction_info;
-
- u64 exit_qualification;
- u64 exit_io_instruction_ecx;
- u64 exit_io_instruction_esi;
- u64 exit_io_instruction_edi;
- u64 exit_io_instruction_eip;
-
- u64 guest_linear_address;
- u64 guest_rsp;
- u64 guest_rflags;
-
- u32 guest_interruptibility_info;
- u32 cpu_based_vm_exec_control;
- u32 exception_bitmap;
- u32 vm_entry_controls;
- u32 vm_entry_intr_info_field;
- u32 vm_entry_exception_error_code;
- u32 vm_entry_instruction_len;
- u32 tpr_threshold;
-
- u64 guest_rip;
-
- u32 hv_clean_fields;
- u32 padding32_1;
- u32 hv_synthetic_controls;
- struct {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 reserved:30;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
- u32 padding32_2;
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 padding64_4[4];
- u64 guest_bndcfgs;
- u64 guest_ia32_perf_global_ctrl;
- u64 guest_ia32_s_cet;
- u64 guest_ssp;
- u64 guest_ia32_int_ssp_table_addr;
- u64 guest_ia32_lbr_ctl;
- u64 padding64_5[2];
- u64 xss_exit_bitmap;
- u64 encls_exiting_bitmap;
- u64 host_ia32_perf_global_ctrl;
- u64 tsc_multiplier;
- u64 host_ia32_s_cet;
- u64 host_ssp;
- u64 host_ia32_int_ssp_table_addr;
- u64 padding64_6;
-} __packed;
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
-
-/*
- * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by
- * pairing it with architecturally impossible exit reasons. Bit 28 is set only
- * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit
- * is pending. I.e. it will never be set by hardware for non-SMI exits (there
- * are only three), nor will it ever be set unless the VMM is an STM.
- */
-#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
-
-/*
- * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose
- * SVM enlightenments to guests.
- */
-struct hv_vmcb_enlightenments {
- struct __packed hv_enlightenments_control {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 enlightened_npt_tlb: 1;
- u32 reserved:29;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB.
- */
-#define HV_VMCB_NESTED_ENLIGHTENMENTS 31
-
-/* Synthetic VM-Exit */
-#define HV_SVM_EXITCODE_ENL 0xf0000000
-#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
-
-struct hv_partition_assist_pg {
- u32 tlb_lock_count;
-};
-
-enum hv_interrupt_type {
- HV_X64_INTERRUPT_TYPE_FIXED = 0x0000,
- HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001,
- HV_X64_INTERRUPT_TYPE_SMI = 0x0002,
- HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003,
- HV_X64_INTERRUPT_TYPE_NMI = 0x0004,
- HV_X64_INTERRUPT_TYPE_INIT = 0x0005,
- HV_X64_INTERRUPT_TYPE_SIPI = 0x0006,
- HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007,
- HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008,
- HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009,
- HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A,
-};
-
-union hv_msi_address_register {
- u32 as_uint32;
- struct {
- u32 reserved1:2;
- u32 destination_mode:1;
- u32 redirection_hint:1;
- u32 reserved2:8;
- u32 destination_id:8;
- u32 msi_base:12;
- };
-} __packed;
-
-union hv_msi_data_register {
- u32 as_uint32;
- struct {
- u32 vector:8;
- u32 delivery_mode:3;
- u32 reserved1:3;
- u32 level_assert:1;
- u32 trigger_mode:1;
- u32 reserved2:16;
- };
-} __packed;
-
-/* HvRetargetDeviceInterrupt hypercall */
-union hv_msi_entry {
- u64 as_uint64;
- struct {
- union hv_msi_address_register address;
- union hv_msi_data_register data;
- } __packed;
-};
-
-struct hv_x64_segment_register {
- u64 base;
- u32 limit;
- u16 selector;
- union {
- struct {
- u16 segment_type : 4;
- u16 non_system_segment : 1;
- u16 descriptor_privilege_level : 2;
- u16 present : 1;
- u16 reserved : 4;
- u16 available : 1;
- u16 _long : 1;
- u16 _default : 1;
- u16 granularity : 1;
- } __packed;
- u16 attributes;
- };
-} __packed;
-
-struct hv_x64_table_register {
- u16 pad[3];
- u16 limit;
- u64 base;
-} __packed;
-
-struct hv_init_vp_context {
- u64 rip;
- u64 rsp;
- u64 rflags;
-
- struct hv_x64_segment_register cs;
- struct hv_x64_segment_register ds;
- struct hv_x64_segment_register es;
- struct hv_x64_segment_register fs;
- struct hv_x64_segment_register gs;
- struct hv_x64_segment_register ss;
- struct hv_x64_segment_register tr;
- struct hv_x64_segment_register ldtr;
-
- struct hv_x64_table_register idtr;
- struct hv_x64_table_register gdtr;
-
- u64 efer;
- u64 cr0;
- u64 cr3;
- u64 cr4;
- u64 msr_cr_pat;
-} __packed;
-
-union hv_input_vtl {
- u8 as_uint8;
- struct {
- u8 target_vtl: 4;
- u8 use_target_vtl: 1;
- u8 reserved_z: 3;
- };
-} __packed;
-
-struct hv_enable_vp_vtl {
- u64 partition_id;
- u32 vp_index;
- union hv_input_vtl target_vtl;
- u8 mbz0;
- u16 mbz1;
- struct hv_init_vp_context vp_context;
-} __packed;
-
-struct hv_get_vp_from_apic_id_in {
- u64 partition_id;
- union hv_input_vtl target_vtl;
- u8 res[7];
- u32 apic_ids[];
-} __packed;
-
-#include <asm-generic/hyperv-tlfs.h>
-
-#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index c7ef6ea2fa99..9d69f3f8dbab 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -56,20 +56,9 @@ struct stat64 {
unsigned long long st_ino;
} __attribute__((packed));
-#define IA32_STACK_TOP IA32_PAGE_OFFSET
-
-#ifdef __KERNEL__
-struct linux_binprm;
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
- unsigned long stack_top, int exec_stack);
-struct mm_struct;
-extern void ia32_pick_mmap_layout(struct mm_struct *mm);
-
-#endif
-
extern bool __ia32_enabled;
-static inline bool ia32_enabled(void)
+static __always_inline bool ia32_enabled(void)
{
return __ia32_enabled;
}
@@ -81,7 +70,7 @@ static inline void ia32_disable(void)
#else /* !CONFIG_IA32_EMULATION */
-static inline bool ia32_enabled(void)
+static __always_inline bool ia32_enabled(void)
{
return IS_ENABLED(CONFIG_X86_32);
}
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h
deleted file mode 100644
index aa065c94ccf5..000000000000
--- a/arch/x86/include/asm/ia32_unistd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_IA32_UNISTD_H
-#define _ASM_X86_IA32_UNISTD_H
-
-/*
- * This file contains the system call numbers of the ia32 compat ABI,
- * this is for the kernel only.
- */
-#define __SYSCALL_ia32_NR(x) (x)
-#include <asm/unistd_32_ia32.h>
-
-#endif /* _ASM_X86_IA32_UNISTD_H */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 13639e57e1f8..ad5c68f0509d 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -13,15 +13,18 @@
#include <asm/irq_stack.h>
+typedef void (*idtentry_t)(struct pt_regs *regs);
+
/**
* DECLARE_IDTENTRY - Declare functions for simple IDT entry points
* No error code pushed by hardware
* @vector: Vector number (ignored for C)
* @func: Function name of the entry point
*
- * Declares three functions:
+ * Declares four functions:
* - The ASM entry point: asm_##func
* - The XEN PV trap entry point: xen_##func (maybe unused)
+ * - The C handler called from the FRED event dispatcher (maybe unused)
* - The C handler called from the ASM entry point
*
* Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it
@@ -31,6 +34,7 @@
#define DECLARE_IDTENTRY(vector, func) \
asmlinkage void asm_##func(void); \
asmlinkage void xen_asm_##func(void); \
+ void fred_##func(struct pt_regs *regs); \
__visible void func(struct pt_regs *regs)
/**
@@ -138,6 +142,17 @@ static __always_inline void __##func(struct pt_regs *regs, \
__visible noinstr void func(struct pt_regs *regs)
/**
+ * DEFINE_FREDENTRY_RAW - Emit code for raw FRED entry points
+ * @func: Function name of the entry point
+ *
+ * @func is called from the FRED event dispatcher with interrupts disabled.
+ *
+ * See @DEFINE_IDTENTRY_RAW for further details.
+ */
+#define DEFINE_FREDENTRY_RAW(func) \
+noinstr void fred_##func(struct pt_regs *regs)
+
+/**
* DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points
* Error code pushed by hardware
* @vector: Vector number (ignored for C)
@@ -197,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \
irqentry_state_t state = irqentry_enter(regs); \
u32 vector = (u32)(u8)error_code; \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_irq_on_irqstack_cond(__##func, regs, vector); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -233,17 +248,27 @@ static noinline void __##func(struct pt_regs *regs, u32 vector)
#define DEFINE_IDTENTRY_SYSVEC(func) \
static void __##func(struct pt_regs *regs); \
\
+static __always_inline void instr_##func(struct pt_regs *regs) \
+{ \
+ run_sysvec_on_irqstack_cond(__##func, regs); \
+} \
+ \
__visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
- run_sysvec_on_irqstack_cond(__##func, regs); \
+ instr_##func (regs); \
instrumentation_end(); \
irqentry_exit(regs, state); \
} \
\
+void fred_##func(struct pt_regs *regs) \
+{ \
+ instr_##func (regs); \
+} \
+ \
static noinline void __##func(struct pt_regs *regs)
/**
@@ -260,19 +285,29 @@ static noinline void __##func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \
static __always_inline void __##func(struct pt_regs *regs); \
\
+static __always_inline void instr_##func(struct pt_regs *regs) \
+{ \
+ __irq_enter_raw(); \
+ __##func (regs); \
+ __irq_exit_raw(); \
+} \
+ \
__visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- __irq_enter_raw(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
- __##func (regs); \
- __irq_exit_raw(); \
+ instr_##func (regs); \
instrumentation_end(); \
irqentry_exit(regs, state); \
} \
\
+void fred_##func(struct pt_regs *regs) \
+{ \
+ instr_##func (regs); \
+} \
+ \
static __always_inline void __##func(struct pt_regs *regs)
/**
@@ -410,17 +445,35 @@ __visible noinstr void func(struct pt_regs *regs, \
/* C-Code mapping */
#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
+#define DEFINE_FREDENTRY_NMI DEFINE_FREDENTRY_RAW
#ifdef CONFIG_X86_64
#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_MCE DEFINE_FREDENTRY_RAW
#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST
+#define DEFINE_FREDENTRY_DEBUG DEFINE_FREDENTRY_RAW
#endif
+void idt_install_sysvec(unsigned int n, const void *function);
+
+#ifdef CONFIG_X86_FRED
+void fred_install_sysvec(unsigned int vector, const idtentry_t function);
+#else
+static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { }
+#endif
+
+#define sysvec_install(vector, function) { \
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) \
+ fred_install_sysvec(vector, function); \
+ else \
+ idt_install_sysvec(vector, asm_##function); \
+}
+
#else /* !__ASSEMBLY__ */
/*
@@ -447,7 +500,7 @@ __visible noinstr void func(struct pt_regs *regs, \
/* System vector entries */
#define DECLARE_IDTENTRY_SYSVEC(vector, func) \
- idtentry_sysvec vector func
+ DECLARE_IDTENTRY(vector, func)
#ifdef CONFIG_X86_64
# define DECLARE_IDTENTRY_MCE(vector, func) \
@@ -655,36 +708,59 @@ DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi);
DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
+#else
+# define fred_sysvec_reschedule_ipi NULL
+# define fred_sysvec_reboot NULL
+# define fred_sysvec_call_function_single NULL
+# define fred_sysvec_call_function NULL
#endif
#ifdef CONFIG_X86_LOCAL_APIC
# ifdef CONFIG_X86_MCE_THRESHOLD
DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold);
+# else
+# define fred_sysvec_threshold NULL
# endif
# ifdef CONFIG_X86_MCE_AMD
DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error);
+# else
+# define fred_sysvec_deferred_error NULL
# endif
# ifdef CONFIG_X86_THERMAL_VECTOR
DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal);
+# else
+# define fred_sysvec_thermal NULL
# endif
# ifdef CONFIG_IRQ_WORK
DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work);
+# else
+# define fred_sysvec_irq_work NULL
# endif
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
+#else
+# define fred_sysvec_kvm_posted_intr_ipi NULL
+# define fred_sysvec_kvm_posted_intr_wakeup_ipi NULL
+# define fred_sysvec_kvm_posted_intr_nested_ipi NULL
#endif
+# ifdef CONFIG_X86_POSTED_MSI
+DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification);
+#else
+# define fred_sysvec_posted_msi_notification NULL
+# endif
+
#if IS_ENABLED(CONFIG_HYPERV)
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
-DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index b56c5741581a..53e4015242b4 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -35,6 +35,8 @@
#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
#define INAT_PFX_EVEX 15 /* EVEX prefix */
+/* x86-64 REX2 prefix */
+#define INAT_PFX_REX2 16 /* 0xD5 */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -50,7 +52,7 @@
/* Legacy prefix */
#define INAT_PFX_OFFS 0
-#define INAT_PFX_BITS 4
+#define INAT_PFX_BITS 5
#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
/* Escape opcodes */
@@ -77,6 +79,9 @@
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
+#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
+#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
+#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -128,6 +133,11 @@ static inline int inat_is_rex_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
}
+static inline int inat_is_rex2_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX2;
+}
+
static inline int inat_last_prefix_id(insn_attr_t attr)
{
if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
@@ -227,4 +237,9 @@ static inline int inat_must_evex(insn_attr_t attr)
{
return attr & INAT_EVEXONLY;
}
+
+static inline int inat_evex_scalable(insn_attr_t attr)
+{
+ return attr & INAT_EVEX_SCALABLE;
+}
#endif
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..0e82ebc5d1e1 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,10 +2,11 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
-#define __head __section(".head.text")
+#define __head __section(".head.text") __no_sanitize_undefined
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void (*free_pgt_page)(void *, void *); /* free buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
@@ -16,4 +17,6 @@ struct x86_mapping_info {
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend);
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
+
#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 1b29f58f730f..7152ea809e6a 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -112,10 +112,15 @@ struct insn {
#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
#define X86_SIB_BASE(sib) ((sib) & 0x07)
-#define X86_REX_W(rex) ((rex) & 8)
-#define X86_REX_R(rex) ((rex) & 4)
-#define X86_REX_X(rex) ((rex) & 2)
-#define X86_REX_B(rex) ((rex) & 1)
+#define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */
+#define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */
+#define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */
+#define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */
+
+#define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */
+#define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */
+#define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */
+#define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */
/* VEX bit flags */
#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
@@ -161,6 +166,18 @@ static inline void insn_get_attribute(struct insn *insn)
/* Instruction uses RIP-relative addressing */
extern int insn_rip_relative(struct insn *insn);
+static inline int insn_is_rex2(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return insn->rex_prefix.nbytes == 2;
+}
+
+static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
+{
+ return X86_REX2_M(insn->rex_prefix.bytes[1]);
+}
+
static inline int insn_is_avx(struct insn *insn)
{
if (!insn->prefixes.got)
@@ -198,6 +215,13 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes < 3)
+ return 0;
+ return X86_VEX_W(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index b65e9c46b922..6d7b04ffc5fd 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -10,7 +10,7 @@
* that group keep the CPUID for the variants sorted by model number.
*
* The defined symbol names have the following form:
- * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF}
+ * INTEL_{OPTFAMILY}_{MICROARCH}{OPTDIFF}
* where:
* OPTFAMILY Describes the family of CPUs that this belongs to. Default
* is assumed to be "_CORE" (and should be omitted). Other values
@@ -40,136 +40,152 @@
* their own names :-(
*/
-/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
-#define INTEL_FAM6_ANY X86_MODEL_ANY
+#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model)
-#define INTEL_FAM6_CORE_YONAH 0x0E
+/* Wildcard match so X86_MATCH_VFM(ANY) works */
+#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
-#define INTEL_FAM6_CORE2_MEROM 0x0F
-#define INTEL_FAM6_CORE2_MEROM_L 0x16
-#define INTEL_FAM6_CORE2_PENRYN 0x17
-#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
+#define INTEL_PENTIUM_PRO IFM(6, 0x01)
-#define INTEL_FAM6_NEHALEM 0x1E
-#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
-#define INTEL_FAM6_NEHALEM_EP 0x1A
-#define INTEL_FAM6_NEHALEM_EX 0x2E
+#define INTEL_CORE_YONAH IFM(6, 0x0E)
-#define INTEL_FAM6_WESTMERE 0x25
-#define INTEL_FAM6_WESTMERE_EP 0x2C
-#define INTEL_FAM6_WESTMERE_EX 0x2F
+#define INTEL_CORE2_MEROM IFM(6, 0x0F)
+#define INTEL_CORE2_MEROM_L IFM(6, 0x16)
+#define INTEL_CORE2_PENRYN IFM(6, 0x17)
+#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D)
-#define INTEL_FAM6_SANDYBRIDGE 0x2A
-#define INTEL_FAM6_SANDYBRIDGE_X 0x2D
-#define INTEL_FAM6_IVYBRIDGE 0x3A
-#define INTEL_FAM6_IVYBRIDGE_X 0x3E
+#define INTEL_NEHALEM IFM(6, 0x1E)
+#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */
+#define INTEL_NEHALEM_EP IFM(6, 0x1A)
+#define INTEL_NEHALEM_EX IFM(6, 0x2E)
-#define INTEL_FAM6_HASWELL 0x3C
-#define INTEL_FAM6_HASWELL_X 0x3F
-#define INTEL_FAM6_HASWELL_L 0x45
-#define INTEL_FAM6_HASWELL_G 0x46
+#define INTEL_WESTMERE IFM(6, 0x25)
+#define INTEL_WESTMERE_EP IFM(6, 0x2C)
+#define INTEL_WESTMERE_EX IFM(6, 0x2F)
-#define INTEL_FAM6_BROADWELL 0x3D
-#define INTEL_FAM6_BROADWELL_G 0x47
-#define INTEL_FAM6_BROADWELL_X 0x4F
-#define INTEL_FAM6_BROADWELL_D 0x56
+#define INTEL_SANDYBRIDGE IFM(6, 0x2A)
+#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D)
+#define INTEL_IVYBRIDGE IFM(6, 0x3A)
+#define INTEL_IVYBRIDGE_X IFM(6, 0x3E)
-#define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */
-#define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */
-#define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */
+#define INTEL_HASWELL IFM(6, 0x3C)
+#define INTEL_HASWELL_X IFM(6, 0x3F)
+#define INTEL_HASWELL_L IFM(6, 0x45)
+#define INTEL_HASWELL_G IFM(6, 0x46)
+
+#define INTEL_BROADWELL IFM(6, 0x3D)
+#define INTEL_BROADWELL_G IFM(6, 0x47)
+#define INTEL_BROADWELL_X IFM(6, 0x4F)
+#define INTEL_BROADWELL_D IFM(6, 0x56)
+
+#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */
+#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */
+#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */
/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */
/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */
-#define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */
+#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */
/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */
/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */
/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */
-#define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */
+#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */
/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */
-#define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */
-#define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */
+#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */
+#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */
-#define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */
+#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */
-#define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
+#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */
+#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */
+#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */
+#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */
+#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */
-#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
+#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */
-#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
-#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */
+#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */
+#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */
-#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
+#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */
-#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
+#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF)
-#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
-#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
+#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD)
+#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
/* "Hybrid" Processors (P-Core/E-Core) */
-#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
+#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */
+
+#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */
+#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */
-#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
-#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
+#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */
+#define INTEL_RAPTORLAKE_P IFM(6, 0xBA)
+#define INTEL_RAPTORLAKE_S IFM(6, 0xBF)
-#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
-#define INTEL_FAM6_RAPTORLAKE_P 0xBA
-#define INTEL_FAM6_RAPTORLAKE_S 0xBF
+#define INTEL_METEORLAKE IFM(6, 0xAC)
+#define INTEL_METEORLAKE_L IFM(6, 0xAA)
-#define INTEL_FAM6_METEORLAKE 0xAC
-#define INTEL_FAM6_METEORLAKE_L 0xAA
+#define INTEL_ARROWLAKE_H IFM(6, 0xC5)
+#define INTEL_ARROWLAKE IFM(6, 0xC6)
+#define INTEL_ARROWLAKE_U IFM(6, 0xB5)
-#define INTEL_FAM6_ARROWLAKE_H 0xC5
-#define INTEL_FAM6_ARROWLAKE 0xC6
+#define INTEL_LUNARLAKE_M IFM(6, 0xBD)
-#define INTEL_FAM6_LUNARLAKE_M 0xBD
+#define INTEL_PANTHERLAKE_L IFM(6, 0xCC)
/* "Small Core" Processors (Atom/E-Core) */
-#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
-#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */
+#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */
-#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
-#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
-#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
+#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */
+#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */
+#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */
-#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */
-#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
+#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */
+#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */
+#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
-#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
-#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */
+#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */
+#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */
+#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
-#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */
+#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */
+#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */
/* Note: the micro-architecture is "Goldmont Plus" */
-#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */
-#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */
-#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
-#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
+#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */
+#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */
+#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */
-#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */
+#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */
-#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
-#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
+#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */
+#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */
-#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
+#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */
/* Xeon Phi */
-#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
-#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */
+#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
/* Family 5 */
-#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
+#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+
+/* Family 19 */
+#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */
+
+/* CPU core types */
+enum intel_cpu_type {
+ INTEL_CPU_TYPE_ATOM = 0x20,
+ INTEL_CPU_TYPE_CORE = 0x40,
+};
#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 2f9eeb5c3069..5dbeac48a5b9 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -9,6 +9,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS_FMT4 8
#define MAX_PEBS_EVENTS 32
+#define MAX_PEBS_EVENTS_MASK GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
#define MAX_FIXED_PEBS_EVENTS 16
/*
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
deleted file mode 100644
index 994638ef171b..000000000000
--- a/arch/x86/include/asm/intel_pconfig.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_X86_INTEL_PCONFIG_H
-#define _ASM_X86_INTEL_PCONFIG_H
-
-#include <asm/asm.h>
-#include <asm/processor.h>
-
-enum pconfig_target {
- INVALID_TARGET = 0,
- MKTME_TARGET = 1,
- PCONFIG_TARGET_NR
-};
-
-int pconfig_target_supported(enum pconfig_target target);
-
-enum pconfig_leaf {
- MKTME_KEY_PROGRAM = 0,
- PCONFIG_LEAF_INVALID,
-};
-
-#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
-
-/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
-
-/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
-#define MKTME_KEYID_SET_KEY_DIRECT 0
-#define MKTME_KEYID_SET_KEY_RANDOM 1
-#define MKTME_KEYID_CLEAR_KEY 2
-#define MKTME_KEYID_NO_ENCRYPT 3
-
-/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
-#define MKTME_AES_XTS_128 (1 << 8)
-
-/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
-#define MKTME_PROG_SUCCESS 0
-#define MKTME_INVALID_PROG_CMD 1
-#define MKTME_ENTROPY_ERROR 2
-#define MKTME_INVALID_KEYID 3
-#define MKTME_INVALID_ENC_ALG 4
-#define MKTME_DEVICE_BUSY 5
-
-/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
-struct mktme_key_program {
- u16 keyid;
- u32 keyid_ctrl;
- u8 __rsvd[58];
- u8 key_field_1[64];
- u8 key_field_2[64];
-} __packed __aligned(256);
-
-static inline int mktme_key_program(struct mktme_key_program *key_program)
-{
- unsigned long rax = MKTME_KEY_PROGRAM;
-
- if (!pconfig_target_supported(MKTME_TARGET))
- return -ENXIO;
-
- asm volatile(PCONFIG
- : "=a" (rax), "=b" (key_program)
- : "0" (rax), "1" (key_program)
- : "memory", "cc");
-
- return rax;
-}
-
-#endif /* _ASM_X86_INTEL_PCONFIG_H */
diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h
index ce16da719596..1f9b5d225912 100644
--- a/arch/x86/include/asm/intel_punit_ipc.h
+++ b/arch/x86/include/asm/intel_punit_ipc.h
@@ -80,17 +80,10 @@ typedef enum {
#if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC)
-int intel_punit_ipc_simple_command(int cmd, int para1, int para2);
int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out);
#else
-static inline int intel_punit_ipc_simple_command(int cmd,
- int para1, int para2)
-{
- return -ENODEV;
-}
-
static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2,
u32 *in, u32 *out)
{
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
deleted file mode 100644
index 8537f597d20a..000000000000
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_SCU_IPC_H_
-#define _ASM_X86_INTEL_SCU_IPC_H_
-
-#include <linux/ioport.h>
-
-struct device;
-struct intel_scu_ipc_dev;
-
-/**
- * struct intel_scu_ipc_data - Data used to configure SCU IPC
- * @mem: Base address of SCU IPC MMIO registers
- * @irq: The IRQ number used for SCU (optional)
- */
-struct intel_scu_ipc_data {
- struct resource mem;
- int irq;
-};
-
-struct intel_scu_ipc_dev *
-__intel_scu_ipc_register(struct device *parent,
- const struct intel_scu_ipc_data *scu_data,
- struct module *owner);
-
-#define intel_scu_ipc_register(parent, scu_data) \
- __intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
-
-void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu);
-
-struct intel_scu_ipc_dev *
-__devm_intel_scu_ipc_register(struct device *parent,
- const struct intel_scu_ipc_data *scu_data,
- struct module *owner);
-
-#define devm_intel_scu_ipc_register(parent, scu_data) \
- __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
-
-struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void);
-void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu);
-struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev);
-
-int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr,
- u8 *data);
-int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr,
- u8 data);
-int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr,
- u8 *data, size_t len);
-int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr,
- u8 *data, size_t len);
-
-int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr,
- u8 data, u8 mask);
-
-int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd,
- int sub);
-int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd,
- int sub, const void *in, size_t inlen,
- size_t size, void *out, size_t outlen);
-
-static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd,
- int sub, const void *in, size_t inlen,
- void *out, size_t outlen)
-{
- return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen,
- inlen, out, outlen);
-}
-
-#endif
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 8046e70dfd7c..43b7657febca 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -10,7 +10,7 @@
#define TELEM_MAX_EVENTS_SRAM 28
#define TELEM_MAX_OS_ALLOCATED_EVENTS 20
-#include <asm/intel_scu_ipc.h>
+#include <linux/platform_data/x86/intel_scu_ipc.h>
enum telemetry_unit {
TELEM_PSS = 0,
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 3814a9263d64..ed580c7f9d0a 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -42,6 +42,7 @@
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
#include <asm/shared/io.h>
+#include <asm/special_insns.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -151,11 +152,6 @@ static inline void *phys_to_virt(phys_addr_t address)
#define phys_to_virt phys_to_virt
/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
-/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
* However, we truncate the address to unsigned int to avoid undesirable
* promotions in legacy drivers.
@@ -209,6 +205,23 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io
+#ifdef CONFIG_X86_64
+/*
+ * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
+ * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
+ * loop.
+ */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ asm volatile("rep ; movsl"
+ : "=&c"(count), "=&D"(to), "=&S"(from)
+ : "0"(count), "1"(to), "2"(from)
+ : "memory");
+}
+#define __iowrite32_copy __iowrite32_copy
+#endif
+
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped
@@ -379,7 +392,7 @@ static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
const u8 *end = from + count * 64;
while (from < end) {
- movdir64b(dst, from);
+ movdir64b_io(dst, from);
from += 64;
}
}
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 51c782600e02..0d806513c4b3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -140,7 +140,6 @@ extern void mask_ioapic_entries(void);
extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
-extern void setup_ioapic_ids_from_mpc_nocheck(void);
extern int mp_find_ioapic(u32 gsi);
extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 2fd52b65deac..3be2451e7bc8 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -10,6 +10,7 @@ extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_merge;
extern int panic_on_overflow;
+extern bool amd_iommu_snp_en;
#ifdef CONFIG_SWIOTLB
extern bool x86_swiotlb_enable;
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 836c170d3087..194dfff84cb1 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -29,7 +29,7 @@ struct irq_desc;
extern void fixup_irqs(void);
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
#endif
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 7a2ed154a5e1..5036f13ab69f 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -50,6 +50,13 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
return x86_vector_domain;
}
+extern bool enable_posted_msi;
+
+static inline bool posted_msi_supported(void)
+{
+ return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
#else /* CONFIG_IRQ_REMAP */
static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 798183867d78..562a547c29a5 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -100,8 +100,8 @@
}
#define ASM_CALL_ARG0 \
- "call %P[__func] \n" \
- ASM_REACHABLE
+ "1: call %c[__func] \n" \
+ ANNOTATE_REACHABLE(1b)
#define ASM_CALL_ARG1 \
"movq %[arg1], %%rdi \n" \
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3a19904c2db6..47051871b436 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... LOCAL_TIMER_VECTOR-1
- * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
+ * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts
+ * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
@@ -84,11 +84,9 @@
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
-#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
@@ -99,10 +97,16 @@
#define LOCAL_TIMER_VECTOR 0xec
+/*
+ * Posted interrupt notification vector for all device MSIs delivered to
+ * the host kernel.
+ */
+#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb
+
#define NR_VECTORS 256
#ifdef CONFIG_X86_LOCAL_APIC
-#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR
+#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR
#else
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 8c5ae649d2df..cf7fc2b8e3ce 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -54,6 +54,26 @@ static __always_inline void native_halt(void)
asm volatile("hlt": : :"memory");
}
+static __always_inline int native_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & X86_EFLAGS_IF);
+}
+
+static __always_inline unsigned long native_local_irq_save(void)
+{
+ unsigned long flags = native_save_fl();
+
+ native_irq_disable();
+
+ return flags;
+}
+
+static __always_inline void native_local_irq_restore(unsigned long flags)
+{
+ if (!native_irqs_disabled_flags(flags))
+ native_irq_enable();
+}
+
#endif
#ifdef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index cbbef32517f0..3f1c1d6c0da1 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -12,35 +12,28 @@
#include <linux/stringify.h>
#include <linux/types.h>
-#define JUMP_TABLE_ENTRY \
+#define JUMP_TABLE_ENTRY(key, label) \
".pushsection __jump_table, \"aw\" \n\t" \
_ASM_ALIGN "\n\t" \
".long 1b - . \n\t" \
- ".long %l[l_yes] - . \n\t" \
- _ASM_PTR "%c0 + %c1 - .\n\t" \
+ ".long " label " - . \n\t" \
+ _ASM_PTR " " key " - . \n\t" \
".popsection \n\t"
+/* This macro is also expanded on the Rust side. */
#ifdef CONFIG_HAVE_JUMP_LABEL_HACK
-
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
-{
- asm goto("1:"
- "jmp %l[l_yes] # objtool NOPs this \n\t"
- JUMP_TABLE_ENTRY
- : : "i" (key), "i" (2 | branch) : : l_yes);
-
- return false;
-l_yes:
- return true;
-}
-
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: jmp " label " # objtool NOPs this \n\t" \
+ JUMP_TABLE_ENTRY(key " + 2", label)
#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: .byte " __stringify(BYTES_NOP5) "\n\t" \
+ JUMP_TABLE_ENTRY(key, label)
+#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm goto("1:"
- ".byte " __stringify(BYTES_NOP5) "\n\t"
- JUMP_TABLE_ENTRY
+ asm goto(ARCH_STATIC_BRANCH_ASM("%c0 + %c1", "%l[l_yes]")
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -48,13 +41,11 @@ l_yes:
return true;
}
-#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
-
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
asm goto("1:"
"jmp %l[l_yes]\n\t"
- JUMP_TABLE_ENTRY
+ JUMP_TABLE_ENTRY("%c0 + %c1", "%l[l_yes]")
: : "i" (key), "i" (branch) : : l_yes);
return false;
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index c9f6a6c5de3c..8ad187462b68 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -8,14 +8,9 @@
# define PA_PGD 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
-#else
-# define PA_CONTROL_PAGE 0
-# define VA_CONTROL_PAGE 1
-# define PA_TABLE_PAGE 2
-# define PA_SWAP_PAGE 3
-# define PAGES_NR 4
#endif
+# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
#ifndef __ASSEMBLY__
@@ -25,7 +20,6 @@
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/bootparam.h>
struct kimage;
@@ -44,7 +38,6 @@ struct kimage;
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-# define KEXEC_CONTROL_PAGE_SIZE 4096
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386
@@ -59,11 +52,12 @@ struct kimage;
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
-/* Allocate one page for the pdp and the second for the code */
-# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
-
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64
+
+extern unsigned long kexec_va_control_page;
+extern unsigned long kexec_pa_table_page;
+extern unsigned long kexec_pa_swap_page;
#endif
/*
@@ -117,21 +111,21 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
}
#ifdef CONFIG_X86_32
-asmlinkage unsigned long
-relocate_kernel(unsigned long indirection_page,
- unsigned long control_page,
- unsigned long start_address,
- unsigned int has_pae,
- unsigned int preserve_context);
+typedef asmlinkage unsigned long
+relocate_kernel_fn(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae,
+ unsigned int preserve_context);
#else
-unsigned long
-relocate_kernel(unsigned long indirection_page,
- unsigned long page_list,
- unsigned long start_address,
- unsigned int preserve_context,
- unsigned int host_mem_enc_active);
+typedef unsigned long
+relocate_kernel_fn(unsigned long indirection_page,
+ unsigned long pa_control_page,
+ unsigned long start_address,
+ unsigned int preserve_context,
+ unsigned int host_mem_enc_active);
#endif
-
+extern relocate_kernel_fn relocate_kernel;
#define ARCH_HAS_KIMAGE_ARCH
#ifdef CONFIG_X86_32
@@ -146,6 +140,19 @@ struct kimage_arch {
};
#else
struct kimage_arch {
+ /*
+ * This is a kimage control page, as it must not overlap with either
+ * source or destination address ranges.
+ */
+ pgd_t *pgd;
+ /*
+ * The virtual mapping of the control code page itself is used only
+ * during the transition, while the current kernel's pages are all
+ * in place. Thus the intermediate page table pages used to map it
+ * are not control pages, but instead just normal pages obtained
+ * with get_zeroed_page(). And have to be tracked (below) so that
+ * they can be freed.
+ */
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -208,18 +215,11 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
-void arch_crash_handle_hotplug_event(struct kimage *image);
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
-#ifdef CONFIG_HOTPLUG_CPU
-int arch_crash_hotplug_cpu_support(void);
-#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_crash_hotplug_memory_support(void);
-#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
-#endif
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
unsigned int arch_crash_get_elfcorehdr_size(void);
#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 378ed944b849..823c0434bbad 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -9,14 +9,13 @@ BUILD_BUG_ON(1)
* "static_call_update()" calls.
*
* KVM_X86_OP_OPTIONAL() can be used for those functions that can have
- * a NULL definition, for example if "static_call_cond()" will be used
- * at the call sites. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
+ * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
* to make a definition optional, but in this case the default will
* be __static_call_return0.
*/
KVM_X86_OP(check_processor_compatibility)
-KVM_X86_OP(hardware_enable)
-KVM_X86_OP(hardware_disable)
+KVM_X86_OP(enable_virtualization_cpu)
+KVM_X86_OP(disable_virtualization_cpu)
KVM_X86_OP(hardware_unsetup)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
@@ -35,6 +34,7 @@ KVM_X86_OP(set_msr)
KVM_X86_OP(get_segment_base)
KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
+KVM_X86_OP(get_cpl_no_cache)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
KVM_X86_OP(is_valid_cr0)
@@ -48,6 +48,7 @@ KVM_X86_OP(set_idt)
KVM_X86_OP(get_gdt)
KVM_X86_OP(set_gdt)
KVM_X86_OP(sync_dirty_debug_regs)
+KVM_X86_OP(set_dr6)
KVM_X86_OP(set_dr7)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)
@@ -83,9 +84,7 @@ KVM_X86_OP(enable_nmi_window)
KVM_X86_OP(enable_irq_window)
KVM_X86_OP_OPTIONAL(update_cr8_intercept)
KVM_X86_OP(refresh_apicv_exec_ctrl)
-KVM_X86_OP_OPTIONAL(hwapic_irr_update)
KVM_X86_OP_OPTIONAL(hwapic_isr_update)
-KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt)
KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
KVM_X86_OP_OPTIONAL(set_apic_access_page_addr)
@@ -95,16 +94,19 @@ KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
KVM_X86_OP(load_mmu_pgd)
+KVM_X86_OP_OPTIONAL(link_external_spt)
+KVM_X86_OP_OPTIONAL(set_external_spte)
+KVM_X86_OP_OPTIONAL(free_external_spt)
+KVM_X86_OP_OPTIONAL(remove_external_spte)
KVM_X86_OP(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(request_immediate_exit)
-KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
KVM_X86_OP_OPTIONAL(vcpu_unblocking)
@@ -122,13 +124,14 @@ KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
#endif
+KVM_X86_OP_OPTIONAL(dev_get_attr)
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
-KVM_X86_OP(get_msr_feature)
+KVM_X86_OP(get_feature_msr)
KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
@@ -138,6 +141,10 @@ KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
+KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
+KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
+KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level)
+KVM_X86_OP_OPTIONAL(gmem_invalidate)
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index 058bc636356a..9159bf1a4730 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -9,14 +9,11 @@ BUILD_BUG_ON(1)
* "static_call_update()" calls.
*
* KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
- * a NULL definition, for example if "static_call_cond()" will be used
- * at the call sites.
+ * a NULL definition.
*/
-KVM_X86_PMU_OP(hw_event_available)
-KVM_X86_PMU_OP(pmc_idx_to_pmc)
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
KVM_X86_PMU_OP(msr_idx_to_pmc)
-KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
+KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early)
KVM_X86_PMU_OP(is_valid_msr)
KVM_X86_PMU_OP(get_msr)
KVM_X86_PMU_OP(set_msr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d271ba20a0b2..32ae3aa50c7e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,8 +24,9 @@
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
-#include <linux/hyperv.h>
#include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -35,7 +36,8 @@
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
-#include <asm/hyperv-tlfs.h>
+#include <asm/reboot.h>
+#include <hyperv/hvhdk.h>
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -121,6 +123,7 @@
KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(34)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -159,7 +162,6 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 256
-#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
#define ASYNC_PF_PER_VCPU 64
@@ -211,6 +213,7 @@ enum exit_fastpath_completion {
EXIT_FASTPATH_NONE,
EXIT_FASTPATH_REENTER_GUEST,
EXIT_FASTPATH_EXIT_HANDLED,
+ EXIT_FASTPATH_EXIT_USERSPACE,
};
typedef enum exit_fastpath_completion fastpath_t;
@@ -254,32 +257,31 @@ enum x86_intercept_stage;
KVM_GUESTDBG_INJECT_DB | \
KVM_GUESTDBG_BLOCKIRQ)
+#define PFERR_PRESENT_MASK BIT(0)
+#define PFERR_WRITE_MASK BIT(1)
+#define PFERR_USER_MASK BIT(2)
+#define PFERR_RSVD_MASK BIT(3)
+#define PFERR_FETCH_MASK BIT(4)
+#define PFERR_PK_MASK BIT(5)
+#define PFERR_SGX_MASK BIT(15)
+#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK BIT_ULL(36)
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
-
-#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
- PFERR_WRITE_MASK | \
- PFERR_PRESENT_MASK)
+/*
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that triggers implicit access.
+ */
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(48)
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -311,10 +313,11 @@ struct kvm_kernel_irq_routing_entry;
* the number of unique SPs that can theoretically be created is 2^n, where n
* is the number of bits that are used to compute the role.
*
- * But, even though there are 19 bits in the mask below, not all combinations
+ * But, even though there are 20 bits in the mask below, not all combinations
* of modes and flags are possible:
*
- * - invalid shadow pages are not accounted, so the bits are effectively 18
+ * - invalid shadow pages are not accounted, mirror pages are not shadowed,
+ * so the bits are effectively 18.
*
* - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
* execonly and ad_disabled are only used for nested EPT which has
@@ -347,7 +350,8 @@ union kvm_mmu_page_role {
unsigned ad_disabled:1;
unsigned guest_mode:1;
unsigned passthrough:1;
- unsigned :5;
+ unsigned is_mirror:1;
+ unsigned :4;
/*
* This is left at the top of the word so that
@@ -455,6 +459,7 @@ struct kvm_mmu {
int (*sync_spte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, int i);
struct kvm_mmu_root_info root;
+ hpa_t mirror_root_hpa;
union kvm_cpu_role cpu_role;
union kvm_mmu_page_role root_role;
@@ -530,28 +535,33 @@ struct kvm_pmc {
};
/* More counters may conflict with other existing Architectural MSRs */
-#define KVM_INTEL_PMC_MAX_GENERIC 8
-#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
-#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
-#define KVM_PMC_MAX_FIXED 3
-#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
-#define KVM_AMD_PMC_MAX_GENERIC 6
+#define KVM_MAX(a, b) ((a) >= (b) ? (a) : (b))
+#define KVM_MAX_NR_INTEL_GP_COUNTERS 8
+#define KVM_MAX_NR_AMD_GP_COUNTERS 6
+#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
+ KVM_MAX_NR_AMD_GP_COUNTERS)
+
+#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3
+#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0
+#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \
+ KVM_MAX_NR_AMD_FIXED_COUTNERS)
+
struct kvm_pmu {
u8 version;
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
u64 fixed_ctr_ctrl;
- u64 fixed_ctr_ctrl_mask;
+ u64 fixed_ctr_ctrl_rsvd;
u64 global_ctrl;
u64 global_status;
u64 counter_bitmask[2];
- u64 global_ctrl_mask;
- u64 global_status_mask;
+ u64 global_ctrl_rsvd;
+ u64 global_status_rsvd;
u64 reserved_bits;
u64 raw_event_mask;
- struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
- struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
+ struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
+ struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be
@@ -567,9 +577,9 @@ struct kvm_pmu {
u64 ds_area;
u64 pebs_enable;
- u64 pebs_enable_mask;
+ u64 pebs_enable_rsvd;
u64 pebs_data_cfg;
- u64 pebs_data_cfg_mask;
+ u64 pebs_data_cfg_rsvd;
/*
* If a guest counter is cross-mapped to host counter with different
@@ -600,18 +610,12 @@ enum {
KVM_DEBUGREG_WONT_EXIT = 2,
};
-struct kvm_mtrr_range {
- u64 base;
- u64 mask;
- struct list_head node;
-};
-
struct kvm_mtrr {
- struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
- mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+ u64 var[KVM_NR_VAR_MTRR * 2];
+ u64 fixed_64k;
+ u64 fixed_16k[2];
+ u64 fixed_4k[8];
u64 deftype;
-
- struct list_head head;
};
/* Hyper-V SynIC timer */
@@ -738,6 +742,23 @@ struct kvm_queued_exception {
bool has_payload;
};
+/*
+ * Hardware-defined CPUID leafs that are either scattered by the kernel or are
+ * unknown to the kernel, but need to be directly used by KVM. Note, these
+ * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
+ */
+enum kvm_only_cpuid_leafs {
+ CPUID_12_EAX = NCAPINTS,
+ CPUID_7_1_EDX,
+ CPUID_8000_0007_EDX,
+ CPUID_8000_0022_EAX,
+ CPUID_7_2_EDX,
+ CPUID_24_0_EBX,
+ NR_KVM_CPU_CAPS,
+
+ NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -759,6 +780,7 @@ struct kvm_vcpu_arch {
u32 pkru;
u32 hflags;
u64 efer;
+ u64 host_debugctl;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
bool load_eoi_exitmap_pending;
@@ -812,6 +834,11 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ /*
+ * This cache is to allocate external page table. E.g. private EPT used
+ * by the TDX module.
+ */
+ struct kvm_mmu_memory_cache mmu_external_spt_cache;
/*
* QEMU userspace and the guest each have their own FPU state.
@@ -853,26 +880,23 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
- struct kvm_hypervisor_cpuid kvm_cpuid;
-
- /*
- * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
- * when "struct kvm_vcpu_arch" is no longer defined in an
- * arch/x86/include/asm header. The max is mostly arbitrary, i.e.
- * can be increased as necessary.
- */
-#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+ bool is_amd_compatible;
/*
- * Track whether or not the guest is allowed to use features that are
- * governed by KVM, where "governed" means KVM needs to manage state
- * and/or explicitly enable the feature in hardware. Typically, but
- * not always, governed features can be used by the guest if and only
- * if both KVM and userspace want to expose the feature to the guest.
+ * cpu_caps holds the effective guest capabilities, i.e. the features
+ * the vCPU is allowed to use. Typically, but not always, features can
+ * be used by the guest if and only if both KVM and userspace want to
+ * expose the feature to the guest.
+ *
+ * A common exception is for virtualization holes, i.e. when KVM can't
+ * prevent the guest from using a feature, in which case the vCPU "has"
+ * the feature regardless of what KVM or userspace desires.
+ *
+ * Note, features that don't require KVM involvement in any way are
+ * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
+ * guest CPUID provided by userspace.
*/
- struct {
- DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
- } governed_features;
+ u32 cpu_caps[NR_KVM_CPU_CAPS];
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -992,9 +1016,6 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
- /* set at EPT violation at this point */
- unsigned long exit_qualification;
-
/* pv related host specific info */
struct {
bool pv_unhalted;
@@ -1205,7 +1226,7 @@ enum kvm_apicv_inhibit {
* APIC acceleration is disabled by a module parameter
* and/or not supported in hardware.
*/
- APICV_INHIBIT_REASON_DISABLE,
+ APICV_INHIBIT_REASON_DISABLED,
/*
* APIC acceleration is inhibited because AutoEOI feature is
@@ -1275,18 +1296,39 @@ enum kvm_apicv_inhibit {
* mapping between logical ID and vCPU.
*/
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
+
+ NR_APICV_INHIBIT_REASONS,
};
+#define __APICV_INHIBIT_REASON(reason) \
+ { BIT(APICV_INHIBIT_REASON_##reason), #reason }
+
+#define APICV_INHIBIT_REASONS \
+ __APICV_INHIBIT_REASON(DISABLED), \
+ __APICV_INHIBIT_REASON(HYPERV), \
+ __APICV_INHIBIT_REASON(ABSENT), \
+ __APICV_INHIBIT_REASON(BLOCKIRQ), \
+ __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
+ __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \
+ __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \
+ __APICV_INHIBIT_REASON(NESTED), \
+ __APICV_INHIBIT_REASON(IRQWIN), \
+ __APICV_INHIBIT_REASON(PIT_REINJ), \
+ __APICV_INHIBIT_REASON(SEV), \
+ __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+
struct kvm_arch {
- unsigned long vm_type;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
+ u8 vm_type;
+ bool has_private_mem;
+ bool has_protected_state;
+ bool pre_fault_allowed;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
- struct list_head zapped_obsolete_pages;
/*
* A list of kvm_mmu_page structs that, if zapped, could possibly be
* replaced by an NX huge page. A shadow page is on this list if its
@@ -1310,6 +1352,8 @@ struct kvm_arch {
*/
spinlock_t mmu_unsync_pages_lock;
+ u64 shadow_mmio_value;
+
struct iommu_domain *iommu_domain;
bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -1358,6 +1402,7 @@ struct kvm_arch {
u32 default_tsc_khz;
bool user_set_tsc;
+ u64 apic_bus_cycle_ns;
seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
@@ -1420,7 +1465,9 @@ struct kvm_arch {
bool sgx_provisioning_allowed;
struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
- struct task_struct *nx_huge_page_recovery_thread;
+ struct vhost_task *nx_huge_page_recovery_thread;
+ u64 nx_huge_page_last;
+ struct once nx_once;
#ifdef CONFIG_X86_64
/* The number of TDP MMU pages across all roots. */
@@ -1468,6 +1515,15 @@ struct kvm_arch {
*/
bool shadow_root_allocated;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+ /*
+ * If set, the VM has (or had) an external write tracking user, and
+ * thus all write tracking metadata has been allocated, even if KVM
+ * itself isn't using write tracking.
+ */
+ bool external_write_tracking_enabled;
+#endif
+
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
spinlock_t hv_root_tdp_lock;
@@ -1503,6 +1559,8 @@ struct kvm_arch {
*/
#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
struct kvm_mmu_memory_cache split_desc_cache;
+
+ gfn_t gfn_direct_bits;
};
struct kvm_vm_stat {
@@ -1595,8 +1653,10 @@ struct kvm_x86_ops {
int (*check_processor_compatibility)(void);
- int (*hardware_enable)(void);
- void (*hardware_disable)(void);
+ int (*enable_virtualization_cpu)(void);
+ void (*disable_virtualization_cpu)(void);
+ cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
+
void (*hardware_unsetup)(void);
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
@@ -1622,6 +1682,7 @@ struct kvm_x86_ops {
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
int (*get_cpl)(struct kvm_vcpu *vcpu);
+ int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -1636,6 +1697,7 @@ struct kvm_x86_ops {
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
+ void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -1665,7 +1727,8 @@ struct kvm_x86_ops {
void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
- enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
+ enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1692,13 +1755,12 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
+
+ const bool x2apic_icr_is_split;
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
- void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
- void (*hwapic_isr_update)(int isr);
- bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
+ void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
@@ -1712,6 +1774,21 @@ struct kvm_x86_ops {
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level);
+ /* Update external mapping with page table link. */
+ int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ void *external_spt);
+ /* Update the external page table from spte getting set. */
+ int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ kvm_pfn_t pfn_for_gfn);
+
+ /* Update external page tables for page table about to be freed. */
+ int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ void *external_spt);
+
+ /* Update external page table from spte getting removed, and flush TLB. */
+ int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ kvm_pfn_t pfn_for_gfn);
+
bool (*has_wbinvd_exit)(void);
u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
@@ -1720,12 +1797,15 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
- u32 *exit_int_info, u32 *exit_int_info_err_code);
+ u32 *intr_info, u32 *error_code);
+
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *intr_info, u32 *error_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
@@ -1733,10 +1813,6 @@ struct kvm_x86_ops {
struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
- void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
-
/*
* Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
* value indicates CPU dirty logging is unsupported or disabled.
@@ -1769,6 +1845,7 @@ struct kvm_x86_ops {
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
#endif
+ int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
@@ -1776,7 +1853,7 @@ struct kvm_x86_ops {
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
void (*guest_memory_reclaimed)(struct kvm *kvm);
- int (*get_msr_feature)(struct kvm_msr_entry *entry);
+ int (*get_feature_msr)(u32 msr, u64 *data);
int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len);
@@ -1796,6 +1873,10 @@ struct kvm_x86_ops {
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+ void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+ int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
+ void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
+ int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn);
};
struct kvm_x86_nested_ops {
@@ -1803,7 +1884,7 @@ struct kvm_x86_nested_ops {
bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
u32 error_code);
int (*check_events)(struct kvm_vcpu *vcpu);
- bool (*has_events)(struct kvm_vcpu *vcpu);
+ bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection);
void (*triple_fault)(struct kvm_vcpu *vcpu);
int (*get_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
@@ -1833,14 +1914,17 @@ struct kvm_arch_async_pf {
gfn_t gfn;
unsigned long cr3;
bool direct_map;
+ u64 error_code;
};
extern u32 __read_mostly kvm_nr_uret_msrs;
-extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern bool __read_mostly enable_apicv;
extern struct kvm_x86_ops kvm_x86_ops;
+#define kvm_x86_call(func) static_call(kvm_x86_##func)
+#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func)
+
#define KVM_X86_OP(func) \
DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
#define KVM_X86_OP_OPTIONAL KVM_X86_OP
@@ -1864,7 +1948,7 @@ void kvm_arch_free_vm(struct kvm *kvm);
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
if (kvm_x86_ops.flush_remote_tlbs &&
- !static_call(kvm_x86_flush_remote_tlbs)(kvm))
+ !kvm_x86_call(flush_remote_tlbs)(kvm))
return 0;
else
return -ENOTSUPP;
@@ -1877,12 +1961,20 @@ static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
if (!kvm_x86_ops.flush_remote_tlbs_range)
return -EOPNOTSUPP;
- return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
+ return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages);
}
#endif /* CONFIG_HYPERV */
+enum kvm_intr_type {
+ /* Values are arbitrary, but must be non-zero. */
+ KVM_HANDLING_IRQ = 1,
+ KVM_HANDLING_NMI,
+};
+
+/* Enable perf NMI and timer modes to work, and minimise false positives. */
#define kvm_arch_pmi_in_guest(vcpu) \
- ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
+ ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \
+ (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))
void __init kvm_mmu_x86_module_init(void);
int kvm_mmu_vendor_module_init(void);
@@ -1908,12 +2000,13 @@ void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
const struct kvm_memory_slot *memslot,
u64 start, u64 end,
int target_level);
-void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *memslot);
+void kvm_mmu_recover_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -1969,8 +2062,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
*
- * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
- * case the CR2/GPA value pass on the stack is valid.
+ * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
+ * the CR2/GPA value pass on the stack is valid.
*
* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
* state and inject single-step #DBs after skipping
@@ -2005,6 +2098,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+static inline bool kvm_can_emulate_event_vectoring(int emul_type)
+{
+ return !(emul_type & EMULTYPE_PF);
+}
+
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
@@ -2012,8 +2110,12 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
u64 *data, u8 ndata);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
+void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
+
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
+int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
@@ -2047,7 +2149,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
-void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
+unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
@@ -2090,7 +2192,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
-int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
+bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ bool always_retry);
+
+static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
+ gpa_t cr2_or_gpa)
+{
+ return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
+}
+
void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
ulong roots_to_free);
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
@@ -2121,10 +2231,9 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
}
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, unsigned long roots);
@@ -2134,12 +2243,15 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);
+
#ifdef CONFIG_KVM_PRIVATE_MEM
-#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
+#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
#else
#define kvm_arch_has_private_mem(kvm) false
#endif
+#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
+
static inline u16 kvm_read_ldt(void)
{
u16 ldt;
@@ -2200,6 +2312,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_extint(struct kvm_vcpu *v);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
@@ -2240,7 +2353,6 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
u32 size);
@@ -2262,12 +2374,12 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
+ kvm_x86_call(vcpu_blocking)(vcpu);
}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
+ kvm_x86_call(vcpu_unblocking)(vcpu);
}
static inline int kvm_cpu_get_apicid(int mps_cpu)
@@ -2292,7 +2404,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_OUT_7E_INC_RIP | \
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
- KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
+ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
+ KVM_X86_QUIRK_SLOT_ZAP_ALL | \
+ KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
/*
* KVM previously used a u32 field in kvm_run to indicate the hypercall was
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h
index 511b35069187..f163176d6f7f 100644
--- a/arch/x86/include/asm/kvmclock.h
+++ b/arch/x86/include/asm/kvmclock.h
@@ -4,8 +4,6 @@
#include <linux/percpu.h>
-extern struct clocksource kvm_clock;
-
DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 571fe4d2d232..dc31b13b87a0 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -40,27 +40,27 @@
#ifdef __ASSEMBLY__
-#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define RET jmp __x86_return_thunk
-#else /* CONFIG_RETPOLINE */
-#ifdef CONFIG_SLS
+#else /* CONFIG_MITIGATION_RETPOLINE */
+#ifdef CONFIG_MITIGATION_SLS
#define RET ret; int3
#else
#define RET ret
#endif
-#endif /* CONFIG_RETPOLINE */
+#endif /* CONFIG_MITIGATION_RETPOLINE */
#else /* __ASSEMBLY__ */
-#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define ASM_RET "jmp __x86_return_thunk\n\t"
-#else /* CONFIG_RETPOLINE */
-#ifdef CONFIG_SLS
+#else /* CONFIG_MITIGATION_RETPOLINE */
+#ifdef CONFIG_MITIGATION_SLS
#define ASM_RET "ret; int3\n\t"
#else
#define ASM_RET "ret\n\t"
#endif
-#endif /* CONFIG_RETPOLINE */
+#endif /* CONFIG_MITIGATION_RETPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 73dba8b94443..59aa966dc212 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -131,8 +131,20 @@ static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
(typeof(l->a.counter) *) old, new);
}
-/* Always has a lock prefix */
-#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+/*
+ * Implement local_xchg using CMPXCHG instruction without the LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
+ */
+static __always_inline long
+local_xchg(local_t *l, long n)
+{
+ long c = local_read(l);
+
+ do { } while (!local_try_cmpxchg(l, &c, n));
+
+ return c;
+}
/**
* local_add_unless - add unless the number is already a given value
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index de3118305838..eb2db07ef39c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -13,6 +13,7 @@
#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */
#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define MCG_SEAM_NR BIT_ULL(12) /* MCG_STATUS_SEAM_NR supported */
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
#define MCG_EXT_CNT_SHIFT 16
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
@@ -25,6 +26,7 @@
#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */
#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */
+#define MCG_STATUS_SEAM_NR BIT_ULL(12) /* Machine check inside SEAM non-root mode */
/* MCG_EXT_CTL register defines */
#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */
@@ -59,6 +61,7 @@
* - TCC bit is present in MCx_STATUS.
*/
#define MCI_CONFIG_MCAX 0x1
+#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF
@@ -120,6 +123,9 @@
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+/* Registers MISC2 to MISC4 are at offsets B to D. */
+#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e
+#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -130,6 +136,8 @@
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
#define XEC(x, mask) (((x) >> 16) & mask)
@@ -185,6 +193,32 @@ enum mce_notifier_prios {
MCE_PRIO_HIGHEST = MCE_PRIO_CEC
};
+/**
+ * struct mce_hw_err - Hardware Error Record.
+ * @m: Machine Check record.
+ * @vendor: Vendor-specific error information.
+ *
+ * Vendor-specific fields should not be added to struct mce. Instead, vendors
+ * should export their vendor-specific data through their structure in the
+ * vendor union below.
+ *
+ * AMD's vendor data is parsed by error decoding tools for supplemental error
+ * information. Thus, current offsets of existing fields must be maintained.
+ * Only add new fields at the end of AMD's vendor structure.
+ */
+struct mce_hw_err {
+ struct mce m;
+
+ union vendor_info {
+ struct {
+ u64 synd1; /* MCA_SYND1 MSR */
+ u64 synd2; /* MCA_SYND2 MSR */
+ } amd;
+ } vendor;
+};
+
+#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
+
struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -219,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id) { return -EINVAL; }
#endif
-void mce_setup(struct mce *m);
-void mce_log(struct mce *m);
+void mce_prep_record(struct mce_hw_err *err);
+void mce_log(struct mce_hw_err *err);
DECLARE_PER_CPU(struct device *, mce_device);
/* Maximum number of MCA banks per CPU. */
@@ -242,7 +276,7 @@ static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
#endif
-int mce_available(struct cpuinfo_x86 *c);
+bool mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
bool mce_usable_address(struct mce *m);
@@ -259,9 +293,10 @@ enum mcp_flags {
MCP_DONTLOG = BIT(2), /* only clear, don't log */
MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
};
-bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-int mce_notify_irq(void);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+
+bool mce_notify_irq(void);
DECLARE_PER_CPU(struct mce, injectm);
@@ -351,8 +386,6 @@ static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
-static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
-
unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 359ada486fa9..f922b682b9b4 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -15,7 +15,8 @@
#include <linux/init.h>
#include <linux/cc_platform.h>
-#include <asm/bootparam.h>
+#include <asm/asm.h>
+struct boot_params;
#ifdef CONFIG_X86_MEM_ENCRYPT
void __init mem_encrypt_init(void);
@@ -46,8 +47,8 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
-void __init sme_encrypt_kernel(struct boot_params *bp);
-void __init sme_enable(struct boot_params *bp);
+void sme_encrypt_kernel(struct boot_params *bp);
+void sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
@@ -58,6 +59,11 @@ void __init mem_encrypt_free_decrypted_mem(void);
void __init sev_es_init_vc_handling(void);
+static inline u64 sme_get_me_mask(void)
+{
+ return RIP_REL_REF(sme_me_mask);
+}
+
#define __bss_decrypted __section(".bss..decrypted")
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -75,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
-static inline void __init sme_enable(struct boot_params *bp) { }
+static inline void sme_encrypt_kernel(struct boot_params *bp) { }
+static inline void sme_enable(struct boot_params *bp) { }
static inline void sev_es_init_vc_handling(void) { }
@@ -89,6 +95,8 @@ early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool en
static inline void mem_encrypt_free_decrypted_mem(void) { }
+static inline u64 sme_get_me_mask(void) { return 0; }
+
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -106,11 +114,6 @@ void add_encrypt_protection_map(void);
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-static inline u64 sme_get_me_mask(void)
-{
- return sme_me_mask;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 0da5c227f490..3b496cdcb74b 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -37,6 +37,8 @@ typedef struct {
*/
atomic64_t tlb_gen;
+ unsigned long next_trim_cpumask;
+
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct rw_semaphore ldt_usr_sem;
struct ldt_struct *ldt;
@@ -75,7 +77,7 @@ typedef struct {
.lock = __MUTEX_INITIALIZER(mm.context.lock), \
}
-void leave_mm(int cpu);
+void leave_mm(void);
#define leave_mm leave_mm
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8dac45a2c7fc..795fdd53bd0a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -88,7 +88,13 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
#ifdef CONFIG_ADDRESS_MASKING
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
- return mm->context.lam_cr3_mask;
+ /*
+ * When switch_mm_irqs_off() is called for a kthread, it may race with
+ * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two
+ * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it
+ * reads a single value for both.
+ */
+ return READ_ONCE(mm->context.lam_cr3_mask);
}
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
@@ -145,6 +151,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
atomic64_set(&mm->context.tlb_gen, 0);
+ mm->context.next_trim_cpumask = jiffies + HZ;
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
@@ -232,11 +239,6 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
deleted file mode 100644
index c41b41edd691..000000000000
--- a/arch/x86/include/asm/mmzone.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/mmzone_32.h>
-#else
-# include <asm/mmzone_64.h>
-#endif
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
deleted file mode 100644
index 2d4515e8b7df..000000000000
--- a/arch/x86/include/asm/mmzone_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002
- *
- */
-
-#ifndef _ASM_X86_MMZONE_32_H
-#define _ASM_X86_MMZONE_32_H
-
-#include <asm/smp.h>
-
-#ifdef CONFIG_NUMA
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-#endif /* CONFIG_NUMA */
-
-#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
deleted file mode 100644
index 0c585046f744..000000000000
--- a/arch/x86/include/asm/mmzone_64.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* K8 NUMA support */
-/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
-/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_MMZONE_64_H
-#define _ASM_X86_MMZONE_64_H
-
-#ifdef CONFIG_NUMA
-
-#include <linux/mmdebug.h>
-#include <asm/smp.h>
-
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif
-#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 4b0f98a8d338..d593e52e6635 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_MPSPEC_H
#define _ASM_X86_MPSPEC_H
+#include <linux/types.h>
#include <asm/mpspec_def.h>
#include <asm/x86_init.h>
@@ -15,10 +16,10 @@ extern int pic_mode;
* Summit or generic (i.e. installer) kernels need lots of bus entries.
* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets.
*/
-#if CONFIG_BASE_SMALL == 0
-# define MAX_MP_BUSSES 260
-#else
+#ifdef CONFIG_BASE_SMALL
# define MAX_MP_BUSSES 32
+#else
+# define MAX_MP_BUSSES 260
#endif
#define MAX_IRQ_SOURCES 256
@@ -46,70 +47,31 @@ extern int smp_found_config;
# define smp_found_config 0
#endif
-static inline void get_smp_config(void)
-{
- x86_init.mpparse.get_smp_config(0);
-}
-
-static inline void early_get_smp_config(void)
-{
- x86_init.mpparse.get_smp_config(1);
-}
-
-static inline void find_smp_config(void)
-{
- x86_init.mpparse.find_smp_config();
-}
-
#ifdef CONFIG_X86_MPPARSE
extern void e820__memblock_alloc_reserved_mpc_new(void);
extern int enable_update_mptable;
-extern void default_find_smp_config(void);
-extern void default_get_smp_config(unsigned int early);
+extern void mpparse_find_mptable(void);
+extern void mpparse_parse_early_smp_config(void);
+extern void mpparse_parse_smp_config(void);
#else
static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
-#define enable_update_mptable 0
-#define default_find_smp_config x86_init_noop
-#define default_get_smp_config x86_init_uint_noop
+#define enable_update_mptable 0
+#define mpparse_find_mptable x86_init_noop
+#define mpparse_parse_early_smp_config x86_init_noop
+#define mpparse_parse_smp_config x86_init_noop
#endif
-int generic_processor_info(int apicid);
-
-#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
-
-struct physid_mask {
- unsigned long mask[PHYSID_ARRAY_SIZE];
-};
-
-typedef struct physid_mask physid_mask_t;
-
-#define physid_set(physid, map) set_bit(physid, (map).mask)
-#define physid_isset(physid, map) test_bit(physid, (map).mask)
+extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC);
-#define physids_or(dst, src1, src2) \
- bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
-
-#define physids_clear(map) \
- bitmap_zero((map).mask, MAX_LOCAL_APIC)
-
-#define physids_empty(map) \
- bitmap_empty((map).mask, MAX_LOCAL_APIC)
-
-static inline void physids_promote(unsigned long physids, physid_mask_t *map)
+static inline void reset_phys_cpu_present_map(u32 apicid)
{
- physids_clear(*map);
- map->mask[0] = physids;
+ bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+ set_bit(apicid, phys_cpu_present_map);
}
-static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
+static inline void copy_phys_cpu_present_map(unsigned long *dst)
{
- physids_clear(*map);
- physid_set(physid, *map);
+ bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC);
}
-#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
-#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
-
-extern physid_mask_t phys_cpu_present_map;
-
#endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ce4ce8720d55..f91ab1e75f9f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,10 +6,9 @@
#include <linux/nmi.h>
#include <linux/msi.h>
#include <linux/io.h>
-#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
-#include <asm/mshyperv.h>
+#include <hyperv/hvhdk.h>
/*
* Hyper-V always provides a single IO-APIC at this MMIO address.
@@ -40,7 +39,6 @@ static inline unsigned char hv_get_nmi_reason(void)
}
#if IS_ENABLED(CONFIG_HYPERV)
-extern int hyperv_init_cpuhp;
extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
@@ -293,24 +291,24 @@ static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
#endif
-static inline bool hv_is_synic_reg(unsigned int reg)
+static inline bool hv_is_synic_msr(unsigned int reg)
{
- return (reg >= HV_REGISTER_SCONTROL) &&
- (reg <= HV_REGISTER_SINT15);
+ return (reg >= HV_X64_MSR_SCONTROL) &&
+ (reg <= HV_X64_MSR_SINT15);
}
-static inline bool hv_is_sint_reg(unsigned int reg)
+static inline bool hv_is_sint_msr(unsigned int reg)
{
- return (reg >= HV_REGISTER_SINT0) &&
- (reg <= HV_REGISTER_SINT15);
+ return (reg >= HV_X64_MSR_SINT0) &&
+ (reg <= HV_X64_MSR_SINT15);
}
-u64 hv_get_register(unsigned int reg);
-void hv_set_register(unsigned int reg, u64 value);
-u64 hv_get_non_nested_register(unsigned int reg);
-void hv_set_non_nested_register(unsigned int reg, u64 value);
+u64 hv_get_msr(unsigned int reg);
+void hv_set_msr(unsigned int reg, u64 value);
+u64 hv_get_non_nested_msr(unsigned int reg);
+void hv_set_non_nested_msr(unsigned int reg, u64 value);
-static __always_inline u64 hv_raw_get_register(unsigned int reg)
+static __always_inline u64 hv_raw_get_msr(unsigned int reg)
{
return __rdmsr(reg);
}
@@ -331,10 +329,10 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
{
return -1;
}
-static inline void hv_set_register(unsigned int reg, u64 value) { }
-static inline u64 hv_get_register(unsigned int reg) { return 0; }
-static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
-static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
+static inline void hv_set_msr(unsigned int reg, u64 value) { }
+static inline u64 hv_get_msr(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; }
#endif /* CONFIG_HYPERV */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f1bd7b91b3c6..72765b2fe0d8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,8 +36,33 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
-/* Intel MSRs. Some also available on other CPUs */
+/*
+ * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc.
+ * Most MSRs support/allow only a subset of memory types, but the values
+ * themselves are common across all relevant MSRs.
+ */
+#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */
+#define X86_MEMTYPE_WC 1ull /* Write Combining */
+/* RESERVED 2 */
+/* RESERVED 3 */
+#define X86_MEMTYPE_WT 4ull /* Write Through */
+#define X86_MEMTYPE_WP 5ull /* Write Protected */
+#define X86_MEMTYPE_WB 6ull /* Write Back */
+#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */
+
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
+#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
+#define MSR_IA32_FRED_RSP2 0x1ce /* Level 2 stack pointer */
+#define MSR_IA32_FRED_RSP3 0x1cf /* Level 3 stack pointer */
+#define MSR_IA32_FRED_STKLVLS 0x1d0 /* Exception stack levels */
+#define MSR_IA32_FRED_SSP0 MSR_IA32_PL0_SSP /* Level 0 shadow stack pointer */
+#define MSR_IA32_FRED_SSP1 0x1d1 /* Level 1 shadow stack pointer */
+#define MSR_IA32_FRED_SSP2 0x1d2 /* Level 2 shadow stack pointer */
+#define MSR_IA32_FRED_SSP3 0x1d3 /* Level 3 shadow stack pointer */
+#define MSR_IA32_FRED_CONFIG 0x1d4 /* Entrypoint and interrupt stack level */
+/* Intel MSRs. Some also available on other CPUs */
#define MSR_TEST_CTRL 0x00000033
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
@@ -50,10 +75,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -152,6 +180,14 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
+#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
+ * IA32_XAPIC_DISABLE_STATUS MSR
+ * supported
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
@@ -165,10 +201,13 @@
* CPU is not vulnerable to Gather
* Data Sampling (GDS).
*/
-
-#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
- * IA32_XAPIC_DISABLE_STATUS MSR
- * supported
+#define ARCH_CAP_RFDS_NO BIT(27) /*
+ * Not susceptible to Register
+ * File Data Sampling.
+ */
+#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
+ * VERW clears CPU Register
+ * File.
*/
#define MSR_IA32_FLUSH_CMD 0x0000010b
@@ -222,6 +261,8 @@
#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+#define MSR_INTEGRITY_CAPS_SBAF_BIT 8
+#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT)
#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9)
#define MSR_LBR_NHM_FROM 0x00000680
@@ -338,6 +379,12 @@
#define MSR_IA32_CR_PAT 0x00000277
+#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \
+ ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \
+ (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \
+ (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \
+ (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56))
+
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
@@ -348,7 +395,8 @@
#define MSR_IA32_PASID_VALID BIT_ULL(31)
/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */
+#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
@@ -541,6 +589,12 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/* V6 PMON MSR range */
+#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
+#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_STEP 4
+
/* KeyID partitioning between MKTME and TDX */
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
@@ -555,6 +609,7 @@
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
@@ -591,36 +646,53 @@
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
-#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
-#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
-
-/* SNP feature bits enabled by the hypervisor */
-#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
-#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
-#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
-#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
-#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
-#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
-#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
-#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
-#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
-#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
-#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
-#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
-#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
-
-/* SNP feature bits reserved for future use. */
-#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
-#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
-#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_SNP_VTOM_BIT 3
+#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT)
+#define MSR_AMD64_SNP_REFLECT_VC_BIT 4
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT)
+#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT)
+#define MSR_AMD64_SNP_ALT_INJ_BIT 6
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT)
+#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT)
+#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT)
+#define MSR_AMD64_SNP_VMPL_SSS_BIT 10
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT)
+#define MSR_AMD64_SNP_SECURE_TSC_BIT 11
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_IBS_VIRT_BIT 14
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16
+#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
+#define MSR_AMD64_SNP_SMT_PROT_BIT 17
+#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_RMP_BASE 0xc0010132
+#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
+
+#define MSR_SVSM_CAA 0xc001f000
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
@@ -708,8 +780,15 @@
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_AMD64_SYSCFG 0xc0010010
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
+#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT 19
+#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
@@ -736,6 +815,8 @@
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
+#define MSR_K7_HWCR_CPB_DIS_BIT 25
+#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
@@ -1102,15 +1183,6 @@
#define MSR_IA32_VMX_VMFUNC 0x00000491
#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
-/* VMX_BASIC bits and bitmasks */
-#define VMX_BASIC_VMCS_SIZE_SHIFT 32
-#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
-#define VMX_BASIC_64 0x0001000000000000LLU
-#define VMX_BASIC_MEM_TYPE_SHIFT 50
-#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
-#define VMX_BASIC_MEM_TYPE_WB 6LLU
-#define VMX_BASIC_INOUT 0x0040000000000000LLU
-
/* Resctrl MSRs: */
/* - Intel: */
#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -1119,6 +1191,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50
@@ -1127,11 +1200,6 @@
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
-/* MSR_IA32_VMX_MISC bits */
-#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
-#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
-#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
-
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 65ec1965cd28..001853541f1e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -12,11 +12,13 @@
#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
+#include <linux/percpu.h>
+
struct msr_info {
- u32 msr_no;
- struct msr reg;
- struct msr *msrs;
- int err;
+ u32 msr_no;
+ struct msr reg;
+ struct msr __percpu *msrs;
+ int err;
};
struct msr_regs_info {
@@ -297,6 +299,21 @@ do { \
#endif /* !CONFIG_PARAVIRT_XXL */
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+
+/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
+static __always_inline void wrmsrns(u32 msr, u64 val)
+{
+ /*
+ * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
+ * DS prefix to avoid a trailing NOP.
+ */
+ asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
+ "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
+}
+
/*
* 64-bit version of wrmsr_safe():
*/
@@ -305,8 +322,8 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
}
-struct msr *msrs_alloc(void);
-void msrs_free(struct msr *msrs);
+struct msr __percpu *msrs_alloc(void);
+void msrs_free(struct msr __percpu *msrs);
int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
@@ -315,8 +332,8 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
@@ -345,14 +362,14 @@ static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
return 0;
}
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+ rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h));
}
static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+ wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h));
}
static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
u32 *l, u32 *h)
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 090d658a85a6..c69e269937c5 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -58,8 +58,8 @@ struct mtrr_state_type {
*/
# ifdef CONFIG_MTRR
void mtrr_bp_init(void);
-void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
- mtrr_type def_type);
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+ mtrr_type def_type);
extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
@@ -69,16 +69,15 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment);
extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
-extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
void mtrr_disable(void);
void mtrr_enable(void);
void mtrr_generic_set_state(void);
# else
-static inline void mtrr_overwrite_state(struct mtrr_var_range *var,
- unsigned int num_var,
- mtrr_type def_type)
+static inline void guest_force_mtrr_state(struct mtrr_var_range *var,
+ unsigned int num_var,
+ mtrr_type def_type)
{
}
@@ -117,7 +116,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
return 0;
}
#define mtrr_bp_init() do {} while (0)
-#define mtrr_bp_restore() do {} while (0)
#define mtrr_disable() do {} while (0)
#define mtrr_enable() do {} while (0)
#define mtrr_generic_set_state() do {} while (0)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 920426d691ce..ce857ef54cf1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -15,7 +15,6 @@
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
#define MWAIT_C1_SUBSTATE_MASK 0xf0
-#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
#define CPUID5_ECX_INTERRUPT_BREAK 0x2
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5c5f1e56c404..41a0ebb699ec 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -14,9 +14,6 @@ extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int);
-struct ctl_table;
-extern int proc_nmi_enabled(struct ctl_table *, int ,
- void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
#endif /* CONFIG_X86_LOCAL_APIC */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2aa52cab1e46..aee26bb8230f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -59,13 +59,13 @@
#ifdef CONFIG_CALL_THUNKS_DEBUG
# define CALL_THUNKS_DEBUG_INC_CALLS \
- incq %gs:__x86_call_count;
+ incq PER_CPU_VAR(__x86_call_count);
# define CALL_THUNKS_DEBUG_INC_RETS \
- incq %gs:__x86_ret_count;
+ incq PER_CPU_VAR(__x86_ret_count);
# define CALL_THUNKS_DEBUG_INC_STUFFS \
- incq %gs:__x86_stuffs_count;
+ incq PER_CPU_VAR(__x86_stuffs_count);
# define CALL_THUNKS_DEBUG_INC_CTXSW \
- incq %gs:__x86_ctxsw_count;
+ incq PER_CPU_VAR(__x86_ctxsw_count);
#else
# define CALL_THUNKS_DEBUG_INC_CALLS
# define CALL_THUNKS_DEBUG_INC_RETS
@@ -73,16 +73,13 @@
# define CALL_THUNKS_DEBUG_INC_CTXSW
#endif
-#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
+#if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
#include <asm/asm-offsets.h>
#define CREDIT_CALL_DEPTH \
movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
-#define ASM_CREDIT_CALL_DEPTH \
- movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
-
#define RESET_CALL_DEPTH \
xor %eax, %eax; \
bts $63, %rax; \
@@ -95,20 +92,14 @@
CALL_THUNKS_DEBUG_INC_CALLS
#define INCREMENT_CALL_DEPTH \
- sarq $5, %gs:pcpu_hot + X86_call_depth; \
- CALL_THUNKS_DEBUG_INC_CALLS
-
-#define ASM_INCREMENT_CALL_DEPTH \
sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#else
#define CREDIT_CALL_DEPTH
-#define ASM_CREDIT_CALL_DEPTH
#define RESET_CALL_DEPTH
-#define INCREMENT_CALL_DEPTH
-#define ASM_INCREMENT_CALL_DEPTH
#define RESET_CALL_DEPTH_FROM_CALL
+#define INCREMENT_CALL_DEPTH
#endif
/*
@@ -158,7 +149,7 @@
jnz 771b; \
/* barrier for jnz misprediction */ \
lfence; \
- ASM_CREDIT_CALL_DEPTH \
+ CREDIT_CALL_DEPTH \
CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
@@ -189,18 +180,6 @@
#ifdef __ASSEMBLY__
/*
- * This should be used immediately before an indirect jump/call. It tells
- * objtool the subsequent indirect jump/call is vouched safe for retpoline
- * builds.
- */
-.macro ANNOTATE_RETPOLINE_SAFE
-.Lhere_\@:
- .pushsection .discard.retpoline_safe
- .long .Lhere_\@
- .popsection
-.endm
-
-/*
* (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
* vs RETBleed validation.
*/
@@ -212,16 +191,15 @@
*/
.macro VALIDATE_UNRET_END
#if defined(CONFIG_NOINSTR_VALIDATION) && \
- (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
ANNOTATE_RETPOLINE_SAFE
nop
#endif
.endm
/*
- * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
- * to the retpoline thunk with a CS prefix when the register requires
- * a RAX prefix byte to encode. Also see apply_retpolines().
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
*/
.macro __CS_PREFIX reg:req
.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
@@ -241,7 +219,7 @@
* instruction irrespective of kCFI.
*/
.macro JMP_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
__CS_PREFIX \reg
jmp __x86_indirect_thunk_\reg
#else
@@ -251,7 +229,7 @@
.endm
.macro CALL_NOSPEC reg:req
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
__CS_PREFIX \reg
call __x86_indirect_thunk_\reg
#else
@@ -271,11 +249,20 @@
.Lskip_rsb_\@:
.endm
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
-#else
-#define CALL_UNTRAIN_RET ""
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
@@ -289,10 +276,10 @@
* where we have a stack but before any RET instruction.
*/
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
-#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
+#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
"call entry_ibpb", \ibpb_feature, \
__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
@@ -309,9 +296,9 @@
.macro CALL_DEPTH_ACCOUNT
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
ALTERNATIVE "", \
- __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+ __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
@@ -323,35 +310,53 @@
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
.macro CLEAR_CPU_BUFFERS
- ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+#ifdef CONFIG_X86_64
+ ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+#else
+ /*
+ * In 32bit mode, the memory operand must be a %cs reference. The data
+ * segments may not be usable (vm86 mode), and the stack segment may not
+ * be flat (ESPFIX32).
+ */
+ ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+#endif
.endm
-#else /* __ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
-#define ANNOTATE_RETPOLINE_SAFE \
- "999:\n\t" \
- ".pushsection .discard.retpoline_safe\n\t" \
- ".long 999b\n\t" \
- ".popsection\n\t"
+#else /* __ASSEMBLY__ */
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
#else
static inline void __x86_return_thunk(void) {}
#endif
-#ifdef CONFIG_CPU_UNRET_ENTRY
+#ifdef CONFIG_MITIGATION_UNRET_ENTRY
extern void retbleed_return_thunk(void);
#else
static inline void retbleed_return_thunk(void) {}
#endif
-#ifdef CONFIG_CPU_SRSO
+extern void srso_alias_untrain_ret(void);
+
+#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
#else
@@ -366,9 +371,15 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
-#ifdef CONFIG_CALL_DEPTH_TRACKING
+extern void __warn_thunk(void);
+
+#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
extern void call_depth_return_thunk(void);
#define CALL_DEPTH_ACCOUNT \
@@ -382,14 +393,14 @@ DECLARE_PER_CPU(u64, __x86_ret_count);
DECLARE_PER_CPU(u64, __x86_stuffs_count);
DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
-#else /* !CONFIG_CALL_DEPTH_TRACKING */
+#else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
static inline void call_depth_return_thunk(void) {}
#define CALL_DEPTH_ACCOUNT ""
-#endif /* CONFIG_CALL_DEPTH_TRACKING */
+#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
@@ -409,19 +420,26 @@ static inline void call_depth_return_thunk(void) {}
#ifdef CONFIG_X86_64
/*
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
+ */
+#define __CS_PREFIX(reg) \
+ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\rs," reg "\n" \
+ ".byte 0x2e\n" \
+ ".endif\n" \
+ ".endr\n"
+
+/*
* Inline asm uses the %V modifier which is only in newer GCC
- * which is ensured when CONFIG_RETPOLINE is defined.
+ * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
*/
-# define CALL_NOSPEC \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_LFENCE)
+#ifdef CONFIG_MITIGATION_RETPOLINE
+#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
+ "call __x86_indirect_thunk_%V[thunk_target]\n"
+#else
+#define CALL_NOSPEC "call *%[thunk_target]\n"
+#endif
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index ef2844d69173..5469d7a7c40f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -10,8 +10,6 @@
#ifdef CONFIG_NUMA
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
extern int numa_off;
/*
@@ -25,9 +23,6 @@ extern int numa_off;
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
@@ -54,31 +49,20 @@ static inline int numa_cpu_node(int cpu)
extern void numa_set_node(int cpu, int node);
extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
-extern void numa_add_cpu(int cpu);
-extern void numa_remove_cpu(int cpu);
+extern void numa_add_cpu(unsigned int cpu);
+extern void numa_remove_cpu(unsigned int cpu);
extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
-static inline void numa_add_cpu(int cpu) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-void debug_cpumask_set_cpu(int cpu, int node, bool enable);
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable);
#endif
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
-int numa_emu_cmdline(char *str);
-#else /* CONFIG_NUMA_EMU */
-static inline int numa_emu_cmdline(char *str)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_NUMA_EMU */
-
#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index d18e5c332cb9..c9fe207916f4 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,7 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
}
#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
- vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
@@ -66,10 +66,14 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
* virt_addr_valid(kaddr) returns true.
*/
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
extern bool __virt_addr_valid(unsigned long kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr))
+static __always_inline void *pfn_to_kaddr(unsigned long pfn)
+{
+ return __va(pfn << PAGE_SHIFT);
+}
+
static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
{
return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index cc6b8e087192..d63576608ce7 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -17,6 +17,7 @@ extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
+extern unsigned long direct_map_physmem_end;
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
@@ -54,7 +55,7 @@ static inline void clear_page(void *page)
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
- "0" (page)
+ "D" (page)
: "cc", "memory", "rax", "rcx");
}
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 86bd4311daf8..974688973cf6 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -6,10 +6,7 @@
#include <linux/types.h>
#include <linux/mem_encrypt.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
@@ -31,10 +28,12 @@
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
-#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \
- CONFIG_PHYSICAL_ALIGN)
+/* Physical address where kernel should be loaded. */
+#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
+ + (CONFIG_PHYSICAL_ALIGN - 1)) \
+ & ~(CONFIG_PHYSICAL_ALIGN - 1))
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR)
#ifdef CONFIG_X86_64
#include <asm/page_64_types.h>
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d4eb9e1d61b8..041aff51eb50 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -180,13 +180,6 @@ static inline void halt(void)
PVOP_VCALL0(irq.halt);
}
-extern noinstr void pv_native_wbinvd(void);
-
-static __always_inline void wbinvd(void)
-{
- PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
-}
-
static inline u64 paravirt_read_msr(unsigned msr)
{
return PVOP_CALL1(u64, cpu.read_msr, msr);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8d4fbe1be489..fea56b04f436 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -86,8 +86,6 @@ struct pv_cpu_ops {
void (*update_io_bitmap)(void);
#endif
- void (*wbinvd)(void);
-
/* cpuid emulation, mostly so that caps bits can be disabled */
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b40c462b4af3..b3ab80a03365 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -10,7 +10,6 @@
#include <linux/numa.h>
#include <asm/io.h>
#include <asm/memtype.h>
-#include <asm/x86_init.h>
struct pci_sysdata {
int domain; /* PCI domain */
@@ -124,16 +123,4 @@ cpumask_of_pcibus(const struct pci_bus *bus)
}
#endif
-struct pci_setup_rom {
- struct setup_data data;
- uint16_t vendor;
- uint16_t devid;
- uint64_t pcilen;
- unsigned long segment;
- unsigned long bus;
- unsigned long device;
- unsigned long function;
- uint8_t romdata[];
-};
-
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 5e01883eb51e..e525cd85f999 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -3,54 +3,102 @@
#define _ASM_X86_PERCPU_H
#ifdef CONFIG_X86_64
-#define __percpu_seg gs
+# define __percpu_seg gs
+# define __percpu_rel (%rip)
#else
-#define __percpu_seg fs
+# define __percpu_seg fs
+# define __percpu_rel
#endif
#ifdef __ASSEMBLY__
#ifdef CONFIG_SMP
-#define PER_CPU_VAR(var) %__percpu_seg:var
-#else /* ! SMP */
-#define PER_CPU_VAR(var) var
-#endif /* SMP */
+# define __percpu %__percpu_seg:
+#else
+# define __percpu
+#endif
+
+#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
#ifdef CONFIG_X86_64_SMP
-#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
+# define INIT_PER_CPU_VAR(var) init_per_cpu__##var
#else
-#define INIT_PER_CPU_VAR(var) var
+# define INIT_PER_CPU_VAR(var) var
#endif
-#else /* ...!ASSEMBLY */
+#else /* !__ASSEMBLY__: */
+#include <linux/build_bug.h>
#include <linux/stringify.h>
#include <asm/asm.h>
#ifdef CONFIG_SMP
+
+#ifdef CONFIG_CC_HAS_NAMED_AS
+
+#ifdef __CHECKER__
+# define __seg_gs __attribute__((address_space(__seg_gs)))
+# define __seg_fs __attribute__((address_space(__seg_fs)))
+#endif
+
+#ifdef CONFIG_X86_64
+# define __percpu_seg_override __seg_gs
+#else
+# define __percpu_seg_override __seg_fs
+#endif
+
+#define __percpu_prefix ""
+
+#else /* !CONFIG_CC_HAS_NAMED_AS: */
+
+#define __percpu_seg_override
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
+
+#endif /* CONFIG_CC_HAS_NAMED_AS */
+
+#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
#define __my_cpu_offset this_cpu_read(this_cpu_off)
/*
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
+ *
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becames 4.
*/
-#define arch_raw_cpu_ptr(ptr) \
-({ \
- unsigned long tcp_ptr__; \
- asm ("add " __percpu_arg(1) ", %0" \
- : "=r" (tcp_ptr__) \
- : "m" (this_cpu_off), "0" (ptr)); \
- (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
+#ifndef BUILD_VDSO32_64
+#define arch_raw_cpu_ptr(_ptr) \
+({ \
+ unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \
+ \
+ tcp_ptr__ += (__force unsigned long)(_ptr); \
+ (typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \
})
#else
-#define __percpu_prefix ""
+#define arch_raw_cpu_ptr(_ptr) ({ BUILD_BUG(); (typeof(_ptr))0; })
#endif
+#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel
+
+#else /* !CONFIG_SMP: */
+
+#define __percpu_seg_override
+#define __percpu_prefix ""
+#define __force_percpu_prefix ""
+
+#define PER_CPU_VAR(var) (var)__percpu_rel
+
+#endif /* CONFIG_SMP */
+
+#define __my_cpu_type(var) typeof(var) __percpu_seg_override
+#define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr)
+#define __my_cpu_var(var) (*__my_cpu_ptr(&(var)))
#define __percpu_arg(x) __percpu_prefix "%" #x
+#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
/*
- * Initialized pointers to per-cpu variables needed for the boot
+ * Initialized pointers to per-CPU variables needed for the boot
* processor need to use these macros to get the proper address
* offset from __per_cpu_load on SMP.
*
@@ -60,72 +108,137 @@
extern typeof(var) init_per_cpu_var(var)
#ifdef CONFIG_X86_64_SMP
-#define init_per_cpu_var(var) init_per_cpu__##var
+# define init_per_cpu_var(var) init_per_cpu__##var
#else
-#define init_per_cpu_var(var) var
+# define init_per_cpu_var(var) var
#endif
-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
+/*
+ * For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though).
+ */
-#define __pcpu_type_1 u8
-#define __pcpu_type_2 u16
-#define __pcpu_type_4 u32
-#define __pcpu_type_8 u64
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
-#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
-#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
-#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
-#define __pcpu_cast_8(val) ((u64)(val))
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
-#define __pcpu_op1_1(op, dst) op "b " dst
-#define __pcpu_op1_2(op, dst) op "w " dst
-#define __pcpu_op1_4(op, dst) op "l " dst
-#define __pcpu_op1_8(op, dst) op "q " dst
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
-#define __pcpu_reg_1(mod, x) mod "q" (x)
-#define __pcpu_reg_2(mod, x) mod "r" (x)
-#define __pcpu_reg_4(mod, x) mod "r" (x)
-#define __pcpu_reg_8(mod, x) mod "r" (x)
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
-#define __pcpu_reg_imm_1(x) "qi" (x)
-#define __pcpu_reg_imm_2(x) "ri" (x)
-#define __pcpu_reg_imm_4(x) "ri" (x)
-#define __pcpu_reg_imm_8(x) "re" (x)
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
+
+#define __raw_cpu_read(size, qual, pcp) \
+({ \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \
+})
-#define percpu_to_op(size, qual, op, _var, _val) \
+#define __raw_cpu_write(size, qual, pcp, val) \
+do { \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \
+} while (0)
+
+#define __raw_cpu_read_const(pcp) __raw_cpu_read(, , pcp)
+
+#else /* !CONFIG_USE_X86_SEG_SUPPORT: */
+
+#define __raw_cpu_read(size, qual, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ \
+ asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), "%[val]") \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "m" (__my_cpu_var(_var))); \
+ \
+ (typeof(_var))(unsigned long) pfo_val__; \
+})
+
+#define __raw_cpu_write(size, qual, _var, _val) \
do { \
__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ \
if (0) { \
typeof(_var) pto_tmp__; \
pto_tmp__ = (_val); \
(void)pto_tmp__; \
} \
- asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
- : [var] "+m" (_var) \
+ asm qual(__pcpu_op2_##size("mov", "%[val]", __percpu_arg([var])) \
+ : [var] "=m" (__my_cpu_var(_var)) \
: [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
+/*
+ * The generic per-CPU infrastrucutre is not suitable for
+ * reading const-qualified variables.
+ */
+#define __raw_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
+
+#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+
+#define __raw_cpu_read_stable(size, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ \
+ asm(__pcpu_op2_##size("mov", __force_percpu_arg(a[var]), "%[val]") \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "i" (&(_var))); \
+ \
+ (typeof(_var))(unsigned long) pfo_val__; \
+})
+
#define percpu_unary_op(size, qual, op, _var) \
({ \
asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
- : [var] "+m" (_var)); \
+ : [var] "+m" (__my_cpu_var(_var))); \
})
+#define percpu_binary_op(size, qual, op, _var, _val) \
+do { \
+ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ \
+ if (0) { \
+ typeof(_var) pto_tmp__; \
+ pto_tmp__ = (_val); \
+ (void)pto_tmp__; \
+ } \
+ asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
+ : [var] "+m" (__my_cpu_var(_var)) \
+ : [val] __pcpu_reg_imm_##size(pto_val__)); \
+} while (0)
+
/*
- * Generate a percpu add to memory instruction and optimize code
+ * Generate a per-CPU add to memory instruction and optimize code
* if one is added or subtracted.
*/
#define percpu_add_op(size, qual, var, val) \
do { \
- const int pao_ID__ = (__builtin_constant_p(val) && \
- ((val) == 1 || (val) == -1)) ? \
- (int)(val) : 0; \
+ const int pao_ID__ = \
+ (__builtin_constant_p(val) && \
+ ((val) == 1 || \
+ (val) == (typeof(val))-1)) ? (int)(val) : 0; \
+ \
if (0) { \
typeof(var) pao_tmp__; \
pao_tmp__ = (val); \
@@ -136,77 +249,67 @@ do { \
else if (pao_ID__ == -1) \
percpu_unary_op(size, qual, "dec", var); \
else \
- percpu_to_op(size, qual, "add", var, val); \
+ percpu_binary_op(size, qual, "add", var, val); \
} while (0)
-#define percpu_from_op(size, qual, op, _var) \
-({ \
- __pcpu_type_##size pfo_val__; \
- asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \
- : [val] __pcpu_reg_##size("=", pfo_val__) \
- : [var] "m" (_var)); \
- (typeof(_var))(unsigned long) pfo_val__; \
-})
-
-#define percpu_stable_op(size, op, _var) \
-({ \
- __pcpu_type_##size pfo_val__; \
- asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
- : [val] __pcpu_reg_##size("=", pfo_val__) \
- : [var] "p" (&(_var))); \
- (typeof(_var))(unsigned long) pfo_val__; \
-})
-
/*
* Add return operation
*/
#define percpu_add_return_op(size, qual, _var, _val) \
({ \
__pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
+ \
asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
__percpu_arg([var])) \
: [tmp] __pcpu_reg_##size("+", paro_tmp__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: : "memory"); \
(typeof(_var))(unsigned long) (paro_tmp__ + _val); \
})
/*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix. The processor cannot prefetch
- * cachelines if xchg is used.
+ * raw_cpu_xchg() can use a load-store since
+ * it is not required to be IRQ-safe.
*/
-#define percpu_xchg_op(size, qual, _var, _nval) \
+#define raw_percpu_xchg_op(_var, _nval) \
({ \
- __pcpu_type_##size pxo_old__; \
- __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \
- asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
- "%[oval]") \
- "\n1:\t" \
- __pcpu_op2_##size("cmpxchg", "%[nval]", \
- __percpu_arg([var])) \
- "\n\tjnz 1b" \
- : [oval] "=&a" (pxo_old__), \
- [var] "+m" (_var) \
- : [nval] __pcpu_reg_##size(, pxo_new__) \
- : "memory"); \
- (typeof(_var))(unsigned long) pxo_old__; \
+ typeof(_var) pxo_old__ = raw_cpu_read(_var); \
+ \
+ raw_cpu_write(_var, _nval); \
+ \
+ pxo_old__; \
+})
+
+/*
+ * this_cpu_xchg() is implemented using CMPXCHG without a LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
+ */
+#define this_percpu_xchg_op(_var, _nval) \
+({ \
+ typeof(_var) pxo_old__ = this_cpu_read(_var); \
+ \
+ do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \
+ \
+ pxo_old__; \
})
/*
- * cmpxchg has no such implied lock semantics as a result it is much
- * more efficient for cpu local operations.
+ * CMPXCHG has no such implied lock semantics as a result it is much
+ * more efficient for CPU-local operations.
*/
#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \
({ \
__pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
+ \
asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
__percpu_arg([var])) \
: [oval] "+a" (pco_old__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
: "memory"); \
+ \
(typeof(_var))(unsigned long) pco_old__; \
})
@@ -216,20 +319,23 @@ do { \
__pcpu_type_##size *pco_oval__ = (__pcpu_type_##size *)(_ovalp); \
__pcpu_type_##size pco_old__ = *pco_oval__; \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
+ \
asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
__percpu_arg([var])) \
CC_SET(z) \
: CC_OUT(z) (success), \
[oval] "+a" (pco_old__), \
- [var] "+m" (_var) \
+ [var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
: "memory"); \
if (unlikely(!success)) \
*pco_oval__ = pco_old__; \
+ \
likely(success); \
})
#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+
#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
({ \
union { \
@@ -244,7 +350,7 @@ do { \
\
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- : [var] "+m" (_var), \
+ : [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -255,8 +361,8 @@ do { \
old__.var; \
})
-#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
-#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \
({ \
@@ -276,7 +382,7 @@ do { \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
CC_SET(z) \
: CC_OUT(z) (success), \
- [var] "+m" (_var), \
+ [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -285,16 +391,18 @@ do { \
: "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
+ \
likely(success); \
})
#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval)
#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval)
-#endif
+
+#endif /* defined(CONFIG_X86_32) && !defined(CONFIG_UML) */
#ifdef CONFIG_X86_64
-#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
-#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval);
#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval);
@@ -313,7 +421,7 @@ do { \
\
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- : [var] "+m" (_var), \
+ : [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -324,8 +432,8 @@ do { \
old__.var; \
})
-#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
-#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \
({ \
@@ -345,7 +453,7 @@ do { \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
CC_SET(z) \
: CC_OUT(z) (success), \
- [var] "+m" (_var), \
+ [var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), \
"+d" (old__.high) \
: "b" (new__.low), \
@@ -359,150 +467,151 @@ do { \
#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval)
#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval)
-#endif
-
-/*
- * this_cpu_read() makes gcc load the percpu variable every time it is
- * accessed while this_cpu_read_stable() allows the value to be cached.
- * this_cpu_read_stable() is more efficient and can be used if its value
- * is guaranteed to be valid across cpus. The current users include
- * get_current() and get_thread_info() both of which are actually
- * per-thread variables implemented as per-cpu variables and thus
- * stable for the duration of the respective task.
- */
-#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
-#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
-#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
-#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
-#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
-
-#define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp)
-
-#define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op(1, , "and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op(2, , "and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op(4, , "and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op(1, , "or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op(2, , "or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op(4, , "or", (pcp), val)
-/*
- * raw_cpu_xchg() can use a load-store since it is not required to be
- * IRQ-safe.
- */
-#define raw_percpu_xchg_op(var, nval) \
-({ \
- typeof(var) pxo_ret__ = raw_cpu_read(var); \
- raw_cpu_write(var, (nval)); \
- pxo_ret__; \
-})
-
-#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
-
-#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op(1, volatile, "and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op(2, volatile, "and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op(4, volatile, "and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
-#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval)
-#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval)
-#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
+#endif /* CONFIG_X86_64 */
+
+#define raw_cpu_read_1(pcp) __raw_cpu_read(1, , pcp)
+#define raw_cpu_read_2(pcp) __raw_cpu_read(2, , pcp)
+#define raw_cpu_read_4(pcp) __raw_cpu_read(4, , pcp)
+#define raw_cpu_write_1(pcp, val) __raw_cpu_write(1, , pcp, val)
+#define raw_cpu_write_2(pcp, val) __raw_cpu_write(2, , pcp, val)
+#define raw_cpu_write_4(pcp, val) __raw_cpu_write(4, , pcp, val)
+
+#define this_cpu_read_1(pcp) __raw_cpu_read(1, volatile, pcp)
+#define this_cpu_read_2(pcp) __raw_cpu_read(2, volatile, pcp)
+#define this_cpu_read_4(pcp) __raw_cpu_read(4, volatile, pcp)
+#define this_cpu_write_1(pcp, val) __raw_cpu_write(1, volatile, pcp, val)
+#define this_cpu_write_2(pcp, val) __raw_cpu_write(2, volatile, pcp, val)
+#define this_cpu_write_4(pcp, val) __raw_cpu_write(4, volatile, pcp, val)
+
+#define this_cpu_read_stable_1(pcp) __raw_cpu_read_stable(1, pcp)
+#define this_cpu_read_stable_2(pcp) __raw_cpu_read_stable(2, pcp)
+#define this_cpu_read_stable_4(pcp) __raw_cpu_read_stable(4, pcp)
+
+#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_binary_op(1, , "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_binary_op(2, , "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_binary_op(4, , "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_binary_op(1, , "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_binary_op(2, , "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_binary_op(4, , "or", (pcp), val)
+#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_binary_op(1, volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_binary_op(2, volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_binary_op(4, volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_binary_op(1, volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_binary_op(2, volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_binary_op(4, volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval)
#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval)
#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval)
/*
- * Per cpu atomic 64 bit operations are only available under 64 bit.
- * 32 bit must fall back to generic operations.
+ * Per-CPU atomic 64-bit operations are only available under 64-bit kernels.
+ * 32-bit kernels must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
-#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
-#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+
+#define raw_cpu_read_8(pcp) __raw_cpu_read(8, , pcp)
+#define raw_cpu_write_8(pcp, val) __raw_cpu_write(8, , pcp, val)
+
+#define this_cpu_read_8(pcp) __raw_cpu_read(8, volatile, pcp)
+#define this_cpu_write_8(pcp, val) __raw_cpu_write(8, volatile, pcp, val)
+
+#define this_cpu_read_stable_8(pcp) __raw_cpu_read_stable(8, pcp)
+
+#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_binary_op(8, , "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_binary_op(8, , "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
+
+#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_binary_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_binary_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
-#endif
-static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
- const unsigned long __percpu *addr)
-{
- unsigned long __percpu *a =
- (unsigned long __percpu *)addr + nr / BITS_PER_LONG;
+#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp)
-#ifdef CONFIG_X86_64
- return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
-#else
- return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
-#endif
-}
+#else /* !CONFIG_X86_64: */
-static inline bool x86_this_cpu_variable_test_bit(int nr,
- const unsigned long __percpu *addr)
-{
- bool oldbit;
+/* There is no generic 64-bit read stable operation for 32-bit targets. */
+#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
- asm volatile("btl "__percpu_arg(2)",%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
- : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
+#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp)
- return oldbit;
-}
+#endif /* CONFIG_X86_64 */
-#define x86_this_cpu_test_bit(nr, addr) \
- (__builtin_constant_p((nr)) \
- ? x86_this_cpu_constant_test_bit((nr), (addr)) \
- : x86_this_cpu_variable_test_bit((nr), (addr)))
+#define this_cpu_read_const(pcp) __raw_cpu_read_const(pcp)
+
+/*
+ * this_cpu_read() makes the compiler load the per-CPU variable every time
+ * it is accessed while this_cpu_read_stable() allows the value to be cached.
+ * this_cpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across CPUs. The current users include
+ * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are
+ * actually per-thread variables implemented as per-CPU variables and
+ * thus stable for the duration of the respective task.
+ */
+#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
+
+#define x86_this_cpu_constant_test_bit(_nr, _var) \
+({ \
+ unsigned long __percpu *addr__ = \
+ (unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \
+ \
+ !!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \
+})
+
+#define x86_this_cpu_variable_test_bit(_nr, _var) \
+({ \
+ bool oldbit; \
+ \
+ asm volatile("btl %[nr], " __percpu_arg([var]) \
+ CC_SET(c) \
+ : CC_OUT(c) (oldbit) \
+ : [var] "m" (__my_cpu_var(_var)), \
+ [nr] "rI" (_nr)); \
+ oldbit; \
+})
+
+#define x86_this_cpu_test_bit(_nr, _var) \
+ (__builtin_constant_p(_nr) \
+ ? x86_this_cpu_constant_test_bit(_nr, _var) \
+ : x86_this_cpu_variable_test_bit(_nr, _var))
#include <asm-generic/percpu.h>
@@ -532,46 +641,47 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
{ [0 ... NR_CPUS-1] = _initvalue }; \
__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
-#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
-#define DECLARE_EARLY_PER_CPU(_type, _name) \
- DECLARE_PER_CPU(_type, _name); \
- extern __typeof__(_type) *_name##_early_ptr; \
+#define DECLARE_EARLY_PER_CPU(_type, _name) \
+ DECLARE_PER_CPU(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
-#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
- DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
- extern __typeof__(_type) *_name##_early_ptr; \
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+ DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
-#define early_per_cpu_ptr(_name) (_name##_early_ptr)
-#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
-#define early_per_cpu(_name, _cpu) \
- *(early_per_cpu_ptr(_name) ? \
- &early_per_cpu_ptr(_name)[_cpu] : \
+#define early_per_cpu_ptr(_name) (_name##_early_ptr)
+#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
+
+#define early_per_cpu(_name, _cpu) \
+ *(early_per_cpu_ptr(_name) ? \
+ &early_per_cpu_ptr(_name)[_cpu] : \
&per_cpu(_name, _cpu))
-#else /* !CONFIG_SMP */
-#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
+#else /* !CONFIG_SMP: */
+#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
DEFINE_PER_CPU(_type, _name) = _initvalue
#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue
-#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
-#define DECLARE_EARLY_PER_CPU(_type, _name) \
+#define DECLARE_EARLY_PER_CPU(_type, _name) \
DECLARE_PER_CPU(_type, _name)
-#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
DECLARE_PER_CPU_READ_MOSTLY(_type, _name)
-#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
-#define early_per_cpu_ptr(_name) NULL
+#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
+#define early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */
-#endif /* !CONFIG_SMP */
+#endif /* !CONFIG_SMP */
#endif /* _ASM_X86_PERCPU_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 3736b8a46c04..0ba8d20f2d1d 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -32,6 +32,8 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
+#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
+#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
@@ -39,6 +41,7 @@
#define INTEL_FIXED_0_USER (1ULL << 1)
#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2)
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
@@ -185,9 +188,33 @@ union cpuid10_edx {
* detection/enumeration details:
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
-#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+union cpuid35_eax {
+ struct {
+ unsigned int leaf0:1;
+ /* Counters Sub-Leaf */
+ unsigned int cntr_subleaf:1;
+ /* Auto Counter Reload Sub-Leaf */
+ unsigned int acr_subleaf:1;
+ /* Events Sub-Leaf */
+ unsigned int events_subleaf:1;
+ unsigned int reserved:28;
+ } split;
+ unsigned int full;
+};
+
+union cpuid35_ebx {
+ struct {
+ /* UnitMask2 Supported */
+ unsigned int umask2:1;
+ /* EQ-bit Supported */
+ unsigned int eq:1;
+ unsigned int reserved:30;
+ } split;
+ unsigned int full;
+};
+
/*
* Intel Architectural LBR CPUID detection/enumeration details:
*/
@@ -307,6 +334,10 @@ struct x86_pmu_capability {
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */
+/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */
+/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */
+
static inline bool use_fixed_pseudo_encoding(u64 code)
{
return !(code & 0xff);
@@ -364,6 +395,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
#define INTEL_TD_METRIC_NUM 8
+#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0
+#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
static inline bool is_metric_idx(int idx)
{
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
@@ -414,7 +448,9 @@ static inline bool is_topdown_idx(int idx)
*/
struct pebs_basic {
- u64 format_size;
+ u64 format_group:32,
+ retire_latency:16,
+ format_size:16;
u64 ip;
u64 applicable_counters;
u64 tsc;
@@ -423,7 +459,17 @@ struct pebs_basic {
struct pebs_meminfo {
u64 address;
u64 aux;
- u64 latency;
+ union {
+ /* pre Alder Lake */
+ u64 mem_latency;
+ /* Alder Lake and later */
+ struct {
+ u64 instr_latency:16;
+ u64 pad2:16;
+ u64 cache_latency:16;
+ u64 pad3:16;
+ };
+ };
u64 tsx_tuning;
};
@@ -528,15 +574,17 @@ struct x86_perf_regs {
u64 *xmm_regs;
};
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
+#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#include <asm/stacktrace.h>
/*
- * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
+ * We abuse bit 3 from flags to pass exact information, see
+ * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT.
*/
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \
@@ -555,6 +603,7 @@ struct x86_pmu_lbr {
unsigned int from;
unsigned int to;
unsigned int info;
+ bool has_callstack;
};
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 94de1a05aeba..d65e338b6a5f 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config)
static inline int p4_ht_active(void)
{
#ifdef CONFIG_SMP
- return smp_num_siblings > 1;
+ return __max_threads_per_core > 1;
#endif
return 0;
}
@@ -189,7 +189,7 @@ static inline int p4_ht_active(void)
static inline int p4_ht_thread(int cpu)
{
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
+ if (__max_threads_per_core == 2)
return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
#endif
return 0;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index c7ec5bb88334..dd4841231bb9 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/
extern gfp_t __userpte_alloc_gfp;
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Instead of one PGD, we acquire two PGDs. Being order-1, it is
* both 8k in size and 8k-aligned. That lets us just flip bit 12
@@ -147,24 +147,6 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4
set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
}
-static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- return (p4d_t *)get_zeroed_page(gfp);
-}
-
-static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
-{
- if (!pgtable_l5_enabled())
- return;
-
- BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
- free_page((unsigned long)p4d);
-}
-
extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 7f6ccff0ba72..4a12c276b181 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -23,17 +23,17 @@ typedef union {
#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
- * traditional i386 two-level paging structure:
+ * Traditional i386 two-level paging structure:
*/
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-
/*
- * the i386 is two-level, so we don't really have any
- * PMD directory physically.
+ * The i386 is two-level, so we don't really have any
+ * PMD directory physically:
*/
+#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 1024
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 9e7c0b719c3c..dabafba957ea 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -52,7 +52,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
#endif
pxx_xchg64(pud, pudp, native_pud_val(pud));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 9d077bca6a10..593f10aabd45 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -31,7 +31,8 @@ struct seq_file;
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
bool user);
-void ptdump_walk_pgd_level_checkwx(void);
+bool ptdump_walk_pgd_level_checkwx(void);
+#define ptdump_check_wx ptdump_walk_pgd_level_checkwx
void ptdump_walk_user_pgd_level_checkwx(void);
/*
@@ -41,10 +42,8 @@ void ptdump_walk_user_pgd_level_checkwx(void);
#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot)))
#ifdef CONFIG_DEBUG_WX
-#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
#define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx()
#else
-#define debug_checkwx() do { } while (0)
#define debug_checkwx_user() do { } while (0)
#endif
@@ -121,6 +120,34 @@ extern pmdval_t early_pmd_flags;
#define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
+static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v | set);
+}
+
+static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v & ~clear);
+}
+
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v & ~clear);
+}
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -141,6 +168,11 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
+static inline bool pte_decrypted(pte_t pte)
+{
+ return cc_mkdec(pte_val(pte)) == pte_val(pte);
+}
+
#define pmd_dirty pmd_dirty
static inline bool pmd_dirty(pmd_t pmd)
{
@@ -170,6 +202,13 @@ static inline int pud_young(pud_t pud)
return pud_flags(pud) & _PAGE_ACCESSED;
}
+static inline bool pud_shstk(pud_t pud)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
+}
+
static inline int pte_write(pte_t pte)
{
/*
@@ -235,6 +274,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
phys_addr_t pfn = pud_val(pud);
@@ -252,8 +292,8 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
-#define p4d_leaf p4d_large
-static inline int p4d_large(p4d_t p4d)
+#define p4d_leaf p4d_leaf
+static inline bool p4d_leaf(p4d_t p4d)
{
/* No 512 GiB pages yet */
return 0;
@@ -261,14 +301,14 @@ static inline int p4d_large(p4d_t p4d)
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
-#define pmd_leaf pmd_large
-static inline int pmd_large(pmd_t pte)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pte)
{
return pmd_flags(pte) & _PAGE_PSE;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */
+/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */
static inline int pmd_trans_huge(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
@@ -305,6 +345,30 @@ static inline int pud_devmap(pud_t pud)
}
#endif
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+static inline bool pmd_special(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SPECIAL;
+}
+
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
+{
+ return pmd_set_flags(pmd, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
+
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+static inline bool pud_special(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SPECIAL;
+}
+
+static inline pud_t pud_mkspecial(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
+
static inline int pgd_devmap(pgd_t pgd)
{
return 0;
@@ -388,23 +452,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
- bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
-
-#ifdef CONFIG_DEBUG_VM
- /*
- * Having write bit for wr-protect-marked present ptes is fatal,
- * because it means the uffd-wp bit will be ignored and write will
- * just go through.
- *
- * Use any chance of pgtable walking to verify this (e.g., when
- * page swapped out or being migrated for all purposes). It means
- * something is already wrong. Tell the admin even before the
- * process crashes. We also nail it with wrong pgtable setup.
- */
- WARN_ON_ONCE(wp && pte_write(pte));
-#endif
-
- return wp;
+ return pte_flags(pte) & _PAGE_UFFD_WP;
}
static inline pte_t pte_mkuffd_wp(pte_t pte)
@@ -491,20 +539,6 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
}
-static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v | set);
-}
-
-static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
{
@@ -599,20 +633,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
#define pmd_mkwrite pmd_mkwrite
-static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v | set);
-}
-
-static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pud_t pud_mksaveddirty(pud_t pud)
{
@@ -791,6 +811,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -838,14 +864,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmd_result = __pmd(val);
/*
- * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
- * 1. Marking Write=0 PMDs Dirty=1
- * 2. Marking Dirty=1 PMDs Write=0
- *
- * The first case cannot happen because the _PAGE_CHG_MASK will filter
- * out any Dirty bit passed in newprot. Handle the second case by
- * going through the mksaveddirty exercise. Only do this if the old
- * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ * Avoid creating shadow stack PMD by accident. See comment in
+ * pte_modify().
*/
if (oldval & _PAGE_RW)
pmd_result = pmd_mksaveddirty(pmd_result);
@@ -855,6 +875,29 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pmd_result;
}
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ pudval_t val = pud_val(pud), oldval = val;
+ pud_t pud_result;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+ pud_result = __pud(val);
+
+ /*
+ * Avoid creating shadow stack PUD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pud_result = pud_mksaveddirty(pud_result);
+ else
+ pud_result = pud_clear_saveddirty(pud_result);
+
+ return pud_result;
+}
+
/*
* mprotect needs to preserve PAT and encryption bits when updating
* vm_page_prot
@@ -909,7 +952,7 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
pmd_t *populate_extra_pmd(unsigned long vaddr);
pte_t *populate_extra_pte(unsigned long vaddr);
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd);
/*
@@ -923,12 +966,12 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
return pgd;
return __pti_set_user_pgtbl(pgdp, pgd);
}
-#else /* CONFIG_PAGE_TABLE_ISOLATION */
+#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
#endif /* __ASSEMBLY__ */
@@ -956,13 +999,13 @@ static inline int pte_same(pte_t a, pte_t b)
return a.pte == b.pte;
}
-static inline pte_t pte_next_pfn(pte_t pte)
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
if (__pte_needs_invert(pte_val(pte)))
- return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT));
- return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+ return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT));
+ return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
}
-#define pte_next_pfn pte_next_pfn
+#define pte_advance_pfn pte_advance_pfn
static inline int pte_present(pte_t a)
{
@@ -1086,23 +1129,16 @@ static inline pmd_t *pud_pgtable(pud_t pud)
*/
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
-#define pud_leaf pud_large
-static inline int pud_large(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return pud_val(pud) & _PAGE_PSE;
}
static inline int pud_bad(pud_t pud)
{
return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
}
-#else
-#define pud_leaf pud_large
-static inline int pud_large(pud_t pud)
-{
- return 0;
-}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#if CONFIG_PGTABLE_LEVELS > 3
@@ -1131,7 +1167,7 @@ static inline int p4d_bad(p4d_t p4d)
{
unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
- if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (p4d_flags(p4d) & ~ignore_flags) != 0;
@@ -1177,7 +1213,7 @@ static inline int pgd_bad(pgd_t pgd)
if (!pgtable_l5_enabled())
return 0;
- if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
@@ -1207,7 +1243,6 @@ static inline int pgd_none(pgd_t pgd)
extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
-extern void memblock_find_dma_reserve(void);
void __init poking_init(void);
unsigned long init_memory_mapping(unsigned long start,
unsigned long end, pgprot_t prot);
@@ -1401,10 +1436,28 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+#endif
+
#define __HAVE_ARCH_PMDP_INVALIDATE_AD
extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1419,12 +1472,12 @@ static inline bool pgdp_maps_userspace(void *__ptr)
return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}
-#define pgd_leaf pgd_large
-static inline int pgd_large(pgd_t pgd) { return 0; }
+#define pgd_leaf pgd_leaf
+static inline bool pgd_leaf(pgd_t pgd) { return false; }
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
- * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages
* (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
* the user one is in the last 4k. To switch between them, you
* just need to flip the 12th bit in their addresses.
@@ -1469,7 +1522,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
{
return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
-#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
@@ -1484,7 +1537,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/* Clone the user space pgd as well */
@@ -1686,6 +1739,9 @@ void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
#define arch_check_zapped_pmd arch_check_zapped_pmd
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+#define arch_check_zapped_pud arch_check_zapped_pud
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud);
+
#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
@@ -1719,6 +1775,43 @@ bool arch_is_platform_page(u64 paddr);
#define arch_is_platform_page arch_is_platform_page
#endif
+/*
+ * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
+ * TLB flush will be required as a result of the "set". For example, use
+ * in scenarios where it is known ahead of time that the routine is
+ * setting non-present entries, or re-setting an existing entry to the
+ * same value. Otherwise, use the typical "set" helpers and flush the
+ * TLB.
+ */
+#define set_pte_safe(ptep, pte) \
+({ \
+ WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
+ set_pte(ptep, pte); \
+})
+
+#define set_pmd_safe(pmdp, pmd) \
+({ \
+ WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
+ set_pmd(pmdp, pmd); \
+})
+
+#define set_pud_safe(pudp, pud) \
+({ \
+ WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
+ set_pud(pudp, pud); \
+})
+
+#define set_p4d_safe(p4dp, p4d) \
+({ \
+ WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
+ set_p4d(p4dp, p4d); \
+})
+
+#define set_pgd_safe(pgdp, pgd) \
+({ \
+ WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
+ set_pgd(pgdp, pgd); \
+})
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 24af25b1551a..d1426b64c1b9 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,7 +143,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
- if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ if (pgtable_l5_enabled() ||
+ !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) {
WRITE_ONCE(*p4dp, p4d);
return;
}
@@ -269,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
#include <asm/pgtable-invert.h>
-#endif /* !__ASSEMBLY__ */
+#else /* __ASSEMBLY__ */
+
+#define l4_index(x) (((x) >> 39) & 511)
+#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
+L3_START_KERNEL = pud_index(__START_KERNEL_map)
+
+#define SYM_DATA_START_PAGE_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
+
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 38b54b992f32..ec68f8369bdc 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -21,9 +21,9 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
-#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
+#ifdef CONFIG_X86_5LEVEL
#ifdef USE_EARLY_PGTABLE_L5
/*
* cpu_feature_enabled() is not available in early boot code.
@@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d;
# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+#ifdef CONFIG_RANDOMIZE_MEMORY
+# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end
+#endif
+
/*
* End of the region for which vmalloc page tables are pre-allocated.
* For non-KMSAN builds, this is the same as VMALLOC_END.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0b748ee16b3d..4b804531b03c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -36,10 +36,12 @@
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
#ifdef CONFIG_X86_64
-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
#else
/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
#endif
/* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -139,6 +141,8 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
+
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@@ -148,7 +152,7 @@
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | \
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
- _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
+ _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
@@ -173,6 +177,7 @@ enum page_cache_mode {
};
#endif
+#define _PAGE_CC (_AT(pteval_t, cc_mask))
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
@@ -516,8 +521,6 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern pteval_t __default_kernel_pte_mask;
-extern void set_nx(void);
-extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
@@ -548,6 +551,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
+ PG_LEVEL_256T,
PG_LEVEL_NUM
};
@@ -566,6 +570,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h
new file mode 100644
index 000000000000..de788b400fba
--- /dev/null
+++ b/arch/x86/include/asm/posted_intr.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_POSTED_INTR_H
+#define _X86_POSTED_INTR_H
+#include <asm/irq_vectors.h>
+
+#define POSTED_INTR_ON 0
+#define POSTED_INTR_SN 1
+
+#define PID_TABLE_ENTRY_VALID 1
+
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+ union {
+ u32 pir[8]; /* Posted interrupt requested */
+ u64 pir64[4];
+ };
+ union {
+ struct {
+ u16 notifications; /* Suppress and outstanding bits */
+ u8 nv;
+ u8 rsvd_2;
+ u32 ndst;
+ };
+ u64 control;
+ };
+ u32 rsvd[6];
+} __aligned(64);
+
+static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+ return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_on(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_on(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_sn(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+/* Non-atomic helpers */
+static inline void __pi_set_sn(struct pi_desc *pi_desc)
+{
+ pi_desc->notifications |= BIT(POSTED_INTR_SN);
+}
+
+static inline void __pi_clear_sn(struct pi_desc *pi_desc)
+{
+ pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
+}
+
+#ifdef CONFIG_X86_POSTED_MSI
+/*
+ * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's
+ * own interrupts. Here we do not distinguish them since those vector bits in
+ * PIR will always be zero.
+ */
+static inline bool pi_pending_this_cpu(unsigned int vector)
+{
+ struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc);
+
+ if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR))
+ return false;
+
+ return test_bit(vector, (unsigned long *)pid->pir);
+}
+
+extern void intel_posted_msi_init(void);
+#else
+static inline bool pi_pending_this_cpu(unsigned int vector) { return false; }
+
+static inline void intel_posted_msi_init(void) {};
+#endif /* X86_POSTED_MSI */
+
+#endif /* _X86_POSTED_INTR_H */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index af77235fded6..919909d8cb77 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -91,7 +91,7 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
+ return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
__percpu_arg([var]));
}
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index d8cccadc83a6..e5f204b9b33d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -51,7 +51,7 @@
#define CR3_NOFLUSH 0
#endif
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 26620d7642a9..c0cd10182e90 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -20,7 +20,6 @@ struct vm86;
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
-#include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
@@ -99,16 +98,52 @@ struct cpuinfo_topology {
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;
+ u32 logical_core_id;
+
+ // AMD Node ID and Nodes per Package info
+ u32 amd_node_id;
// Cache level topology IDs
u32 llc_id;
u32 l2c_id;
+
+ // Hardware defined CPU-type
+ union {
+ u32 cpu_type;
+ struct {
+ // CPUID.1A.EAX[23-0]
+ u32 intel_native_model_id :24;
+ // CPUID.1A.EAX[31-24]
+ u32 intel_type :8;
+ };
+ struct {
+ // CPUID 0x80000026.EBX
+ u32 amd_num_processors :16,
+ amd_power_eff_ranking :8,
+ amd_native_model_id :4,
+ amd_type :4;
+ };
+ };
};
struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
+ union {
+ /*
+ * The particular ordering (low-to-high) of (vendor,
+ * family, model) is done in case range of models, like
+ * it is usually done on AMD, need to be compared.
+ */
+ struct {
+ __u8 x86_model;
+ /* CPU family */
+ __u8 x86;
+ /* CPU vendor */
+ __u8 x86_vendor;
+ __u8 x86_reserved;
+ };
+ /* combined vendor, family, model */
+ __u32 x86_vfm;
+ };
__u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
@@ -119,8 +154,6 @@ struct cpuinfo_x86 {
#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
- /* CPUID returned core id bits: */
- __u8 x86_coreid_bits;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
@@ -148,8 +181,6 @@ struct cpuinfo_x86 {
unsigned long loops_per_jiffy;
/* protected processor identification number */
u64 ppin;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
u16 x86_clflush_size;
/* number of cores as seen by the OS: */
u16 booted_cores;
@@ -186,13 +217,8 @@ extern struct cpuinfo_x86 new_cpu_data;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
-#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#else
-#define cpu_info boot_cpu_data
-#define cpu_data(cpu) boot_cpu_data
-#endif
extern const struct seq_operations cpuinfo_op;
@@ -205,6 +231,8 @@ static inline unsigned long long l1tf_pfn_limit(void)
return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
+void init_cpu_devs(void);
+void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -479,7 +507,6 @@ struct thread_struct {
unsigned long iopl_emul;
unsigned int iopl_warn:1;
- unsigned int sig_on_uaccess_err:1;
/*
* Protection Keys Register for Userspace. Loaded immediately on
@@ -533,6 +560,9 @@ static __always_inline unsigned long current_top_of_stack(void)
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_pcpu_hot.top_of_stack);
+
return this_cpu_read_stable(pcpu_hot.top_of_stack);
}
@@ -555,7 +585,7 @@ static inline void load_sp0(unsigned long sp0)
unsigned long __get_wchan(struct task_struct *p);
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
+extern void select_idle_routine(void);
extern void amd_e400_c1e_apic_setup(void);
extern unsigned long boot_option_idle_override;
@@ -573,31 +603,10 @@ extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void cpu_init(void);
-extern void cpu_init_exception_handling(void);
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
extern void cr4_init(void);
-static inline unsigned long get_debugctlmsr(void)
-{
- unsigned long debugctlmsr = 0;
-
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return 0;
-#endif
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-
- return debugctlmsr;
-}
-
-static inline void update_debugctlmsr(unsigned long debugctlmsr)
-{
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return;
-#endif
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-}
-
extern void set_task_blockstep(struct task_struct *task, bool on);
/* Boot loader type from the setup header: */
@@ -613,7 +622,7 @@ extern char ignore_fpu_irq;
# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
-# define BASE_PREFETCH "prefetcht0 %P1"
+# define BASE_PREFETCH "prefetcht0 %1"
#endif
/*
@@ -624,7 +633,7 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchnta %P1",
+ alternative_input(BASE_PREFETCH, "prefetchnta %1",
X86_FEATURE_XMM,
"m" (*(const char *)x));
}
@@ -636,7 +645,7 @@ static inline void prefetch(const void *x)
*/
static __always_inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchw %P1",
+ alternative_input(BASE_PREFETCH, "prefetchw %1",
X86_FEATURE_3DNOWPREFETCH,
"m" (*(const char *)x));
}
@@ -662,10 +671,10 @@ static __always_inline void prefetchw(const void *x)
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
-extern unsigned long __end_init_task[];
+extern unsigned long __top_init_kernel_stack[];
-#define INIT_THREAD { \
- .sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__top_init_kernel_stack, \
}
extern unsigned long KSTK_ESP(struct task_struct *task);
@@ -704,13 +713,18 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
}
#ifdef CONFIG_CPU_SUP_AMD
-extern u32 amd_get_nodes_per_socket(void);
-extern u32 amd_get_highest_perf(void);
-extern void amd_clear_divider(void);
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+static __always_inline void amd_clear_divider(void)
+{
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
+}
+
extern void amd_check_microcode(void);
#else
-static inline u32 amd_get_nodes_per_socket(void) { return 0; }
-static inline u32 amd_get_highest_perf(void) { return 0; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#endif
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 65dee2420624..365798cb4408 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -23,19 +23,14 @@ extern int of_ioapic;
extern u64 initial_dtb;
extern void add_dtb(u64 data);
void x86_of_pci_init(void);
-void x86_dtb_init(void);
+void x86_flattree_get_config(void);
#else
static inline void add_dtb(u64 data) { }
static inline void x86_of_pci_init(void) { }
-static inline void x86_dtb_init(void) { }
+static inline void x86_flattree_get_config(void) { }
#define of_ioapic 0
#endif
-#ifdef CONFIG_OF_EARLY_FLATTREE
-void x86_flattree_get_config(void);
-#else
-static inline void x86_flattree_get_config(void) { }
-#endif
extern char cmd_line[COMMAND_LINE_SIZE];
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 07375b476c4f..ab167c96b9ab 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -3,7 +3,7 @@
#define _ASM_X86_PTI_H
#ifndef __ASSEMBLY__
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
extern void pti_init(void);
extern void pti_check_boottime_disable(void);
extern void pti_finalize(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b09c8f..5a83fbd9bc0b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -56,18 +56,64 @@ struct pt_regs {
#else /* __i386__ */
+struct fred_cs {
+ /* CS selector */
+ u64 cs : 16,
+ /* Stack level at event time */
+ sl : 2,
+ /* IBT in WAIT_FOR_ENDBRANCH state */
+ wfe : 1,
+ : 45;
+};
+
+struct fred_ss {
+ /* SS selector */
+ u64 ss : 16,
+ /* STI state */
+ sti : 1,
+ /* Set if syscall, sysenter or INT n */
+ swevent : 1,
+ /* Event is NMI type */
+ nmi : 1,
+ : 13,
+ /* Event vector */
+ vector : 8,
+ : 8,
+ /* Event type */
+ type : 4,
+ : 4,
+ /* Event was incident to enclave execution */
+ enclave : 1,
+ /* CPU was in long mode */
+ lm : 1,
+ /*
+ * Nested exception during FRED delivery, not set
+ * for #DF.
+ */
+ nested : 1,
+ : 1,
+ /*
+ * The length of the instruction causing the event.
+ * Only set for INTO, INT1, INT3, INT n, SYSCALL
+ * and SYSENTER. 0 otherwise.
+ */
+ insnlen : 4;
+};
+
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -77,18 +123,50 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ *
+ * A FRED stack frame starts here:
+ * 1) It _always_ includes an error code;
+ *
+ * 2) The return frame for ERET[US] starts here, but
+ * the content of orig_ax is ignored.
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* CS selector */
+ u16 cs;
+ /* The extended 64-bit data slot containing CS */
+ u64 csx;
+ /* The FRED CS extension */
+ struct fred_cs fred_cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* SS selector */
+ u16 ss;
+ /* The extended 64-bit data slot containing SS */
+ u64 ssx;
+ /* The FRED SS extension */
+ struct fred_ss fred_ss;
+ };
+
+ /*
+ * Top of stack on IDT systems, while FRED systems have extra fields
+ * defined above for storing exception related information, e.g. CR2 or
+ * DR6.
+ */
};
#endif /* !__i386__ */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 0c92db84469d..6e4f8fae3ce9 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -5,6 +5,7 @@
#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
+struct timespec64;
/* some helper functions for xen and kvm pv clock sources */
u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index cde8357bb226..68da67df304d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -66,13 +66,15 @@ static inline bool vcpu_is_preempted(long cpu)
#ifdef CONFIG_PARAVIRT
/*
- * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack.
+ * virt_spin_lock_key - disables by default the virt_spin_lock() hijack.
*
- * Native (and PV wanting native due to vCPU pinning) should disable this key.
- * It is done in this backwards fashion to only have a single direction change,
- * which removes ordering between native_pv_spin_init() and HV setup.
+ * Native (and PV wanting native due to vCPU pinning) should keep this key
+ * disabled. Native does not touch the key.
+ *
+ * When in a guest then native_pv_lock_init() enables the key first and
+ * KVM/XEN might conditionally disable it later in the boot process again.
*/
-DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
/*
* Shortcut for the queued_spin_lock_slowpath() function that allows
@@ -85,6 +87,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
+ int val;
+
if (!static_branch_likely(&virt_spin_lock_key))
return false;
@@ -94,10 +98,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
* horrible lock 'holder' preemption issues.
*/
- do {
- while (atomic_read(&lock->val) != 0)
- cpu_relax();
- } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+ __retry:
+ val = atomic_read(&lock->val);
+
+ if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+ cpu_relax();
+ goto __retry;
+ }
return true;
}
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index ef9697f20129..0a985784be9b 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -25,9 +25,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
*
* void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
* {
- * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+ * u8 lockval = _Q_LOCKED_VAL;
*
- * if (likely(lockval == _Q_LOCKED_VAL))
+ * if (try_cmpxchg(&lock->locked, &lockval, 0))
* return;
* pv_queued_spin_unlock_slowpath(lock, lockval);
* }
@@ -40,10 +40,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define PV_UNLOCK_ASM \
FRAME_BEGIN \
"push %rdx\n\t" \
- "mov $0x1,%eax\n\t" \
+ "mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \
"xor %edx,%edx\n\t" \
LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
- "cmp $0x1,%al\n\t" \
"jne .slowpath\n\t" \
"pop %rdx\n\t" \
FRAME_END \
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 6536873f8fc0..ecd58ea9a837 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,14 +25,16 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
-#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
typedef void (cpu_emergency_virt_cb)(void);
+#if IS_ENABLED(CONFIG_KVM_X86)
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
#else
+static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
+static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_disable_virtualization(void) {}
-#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
+#endif /* CONFIG_KVM_X86 */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 7ba1726b71c7..e9187ddd3d1f 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 255a78d9d906..8b1b6ce1e51b 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -7,6 +7,13 @@
#include <linux/sched.h>
#include <linux/jump_label.h>
+/*
+ * This value can never be a valid CLOSID, and is used when mapping a
+ * (closid, rmid) pair to an index and back. On x86 only the RMID is
+ * needed. The index is a software defined value.
+ */
+#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0)
+
/**
* struct resctrl_pqr_state - State cache for the PQR MSR
* @cur_rmid: The cached Resource Monitoring ID
@@ -31,10 +38,47 @@ struct resctrl_pqr_state {
DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+static inline bool resctrl_arch_alloc_capable(void)
+{
+ return rdt_alloc_capable;
+}
+
+static inline void resctrl_arch_enable_alloc(void)
+{
+ static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_alloc(void)
+{
+ static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
+ static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
+static inline bool resctrl_arch_mon_capable(void)
+{
+ return rdt_mon_capable;
+}
+
+static inline void resctrl_arch_enable_mon(void)
+{
+ static_branch_enable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_inc_cpuslocked(&rdt_enable_key);
+}
+
+static inline void resctrl_arch_disable_mon(void)
+{
+ static_branch_disable_cpuslocked(&rdt_mon_enable_key);
+ static_branch_dec_cpuslocked(&rdt_enable_key);
+}
+
/*
* __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
@@ -88,12 +132,52 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
return val * scale;
}
+static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
+ u32 closid, u32 rmid)
+{
+ WRITE_ONCE(tsk->closid, closid);
+ WRITE_ONCE(tsk->rmid, rmid);
+}
+
+static inline bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
+{
+ return READ_ONCE(tsk->closid) == closid;
+}
+
+static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
+ u32 rmid)
+{
+ return READ_ONCE(tsk->rmid) == rmid;
+}
+
static inline void resctrl_sched_in(struct task_struct *tsk)
{
if (static_branch_likely(&rdt_enable_key))
__resctrl_sched_in(tsk);
}
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = X86_RESCTRL_EMPTY_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
+{
+ return rmid;
+}
+
+/* x86 can always read an rmid, nothing needs allocating */
+struct rdt_resource;
+static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
+{
+ might_sleep();
+ return NULL;
+};
+
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
+ void *ctx) { };
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..6652ebddfd02
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index fef16e398161..42bcd42d70d1 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -9,7 +9,7 @@
#endif
#ifdef CONFIG_COMPAT
-#include <asm/ia32_unistd.h>
+#include <asm/unistd_32_ia32.h>
#define __NR_seccomp_read_32 __NR_ia32_read
#define __NR_seccomp_write_32 __NR_ia32_write
#define __NR_seccomp_exit_32 __NR_ia32_exit
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 3fa87e5e11ab..30e8ee7006f9 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -5,6 +5,7 @@
#include <asm-generic/sections.h>
#include <asm/extable.h>
+extern char __relocate_kernel_start[], __relocate_kernel_end[];
extern char __brk_base[], __brk_limit[];
extern char __end_rodata_aligned[];
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 9aee31862b4a..cc62ef70ccc0 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
+
+bool set_memory_enc_stop_conversion(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
+
int set_memory_np_noalias(unsigned long addr, int numpages);
int set_memory_nonglobal(unsigned long addr, int numpages);
int set_memory_global(unsigned long addr, int numpages);
@@ -86,6 +89,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
bool kernel_page_present(struct page *page);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 5c83729c8e71..85f4fde3515c 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -28,6 +28,8 @@
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
#ifndef __ASSEMBLY__
+#include <linux/cache.h>
+
#include <asm/bootparam.h>
#include <asm/x86_init.h>
@@ -47,8 +49,8 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
-extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
-extern void startup_64_setup_env(unsigned long physbase);
+extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
+extern void startup_64_setup_gdt_idt(void);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
@@ -133,6 +135,12 @@ asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data
#endif /* __i386__ */
#endif /* _SETUP */
+#ifdef CONFIG_CMDLINE_BOOL
+extern bool builtin_cmdline_added __ro_after_init;
+#else
+#define builtin_cmdline_added 0
+#endif
+
#else /* __ASSEMBLY */
.macro __RESERVE_BRK name, size
diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h
new file mode 100644
index 000000000000..77c51111a893
--- /dev/null
+++ b/arch/x86/include/asm/setup_data.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SETUP_DATA_H
+#define _ASM_X86_SETUP_DATA_H
+
+#include <uapi/asm/setup_data.h>
+
+#ifndef __ASSEMBLY__
+
+struct pci_setup_rom {
+ struct setup_data data;
+ uint16_t vendor;
+ uint16_t devid;
+ uint64_t pcilen;
+ unsigned long segment;
+ unsigned long bus;
+ unsigned long device;
+ unsigned long function;
+ uint8_t romdata[];
+};
+
+/* kexec external ABI */
+struct efi_setup_data {
+ u64 fw_vendor;
+ u64 __unused;
+ u64 tables;
+ u64 smbios;
+ u64 reserved[8];
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b463fcbd4b90..dcbccdb280f9 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -54,8 +54,18 @@
(((unsigned long)fn) << 32))
/* AP Reset Hold */
-#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
-#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0)
+
+/* Preferred GHCB GPA Request */
+#define GHCB_MSR_PREF_GPA_REQ 0x010
+#define GHCB_MSR_GPA_VALUE_POS 12
+#define GHCB_MSR_GPA_VALUE_MASK GENMASK_ULL(51, 0)
+
+#define GHCB_MSR_PREF_GPA_RESP 0x011
+#define GHCB_MSR_PREF_GPA_NONE 0xfffffffffffff
/* GHCB GPA Register */
#define GHCB_MSR_REG_GPA_REQ 0x012
@@ -91,28 +101,61 @@ enum psc_op {
/* GHCBData[11:0] */ \
GHCB_MSR_PSC_REQ)
+#define GHCB_MSR_PSC_REQ_TO_GFN(msr) (((msr) & GENMASK_ULL(51, 12)) >> 12)
+#define GHCB_MSR_PSC_REQ_TO_OP(msr) (((msr) & GENMASK_ULL(55, 52)) >> 52)
+
#define GHCB_MSR_PSC_RESP 0x015
#define GHCB_MSR_PSC_RESP_VAL(val) \
/* GHCBData[63:32] */ \
(((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+/* Set highest bit as a generic error response */
+#define GHCB_MSR_PSC_RESP_ERROR (BIT_ULL(63) | GHCB_MSR_PSC_RESP)
+
+/* GHCB Run at VMPL Request/Response */
+#define GHCB_MSR_VMPL_REQ 0x016
+#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
+ /* GHCBData[39:32] */ \
+ (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
+ /* GHCBDdata[11:0] */ \
+ GHCB_MSR_VMPL_REQ)
+
+#define GHCB_MSR_VMPL_RESP 0x017
+#define GHCB_MSR_VMPL_RESP_VAL(v) \
+ /* GHCBData[63:32] */ \
+ (((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
+
/* GHCB Hypervisor Feature Request/Response */
#define GHCB_MSR_HV_FT_REQ 0x080
#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_POS 12
+#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0)
#define GHCB_MSR_HV_FT_RESP_VAL(v) \
/* GHCBData[63:12] */ \
(((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
#define GHCB_HV_FT_SNP BIT_ULL(0)
#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5)
/*
* SNP Page State Change NAE event
* The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
* is a local stack variable in set_pages_state(). Do not increase this value
* without evaluating the impact to stack usage.
+ *
+ * Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value
+ * is needed, such as when processing GHCB requests on the hypervisor side.
*/
#define VMGEXIT_PSC_MAX_ENTRY 64
+#define VMGEXIT_PSC_MAX_COUNT 253
+
+#define VMGEXIT_PSC_ERROR_GENERIC (0x100UL << 32)
+#define VMGEXIT_PSC_ERROR_INVALID_HDR ((1UL << 32) | 1)
+#define VMGEXIT_PSC_ERROR_INVALID_ENTRY ((1UL << 32) | 2)
+
+#define VMGEXIT_PSC_OP_PRIVATE 1
+#define VMGEXIT_PSC_OP_SHARED 2
struct psc_hdr {
u16 cur_entry;
@@ -159,6 +202,12 @@ struct snp_psc_desc {
#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
+#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */
+#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */
+#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */
+#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
+#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */
+#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
@@ -173,4 +222,31 @@ struct snp_psc_desc {
#define GHCB_ERR_INVALID_INPUT 5
#define GHCB_ERR_INVALID_EVENT 6
+struct sev_config {
+ __u64 debug : 1,
+
+ /*
+ * Indicates when the per-CPU GHCB has been created and registered
+ * and thus can be used by the BSP instead of the early boot GHCB.
+ *
+ * For APs, the per-CPU GHCB is created before they are started
+ * and registered upon startup, so this flag can be used globally
+ * for the BSP and APs.
+ */
+ ghcbs_initialized : 1,
+
+ /*
+ * Indicates when the per-CPU SVSM CA is to be used instead of the
+ * boot SVSM CA.
+ *
+ * For APs, the per-CPU SVSM CA is created as part of the AP
+ * bringup, so this flag can be used globally for the BSP and APs.
+ */
+ use_cas : 1,
+
+ __reserved : 61;
+};
+
+extern struct sev_config sev_cfg;
+
#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5b4a1ce3d368..ba7999f66abe 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -13,8 +13,8 @@
#include <asm/insn.h>
#include <asm/sev-common.h>
-#include <asm/bootparam.h>
#include <asm/coco.h>
+#include <asm/set_memory.h>
#define GHCB_PROTOCOL_MIN 1ULL
#define GHCB_PROTOCOL_MAX 2ULL
@@ -22,6 +22,8 @@
#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
+struct boot_params;
+
enum es_result {
ES_OK, /* All good */
ES_UNSUPPORTED, /* Requested operation not supported */
@@ -87,9 +89,26 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
/* Software defined (when rFlags.CF = 1) */
#define PVALIDATE_FAIL_NOUPDATE 255
+/* RMUPDATE detected 4K page and 2MB page overlap. */
+#define RMPUPDATE_FAIL_OVERLAP 4
+
+/* PSMASH failed due to concurrent access by another CPU */
+#define PSMASH_FAIL_INUSE 3
+
/* RMP page size */
#define RMP_PG_SIZE_4K 0
#define RMP_PG_SIZE_2M 1
+#define RMP_TO_PG_LEVEL(level) (((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
+#define PG_LEVEL_TO_RMP(level) (((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
+
+struct rmp_state {
+ u64 gpa;
+ u8 assigned;
+ u8 pagesize;
+ u8 immutable;
+ u8 rsvd;
+ u32 asid;
+} __packed;
#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
@@ -101,8 +120,92 @@ struct snp_req_data {
unsigned int data_npages;
};
-struct sev_guest_platform_data {
- u64 secrets_gpa;
+#define MAX_AUTHTAG_LEN 32
+#define AUTHTAG_LEN 16
+#define AAD_LEN 48
+#define MSG_HDR_VER 1
+
+#define SNP_REQ_MAX_RETRY_DURATION (60*HZ)
+#define SNP_REQ_RETRY_DELAY (2*HZ)
+
+/* See SNP spec SNP_GUEST_REQUEST section for the structure */
+enum msg_type {
+ SNP_MSG_TYPE_INVALID = 0,
+ SNP_MSG_CPUID_REQ,
+ SNP_MSG_CPUID_RSP,
+ SNP_MSG_KEY_REQ,
+ SNP_MSG_KEY_RSP,
+ SNP_MSG_REPORT_REQ,
+ SNP_MSG_REPORT_RSP,
+ SNP_MSG_EXPORT_REQ,
+ SNP_MSG_EXPORT_RSP,
+ SNP_MSG_IMPORT_REQ,
+ SNP_MSG_IMPORT_RSP,
+ SNP_MSG_ABSORB_REQ,
+ SNP_MSG_ABSORB_RSP,
+ SNP_MSG_VMRK_REQ,
+ SNP_MSG_VMRK_RSP,
+
+ SNP_MSG_TSC_INFO_REQ = 17,
+ SNP_MSG_TSC_INFO_RSP,
+
+ SNP_MSG_TYPE_MAX
+};
+
+enum aead_algo {
+ SNP_AEAD_INVALID,
+ SNP_AEAD_AES_256_GCM,
+};
+
+struct snp_guest_msg_hdr {
+ u8 authtag[MAX_AUTHTAG_LEN];
+ u64 msg_seqno;
+ u8 rsvd1[8];
+ u8 algo;
+ u8 hdr_version;
+ u16 hdr_sz;
+ u8 msg_type;
+ u8 msg_version;
+ u16 msg_sz;
+ u32 rsvd2;
+ u8 msg_vmpck;
+ u8 rsvd3[35];
+} __packed;
+
+struct snp_guest_msg {
+ struct snp_guest_msg_hdr hdr;
+ u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)];
+} __packed;
+
+#define SNP_TSC_INFO_REQ_SZ 128
+
+struct snp_tsc_info_req {
+ u8 rsvd[SNP_TSC_INFO_REQ_SZ];
+} __packed;
+
+struct snp_tsc_info_resp {
+ u32 status;
+ u32 rsvd1;
+ u64 tsc_scale;
+ u64 tsc_offset;
+ u32 tsc_factor;
+ u8 rsvd2[100];
+} __packed;
+
+struct snp_guest_req {
+ void *req_buf;
+ size_t req_sz;
+
+ void *resp_buf;
+ size_t resp_sz;
+
+ u64 exit_code;
+ unsigned int vmpck_id;
+ u8 msg_version;
+ u8 msg_type;
+
+ struct snp_req_data input;
+ void *certs_data;
};
/*
@@ -125,7 +228,7 @@ struct secrets_os_area {
#define VMPCK_KEY_LEN 32
/* See the SNP spec version 0.9 for secrets page format */
-struct snp_secrets_page_layout {
+struct snp_secrets_page {
u32 version;
u32 imien : 1,
rsvd1 : 31;
@@ -137,10 +240,154 @@ struct snp_secrets_page_layout {
u8 vmpck2[VMPCK_KEY_LEN];
u8 vmpck3[VMPCK_KEY_LEN];
struct secrets_os_area os_area;
- u8 rsvd3[3840];
+
+ u8 vmsa_tweak_bitmap[64];
+
+ /* SVSM fields */
+ u64 svsm_base;
+ u64 svsm_size;
+ u64 svsm_caa;
+ u32 svsm_max_version;
+ u8 svsm_guest_vmpl;
+ u8 rsvd3[3];
+
+ /* Remainder of page */
+ u8 rsvd4[3744];
} __packed;
+struct snp_msg_desc {
+ /* request and response are in unencrypted memory */
+ struct snp_guest_msg *request, *response;
+
+ /*
+ * Avoid information leakage by double-buffering shared messages
+ * in fields that are in regular encrypted memory.
+ */
+ struct snp_guest_msg secret_request, secret_response;
+
+ struct snp_secrets_page *secrets;
+
+ struct aesgcm_ctx *ctx;
+
+ u32 *os_area_msg_seqno;
+ u8 *vmpck;
+ int vmpck_id;
+};
+
+/*
+ * The SVSM Calling Area (CA) related structures.
+ */
+struct svsm_ca {
+ u8 call_pending;
+ u8 mem_available;
+ u8 rsvd1[6];
+
+ u8 svsm_buffer[PAGE_SIZE - 8];
+};
+
+#define SVSM_SUCCESS 0
+#define SVSM_ERR_INCOMPLETE 0x80000000
+#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001
+#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002
+#define SVSM_ERR_INVALID_ADDRESS 0x80000003
+#define SVSM_ERR_INVALID_FORMAT 0x80000004
+#define SVSM_ERR_INVALID_PARAMETER 0x80000005
+#define SVSM_ERR_INVALID_REQUEST 0x80000006
+#define SVSM_ERR_BUSY 0x80000007
+#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006
+
+/*
+ * The SVSM PVALIDATE related structures
+ */
+struct svsm_pvalidate_entry {
+ u64 page_size : 2,
+ action : 1,
+ ignore_cf : 1,
+ rsvd : 8,
+ pfn : 52;
+};
+
+struct svsm_pvalidate_call {
+ u16 num_entries;
+ u16 cur_index;
+
+ u8 rsvd1[4];
+
+ struct svsm_pvalidate_entry entry[];
+};
+
+#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \
+ offsetof(struct svsm_pvalidate_call, entry)) / \
+ sizeof(struct svsm_pvalidate_entry))
+
+/*
+ * The SVSM Attestation related structures
+ */
+struct svsm_loc_entry {
+ u64 pa;
+ u32 len;
+ u8 rsvd[4];
+};
+
+struct svsm_attest_call {
+ struct svsm_loc_entry report_buf;
+ struct svsm_loc_entry nonce;
+ struct svsm_loc_entry manifest_buf;
+ struct svsm_loc_entry certificates_buf;
+
+ /* For attesting a single service */
+ u8 service_guid[16];
+ u32 service_manifest_ver;
+ u8 rsvd[4];
+};
+
+/* PTE descriptor used for the prepare_pte_enc() operations. */
+struct pte_enc_desc {
+ pte_t *kpte;
+ int pte_level;
+ bool encrypt;
+ /* pfn of the kpte above */
+ unsigned long pfn;
+ /* physical address of @pfn */
+ unsigned long pa;
+ /* virtual address of @pfn */
+ void *va;
+ /* memory covered by the pte */
+ unsigned long size;
+ pgprot_t new_pgprot;
+};
+
+/*
+ * SVSM protocol structure
+ */
+struct svsm_call {
+ struct svsm_ca *caa;
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 rax_out;
+ u64 rcx_out;
+ u64 rdx_out;
+ u64 r8_out;
+ u64 r9_out;
+};
+
+#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x))
+#define SVSM_CORE_REMAP_CA 0
+#define SVSM_CORE_PVALIDATE 1
+#define SVSM_CORE_CREATE_VCPU 2
+#define SVSM_CORE_DELETE_VCPU 3
+
+#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x))
+#define SVSM_ATTEST_SERVICES 0
+#define SVSM_ATTEST_SINGLE_SERVICE 1
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern u8 snp_vmpl;
+
extern void __sev_es_ist_enter(struct pt_regs *regs);
extern void __sev_es_ist_exit(void);
static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
@@ -166,6 +413,14 @@ static __always_inline void sev_es_nmi_complete(void)
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
extern void sev_enable(struct boot_params *bp);
+/*
+ * RMPADJUST modifies the RMP permissions of a page of a lesser-
+ * privileged (numerically higher) VMPL.
+ *
+ * If the guest is running at a higher-privilege than the privilege
+ * level the instruction is targeting, the instruction will succeed,
+ * otherwise, it will fail.
+ */
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
@@ -199,21 +454,39 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
struct snp_guest_request_ioctl;
void setup_ghcb(void);
-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
- unsigned long npages);
-void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
- unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
+void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
+void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void __init __noreturn snp_abort(void);
-int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
+int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
-#else
+void sev_show_status(void);
+void snp_update_svsm_ca(void);
+int prepare_pte_enc(struct pte_enc_desc *d);
+void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
+void snp_kexec_finish(void);
+void snp_kexec_begin(void);
+
+int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id);
+struct snp_msg_desc *snp_msg_alloc(void);
+void snp_msg_free(struct snp_msg_desc *mdesc);
+int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
+ struct snp_guest_request_ioctl *rio);
+
+void __init snp_secure_tsc_prepare(void);
+void __init snp_secure_tsc_init(void);
+
+#else /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define snp_vmpl 0
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
@@ -227,20 +500,61 @@ static inline void __init
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
-static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
+static inline void snp_dmi_setup(void) { }
+static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
{
return -ENOTTY;
}
-
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
+static inline void sev_show_status(void) { }
+static inline void snp_update_svsm_ca(void) { }
+static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
+static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
+static inline void snp_kexec_finish(void) { }
+static inline void snp_kexec_begin(void) { }
+static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; }
+static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; }
+static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
+static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
+ struct snp_guest_request_ioctl *rio) { return -ENODEV; }
+static inline void __init snp_secure_tsc_prepare(void) { }
+static inline void __init snp_secure_tsc_init(void) { }
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+#ifdef CONFIG_KVM_AMD_SEV
+bool snp_probe_rmptable_info(void);
+int snp_rmptable_init(void);
+int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
+void snp_dump_hva_rmpentry(unsigned long address);
+int psmash(u64 pfn);
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
+int rmp_make_shared(u64 pfn, enum pg_level level);
+void snp_leak_pages(u64 pfn, unsigned int npages);
+void kdump_sev_callback(void);
+void snp_fixup_e820_tables(void);
+#else
+static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_rmptable_init(void) { return -ENOSYS; }
+static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
+static inline void snp_dump_hva_rmpentry(unsigned long address) {}
+static inline int psmash(u64 pfn) { return -ENODEV; }
+static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid,
+ bool immutable)
+{
+ return -ENODEV;
+}
+static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
+static inline void snp_fixup_e820_tables(void) {}
#endif
#endif
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index fdfd41511b02..fcbbef484a78 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -16,10 +16,55 @@
#define TDG_VP_VEINFO_GET 3
#define TDG_MR_REPORT 4
#define TDG_MEM_PAGE_ACCEPT 6
+#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+/* TDX attributes */
+#define TDX_ATTR_DEBUG_BIT 0
+#define TDX_ATTR_DEBUG BIT_ULL(TDX_ATTR_DEBUG_BIT)
+#define TDX_ATTR_HGS_PLUS_PROF_BIT 4
+#define TDX_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_ATTR_HGS_PLUS_PROF_BIT)
+#define TDX_ATTR_PERF_PROF_BIT 5
+#define TDX_ATTR_PERF_PROF BIT_ULL(TDX_ATTR_PERF_PROF_BIT)
+#define TDX_ATTR_PMT_PROF_BIT 6
+#define TDX_ATTR_PMT_PROF BIT_ULL(TDX_ATTR_PMT_PROF_BIT)
+#define TDX_ATTR_ICSSD_BIT 16
+#define TDX_ATTR_ICSSD BIT_ULL(TDX_ATTR_ICSSD_BIT)
+#define TDX_ATTR_LASS_BIT 27
+#define TDX_ATTR_LASS BIT_ULL(TDX_ATTR_LASS_BIT)
+#define TDX_ATTR_SEPT_VE_DISABLE_BIT 28
+#define TDX_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_ATTR_SEPT_VE_DISABLE_BIT)
+#define TDX_ATTR_MIGRTABLE_BIT 29
+#define TDX_ATTR_MIGRTABLE BIT_ULL(TDX_ATTR_MIGRTABLE_BIT)
+#define TDX_ATTR_PKS_BIT 30
+#define TDX_ATTR_PKS BIT_ULL(TDX_ATTR_PKS_BIT)
+#define TDX_ATTR_KL_BIT 31
+#define TDX_ATTR_KL BIT_ULL(TDX_ATTR_KL_BIT)
+#define TDX_ATTR_TPA_BIT 62
+#define TDX_ATTR_TPA BIT_ULL(TDX_ATTR_TPA_BIT)
+#define TDX_ATTR_PERFMON_BIT 63
+#define TDX_ATTR_PERFMON BIT_ULL(TDX_ATTR_PERFMON_BIT)
+
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019
+
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE_BIT 0
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(TD_CTLS_PENDING_VE_DISABLE_BIT)
+#define TD_CTLS_ENUM_TOPOLOGY_BIT 1
+#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(TD_CTLS_ENUM_TOPOLOGY_BIT)
+#define TD_CTLS_VIRT_CPUID2_BIT 2
+#define TD_CTLS_VIRT_CPUID2 BIT_ULL(TD_CTLS_VIRT_CPUID2_BIT)
+#define TD_CTLS_REDUCE_VE_BIT 3
+#define TD_CTLS_REDUCE_VE BIT_ULL(TD_CTLS_REDUCE_VE_BIT)
+#define TD_CTLS_LOCK_BIT 63
+#define TD_CTLS_LOCK BIT_ULL(TD_CTLS_LOCK_BIT)
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index 42fee8959df7..4cb77e004615 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -21,6 +21,8 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clon
void shstk_free(struct task_struct *p);
int setup_signal_shadow_stack(struct ksignal *ksig);
int restore_signal_shadow_stack(void);
+int shstk_update_last_frame(unsigned long val);
+bool shstk_is_enabled(void);
#else
static inline long shstk_prctl(struct task_struct *task, int option,
unsigned long arg2) { return -EINVAL; }
@@ -31,6 +33,8 @@ static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
static inline void shstk_free(struct task_struct *p) {}
static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
static inline int restore_signal_shadow_stack(void) { return 0; }
+static inline int shstk_update_last_frame(unsigned long val) { return 0; }
+static inline bool shstk_is_enabled(void) { return false; }
#endif /* CONFIG_X86_USER_SHADOW_STACK */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index bab490379c65..2de1e5a75c57 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -13,30 +13,26 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
-/* "Raw" instruction opcodes */
-#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
-#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
-
#ifdef __ASSEMBLY__
#define ASM_CLAC \
- ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
+ ALTERNATIVE "", "clac", X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
+ ALTERNATIVE "", "stac", X86_FEATURE_SMAP
#else /* __ASSEMBLY__ */
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
+ alternative("", "clac", X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __ASM_STAC, X86_FEATURE_SMAP);
+ alternative("", "stac", X86_FEATURE_SMAP);
}
static __always_inline unsigned long smap_save(void)
@@ -44,7 +40,7 @@ static __always_inline unsigned long smap_save(void)
unsigned long flags;
asm volatile ("# smap_save\n\t"
- ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t",
+ ALTERNATIVE("", "pushf; pop %0; " "clac" "\n\t",
X86_FEATURE_SMAP)
: "=rm" (flags) : : "memory", "cc");
@@ -61,9 +57,9 @@ static __always_inline void smap_restore(unsigned long flags)
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP)
+ ALTERNATIVE("", "clac", X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
+ ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4fab2ed454f3..ca073f40698f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -8,9 +8,6 @@
#include <asm/current.h>
#include <asm/thread_info.h>
-extern int smp_num_siblings;
-extern unsigned int num_processors;
-
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
@@ -38,6 +35,7 @@ struct smp_ops {
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
+ void (*stop_this_cpu)(void);
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
@@ -59,11 +57,6 @@ static inline void stop_other_cpus(void)
smp_ops.stop_other_cpus(1);
}
-static inline void smp_prepare_boot_cpu(void)
-{
- smp_ops.smp_prepare_boot_cpu();
-}
-
static inline void smp_prepare_cpus(unsigned int max_cpus)
{
smp_ops.smp_prepare_cpus(max_cpus);
@@ -110,7 +103,6 @@ void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
-void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
@@ -174,8 +166,6 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
}
#endif /* CONFIG_SMP */
-extern unsigned disabled_cpus;
-
#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
#else
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 1be13b2dfe8b..3918c7a434f5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -31,15 +31,4 @@
#endif /* CONFIG_SPARSEMEM */
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_NUMA_KEEP_MEMINFO
-extern int phys_to_target_node(phys_addr_t start);
-#define phys_to_target_node phys_to_target_node
-extern int memory_add_physaddr_to_nid(u64 start);
-#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-extern int numa_fill_memblks(u64 start, u64 end);
-#define numa_fill_memblks numa_fill_memblks
-#endif
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index c648502e4535..658b690b2ccb 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -96,4 +96,6 @@ static inline void speculative_store_bypass_ht_init(void) { }
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);
+extern bool itlb_multihit_kvm_mitigation;
+
#endif
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 48f8dd47cf68..03e7c2d49559 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -2,11 +2,11 @@
#ifndef _ASM_X86_SPECIAL_INSNS_H
#define _ASM_X86_SPECIAL_INSNS_H
-
#ifdef __KERNEL__
-
#include <asm/nops.h>
#include <asm/processor-flags.h>
+
+#include <linux/errno.h>
#include <linux/irqflags.h>
#include <linux/jump_label.h>
@@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru)
}
#endif
-static __always_inline void native_wbinvd(void)
+static __always_inline void wbinvd(void)
{
asm volatile("wbinvd": : :"memory");
}
@@ -167,12 +167,6 @@ static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
}
-
-static __always_inline void wbinvd(void)
-{
- native_wbinvd();
-}
-
#endif /* CONFIG_PARAVIRT_XXL */
static __always_inline void clflush(volatile void *__p)
@@ -182,8 +176,8 @@ static __always_inline void clflush(volatile void *__p)
static inline void clflushopt(volatile void *__p)
{
- alternative_io(".byte 0x3e; clflush %P0",
- ".byte 0x66; clflush %P0",
+ alternative_io(".byte 0x3e; clflush %0",
+ ".byte 0x66; clflush %0",
X86_FEATURE_CLFLUSHOPT,
"+m" (*(volatile char __force *)__p));
}
@@ -205,9 +199,9 @@ static inline void clwb(volatile void *__p)
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- asm goto("1: wrussq %[val], (%[addr])\n"
+ asm goto("1: wrussq %[val], %[addr]\n"
_ASM_EXTABLE(1b, %l[fail])
- :: [addr] "r" (addr), [val] "r" (val)
+ :: [addr] "m" (*addr), [val] "r" (val)
:: fail);
return 0;
fail:
@@ -217,17 +211,17 @@ fail:
#define nop() asm volatile ("nop")
-static inline void serialize(void)
+static __always_inline void serialize(void)
{
/* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
}
/* The dst parameter must be 64-bytes aligned */
-static inline void movdir64b(void __iomem *dst, const void *src)
+static inline void movdir64b(void *dst, const void *src)
{
const struct { char _[64]; } *__src = src;
- struct { char _[64]; } __iomem *__dst = dst;
+ struct { char _[64]; } *__dst = dst;
/*
* MOVDIR64B %(rdx), rax.
@@ -245,6 +239,11 @@ static inline void movdir64b(void __iomem *dst, const void *src)
: "m" (*__src), "a" (__dst), "d" (__src));
}
+static inline void movdir64b_io(void __iomem *dst, const void *src)
+{
+ movdir64b((void __force *)dst, src);
+}
+
/**
* enqcmds - Enqueue a command in supervisor (CPL0) mode
* @dst: destination, in MMIO space (must be 512-bit aligned)
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 343b722ccaf2..41502bd2afd6 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -46,7 +46,7 @@
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
-#ifdef CONFIG_RETHUNK
+#ifdef CONFIG_MITIGATION_RETHUNK
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
#else
@@ -65,4 +65,19 @@
extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+extern void __static_call_update_early(void *tramp, void *func);
+
+#define static_call_update_early(name, _func) \
+({ \
+ typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \
+ if (static_call_initialized) { \
+ __static_call_update(&STATIC_CALL_KEY(name), \
+ STATIC_CALL_TRAMP_ADDR(name), __F);\
+ } else { \
+ WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \
+ __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\
+ __F); \
+ } \
+})
+
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 857d364b9888..9d0b324eab21 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosw"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosw"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#define __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosl"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosl"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#define __HAVE_ARCH_MEMSET64
static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosq"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosq"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#endif
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index a800abb1a992..d8416b3bf832 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,11 +12,6 @@
/* image of the saved processor state */
struct saved_context {
- /*
- * On x86_32, all segment registers except gs are saved at kernel
- * entry in pt_regs.
- */
- u16 gs;
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
struct saved_msrs saved_msrs;
@@ -27,6 +22,11 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
+ /*
+ * On x86_32, all segment registers except gs are saved at kernel
+ * entry in pt_regs.
+ */
+ u16 gs;
bool misc_enable_saved;
} __attribute__((packed));
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 87a7b917d30e..e2fac21471f5 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -5,7 +5,7 @@
#include <uapi/asm/svm.h>
#include <uapi/asm/kvm.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
/*
* 32-bit intercept words in the VMCB Control Area, starting
@@ -285,7 +285,14 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
+#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
+#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
+#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
+#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
+
+#define SVM_SEV_FEAT_INT_INJ_MODES \
+ (SVM_SEV_FEAT_RESTRICTED_INJECTION | \
+ SVM_SEV_FEAT_ALTERNATE_INJECTION)
struct vmcb_seg {
u16 selector;
@@ -358,10 +365,10 @@ struct sev_es_save_area {
struct vmcb_seg ldtr;
struct vmcb_seg idtr;
struct vmcb_seg tr;
- u64 vmpl0_ssp;
- u64 vmpl1_ssp;
- u64 vmpl2_ssp;
- u64 vmpl3_ssp;
+ u64 pl0_ssp;
+ u64 pl1_ssp;
+ u64 pl2_ssp;
+ u64 pl3_ssp;
u64 u_cet;
u8 reserved_0xc8[2];
u8 vmpl;
@@ -410,7 +417,9 @@ struct sev_es_save_area {
u8 reserved_0x298[80];
u32 pkru;
u32 tsc_aux;
- u8 reserved_0x2f0[24];
+ u64 tsc_scale;
+ u64 tsc_offset;
+ u8 reserved_0x300[8];
u64 rcx;
u64 rdx;
u64 rbx;
@@ -509,6 +518,20 @@ struct ghcb {
u32 ghcb_usage;
} __packed;
+struct vmcb {
+ struct vmcb_control_area control;
+ union {
+ struct vmcb_save_area save;
+
+ /*
+ * For SEV-ES VMs, the save area in the VMCB is used only to
+ * save/load host state. Guest state resides in a separate
+ * page, the aptly named VM Save Area (VMSA), that is encrypted
+ * with the guest's private key.
+ */
+ struct sev_es_save_area host_sev_es_save;
+ };
+} __packed;
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
@@ -525,6 +548,7 @@ static inline void __unused_size_checks(void)
BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
/* Check offsets of reserved fields */
@@ -542,7 +566,7 @@ static inline void __unused_size_checks(void)
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298);
- BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x2f0);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x300);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0);
@@ -561,11 +585,6 @@ static inline void __unused_size_checks(void)
BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0);
}
-struct vmcb {
- struct vmcb_control_area control;
- struct vmcb_save_area save;
-} __packed;
-
#define SVM_CPUID_FUNC 0x8000000a
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index f42dbf17f52b..75248546403d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -70,8 +70,8 @@ static inline void update_task_stack(struct task_struct *task)
#ifdef CONFIG_X86_32
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- /* Xen PV enters the kernel on the thread stack. */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
+ /* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
#endif
}
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index ab7382f92aff..96bda43538ee 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -8,7 +8,7 @@
#include <asm/special_insns.h>
#ifdef CONFIG_X86_32
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
{
asm volatile (
"pushfl\n\t"
@@ -19,7 +19,7 @@ static inline void iret_to_self(void)
: ASM_CALL_CONSTRAINT : : "memory");
}
#else
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
{
unsigned int tmp;
@@ -55,7 +55,7 @@ static inline void iret_to_self(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
-static inline void sync_core(void)
+static __always_inline void sync_core(void)
{
/*
* The SERIALIZE instruction is the most straightforward way to
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f44e2f9ab65d..7c488ff0c764 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -84,7 +82,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned long *args)
{
- memcpy(args, &regs->bx, 6 * sizeof(args[0]));
+ args[0] = regs->bx;
+ args[1] = regs->cx;
+ args[2] = regs->dx;
+ args[3] = regs->si;
+ args[4] = regs->di;
+ args[5] = regs->bp;
}
static inline int syscall_get_arch(struct task_struct *task)
@@ -127,6 +130,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index eba178996d84..b4b16dafd55e 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -66,6 +66,9 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport);
u64 tdx_hcall_get_quote(u8 *buf, size_t size);
+void __init tdx_dump_attributes(u64 td_attr);
+void __init tdx_dump_td_ctls(u64 td_ctls);
+
#else
static inline void tdx_early_init(void) { };
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 0b70653a98c1..ab9e143ec9fe 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -15,6 +15,8 @@
extern void text_poke_early(void *addr, const void *opcode, size_t len);
+extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);
+
/*
* Clear and restore the kernel write-protection flag on the local CPU.
* Allows the kernel to edit read-only pages.
@@ -33,6 +35,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+#define text_poke_copy text_poke_copy
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d63b02940747..a55c214f3ba6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -31,7 +31,9 @@
* In vm86 mode, the hardware frame is much longer still, so add 16
* bytes to make room for the real-mode segments.
*
- * x86_64 has a fixed-length stack frame.
+ * x86-64 has a fixed-length stack frame, but it depends on whether
+ * or not FRED is enabled. Future versions of FRED might make this
+ * dynamic, but for now it is always 2 words longer.
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_VM86
@@ -39,8 +41,12 @@
# else
# define TOP_OF_KERNEL_STACK_PADDING 8
# endif
-#else
-# define TOP_OF_KERNEL_STACK_PADDING 0
+#else /* x86-64 */
+# ifdef CONFIG_X86_FRED
+# define TOP_OF_KERNEL_STACK_PADDING (2 * 8)
+# else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+# endif
#endif
/*
@@ -81,8 +87,9 @@ struct thread_info {
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Speculative store bypass disable */
+#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
+#define TIF_SSBD 6 /* Speculative store bypass disable */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
@@ -104,6 +111,7 @@ struct thread_info {
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 7365dd4acffb..23baf8c9b34c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -6,8 +6,6 @@
#include <linux/interrupt.h>
#include <linux/math64.h>
-#define TICK_SIZE (tick_nsec / 1000)
-
unsigned long long native_sched_clock(void);
extern void recalibrate_cpu_khz(void);
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 580636cdc257..77f52bc1578a 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -20,18 +20,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables);
}
-/*
- * While x86 architecture in general requires an IPI to perform TLB
- * shootdown, enablement code for several hypervisors overrides
- * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
- * a hypercall. To keep software pagetable walkers safe in this case we
- * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment
- * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
- * for more details.
- */
-static inline void __tlb_remove_table(void *table)
+static inline void invlpg(unsigned long addr)
{
- free_page_and_swap_cache(table);
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
}
#endif /* _ASM_X86_TLB_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 25726893c6f4..02fc2aa06e9e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -222,6 +222,7 @@ struct flush_tlb_info {
unsigned int initiating_cpu;
u8 stride_shift;
u8 freed_tables;
+ u8 trim_cpumask;
};
void flush_tlb_local(void);
@@ -399,11 +400,10 @@ static inline u64 tlbstate_lam_cr3_mask(void)
return lam << X86_CR3_LAM_U57_BIT;
}
-static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
- this_cpu_write(cpu_tlbstate.lam,
- mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
- this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
+ this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
+ this_cpu_write(tlbstate_untag_mask, untag_mask);
}
#else
@@ -413,7 +413,7 @@ static inline u64 tlbstate_lam_cr3_mask(void)
return 0;
}
-static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
}
#endif
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 5f87f6b9b09e..ec134b719144 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -102,17 +102,91 @@ static inline void setup_node_to_cpumask_map(void) { }
#include <asm-generic/topology.h>
+/* Topology information */
+enum x86_topology_domains {
+ TOPO_SMT_DOMAIN,
+ TOPO_CORE_DOMAIN,
+ TOPO_MODULE_DOMAIN,
+ TOPO_TILE_DOMAIN,
+ TOPO_DIE_DOMAIN,
+ TOPO_DIEGRP_DOMAIN,
+ TOPO_PKG_DOMAIN,
+ TOPO_MAX_DOMAIN,
+};
+
+enum x86_topology_cpu_type {
+ TOPO_CPU_TYPE_PERFORMANCE,
+ TOPO_CPU_TYPE_EFFICIENCY,
+ TOPO_CPU_TYPE_UNKNOWN,
+};
+
+struct x86_topology_system {
+ unsigned int dom_shifts[TOPO_MAX_DOMAIN];
+ unsigned int dom_size[TOPO_MAX_DOMAIN];
+};
+
+extern struct x86_topology_system x86_topo_system;
+
+static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom)
+{
+ return x86_topo_system.dom_size[dom];
+}
+
+static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom)
+{
+ return dom == TOPO_SMT_DOMAIN ? 0 : x86_topo_system.dom_shifts[dom - 1];
+}
+
extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
-extern unsigned int __max_die_per_package;
+#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id)
+
+extern unsigned int __max_dies_per_package;
+extern unsigned int __max_logical_packages;
+extern unsigned int __max_threads_per_core;
+extern unsigned int __num_threads_per_package;
+extern unsigned int __num_cores_per_package;
+
+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
+
+static inline unsigned int topology_max_packages(void)
+{
+ return __max_logical_packages;
+}
+
+static inline unsigned int topology_max_dies_per_package(void)
+{
+ return __max_dies_per_package;
+}
+
+static inline unsigned int topology_num_cores_per_package(void)
+{
+ return __num_cores_per_package;
+}
+
+static inline unsigned int topology_num_threads_per_package(void)
+{
+ return __num_threads_per_package;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level);
+#else
+static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_SMP
#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id)
@@ -121,12 +195,11 @@ extern unsigned int __max_die_per_package;
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
-extern unsigned int __max_logical_packages;
-#define topology_max_packages() (__max_logical_packages)
-static inline int topology_max_die_per_package(void)
+static inline int topology_phys_to_logical_pkg(unsigned int pkg)
{
- return __max_die_per_package;
+ return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN],
+ TOPO_PKG_DOMAIN);
}
extern int __max_smt_threads;
@@ -138,9 +211,12 @@ static inline int topology_max_smt_threads(void)
#include <linux/cpu_smt.h>
-int topology_update_package_map(unsigned int apicid, unsigned int cpu);
-int topology_update_die_map(unsigned int dieid, unsigned int cpu);
-int topology_phys_to_logical_pkg(unsigned int pkg);
+extern unsigned int __amd_nodes_per_pkg;
+
+static inline unsigned int topology_amd_nodes_per_pkg(void)
+{
+ return __amd_nodes_per_pkg;
+}
extern struct cpumask __cpu_primary_thread_mask;
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -153,16 +229,12 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
}
+
#else /* CONFIG_SMP */
-#define topology_max_packages() (1)
-static inline int
-topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
-static inline int
-topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
#endif /* !CONFIG_SMP */
static inline void arch_fix_phys_package_id(int num, u32 slot)
@@ -179,7 +251,7 @@ extern bool x86_topology_update;
#include <asm/percpu.h>
DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
-extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
+extern bool __read_mostly sysctl_sched_itmt_enabled;
/* Interface to set priority of a cpu */
void sched_set_itmt_core_prio(int prio, int core_cpu);
@@ -192,7 +264,7 @@ void sched_clear_itmt_support(void);
#else /* CONFIG_SCHED_MC_PRIO */
-#define sysctl_sched_itmt_enabled 0
+#define sysctl_sched_itmt_enabled false
static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
{
}
@@ -220,9 +292,22 @@ static inline long arch_scale_freq_capacity(int cpu)
}
#define arch_scale_freq_capacity arch_scale_freq_capacity
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+ unsigned long cap_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
extern void arch_set_max_freq_ratio(bool turbo_disabled);
extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
#else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+ unsigned long max_cap,
+ unsigned long cap_freq,
+ unsigned long base_freq) { }
+
static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
#endif
@@ -230,9 +315,4 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled
extern void arch_scale_freq_tick(void);
#define arch_scale_freq_tick arch_scale_freq_tick
-#ifdef CONFIG_ACPI_CPPC_LIB
-void init_freq_invariance_cppc(void);
-#define arch_init_invariance_cppc init_freq_invariance_cppc
-#endif
-
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
index afa524325e55..a23a7b707b64 100644
--- a/arch/x86/include/asm/trap_pf.h
+++ b/arch/x86/include/asm/trap_pf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_TRAP_PF_H
#define _ASM_X86_TRAP_PF_H
+#include <linux/bits.h>
+
/*
* Page fault error code bits:
*
@@ -13,16 +15,18 @@
* bit 5 == 1: protection keys block access
* bit 6 == 1: shadow stack access fault
* bit 15 == 1: SGX MMU page-fault
+ * bit 31 == 1: fault was due to RMP violation
*/
enum x86_pf_error_code {
- X86_PF_PROT = 1 << 0,
- X86_PF_WRITE = 1 << 1,
- X86_PF_USER = 1 << 2,
- X86_PF_RSVD = 1 << 3,
- X86_PF_INSTR = 1 << 4,
- X86_PF_PK = 1 << 5,
- X86_PF_SHSTK = 1 << 6,
- X86_PF_SGX = 1 << 15,
+ X86_PF_PROT = BIT(0),
+ X86_PF_WRITE = BIT(1),
+ X86_PF_USER = BIT(2),
+ X86_PF_RSVD = BIT(3),
+ X86_PF_INSTR = BIT(4),
+ X86_PF_PK = BIT(5),
+ X86_PF_SHSTK = BIT(6),
+ X86_PF_SGX = BIT(15),
+ X86_PF_RMP = BIT(31),
};
#endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h
index f5d2325aa0b7..8d1154cdf787 100644
--- a/arch/x86/include/asm/trapnr.h
+++ b/arch/x86/include/asm/trapnr.h
@@ -2,6 +2,18 @@
#ifndef _ASM_X86_TRAPNR_H
#define _ASM_X86_TRAPNR_H
+/*
+ * Event type codes used by FRED, Intel VT-x and AMD SVM
+ */
+#define EVENT_TYPE_EXTINT 0 // External interrupt
+#define EVENT_TYPE_RESERVED 1
+#define EVENT_TYPE_NMI 2 // NMI
+#define EVENT_TYPE_HWEXC 3 // Hardware originated traps, exceptions
+#define EVENT_TYPE_SWINT 4 // INT n
+#define EVENT_TYPE_PRIV_SWEXC 5 // INT1
+#define EVENT_TYPE_SWEXC 6 // INTO, INT3
+#define EVENT_TYPE_OTHER 7 // FRED SYSCALL/SYSENTER, VT-x MTF
+
/* Interrupts/Exceptions */
#define X86_TRAP_DE 0 /* Divide-by-zero */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 594fce0ca744..94408a784c8e 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -5,8 +5,9 @@
#ifndef _ASM_X86_TSC_H
#define _ASM_X86_TSC_H
-#include <asm/processor.h>
#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
/*
* Standard way to access the cycle counter.
@@ -27,9 +28,6 @@ static inline cycles_t get_cycles(void)
}
#define get_cycles get_cycles
-extern struct system_counterval_t convert_art_to_tsc(u64 art);
-extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
-
extern void tsc_early_init(void);
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 237dc8cdd12b..3a7755c1a441 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -78,10 +78,10 @@ extern int __get_user_bad(void);
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
- asm volatile("call __" #fn "_%P4" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
- : "0" (ptr), "i" (sizeof(*(ptr)))); \
+ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \
instrument_get_user(__val_gu); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
@@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void);
__chk_user_ptr(__ptr); \
__ptr_pu = __ptr; \
__val_pu = __x; \
- asm volatile("call __" #fn "_%P[size]" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=c" (__ret_pu), \
ASM_CALL_CONSTRAINT \
: "0" (__ptr_pu), \
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f2c02e4469cc..c52f0133425b 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -11,6 +11,14 @@
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
+#include <asm/percpu.h>
+#include <asm/runtime-const.h>
+
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -18,14 +26,10 @@
*/
static inline unsigned long __untagged_addr(unsigned long addr)
{
- /*
- * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation
- * in alternative instructions. The relocation gets wrong when gets
- * copied to the target place.
- */
asm (ALTERNATIVE("",
- "and %%gs:tlbstate_untag_mask, %[addr]\n\t", X86_FEATURE_LAM)
- : [addr] "+r" (addr) : "m" (tlbstate_untag_mask));
+ "and " __percpu_arg([mask]) ", %[addr]", X86_FEATURE_LAM)
+ : [addr] "+r" (addr)
+ : [mask] "m" (__my_cpu_var(tlbstate_untag_mask)));
return addr;
}
@@ -49,35 +53,44 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
+#define valid_user_address(x) \
+ ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
+
/*
- * The virtual address space space is logically divided into a kernel
- * half and a user half. When cast to a signed type, user pointers
- * are positive and kernel pointers are negative.
+ * Masking the user address is an alternative to a conditional
+ * user_access_begin that can avoid the fencing. This only works
+ * for dense accesses starting at the address.
*/
-#define valid_user_address(x) ((long)(x) >= 0)
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+ void __user *ret;
+ asm("cmp %1,%0\n\t"
+ "cmova %1,%0"
+ :"=r" (ret)
+ :"r" (runtime_const_ptr(USER_PTR_MAX)),
+ "0" (ptr));
+ return ret;
+}
+#define masked_user_access_begin(x) ({ \
+ __auto_type __masked_ptr = (x); \
+ __masked_ptr = mask_user_address(__masked_ptr); \
+ __uaccess_begin(); __masked_ptr; })
/*
* User pointers can have tag bits on x86-64. This scheme tolerates
* arbitrary values in those bits rather then masking them off.
*
* Enforce two rules:
- * 1. 'ptr' must be in the user half of the address space
+ * 1. 'ptr' must be in the user part of the address space
* 2. 'ptr+size' must not overflow into kernel addresses
*
- * Note that addresses around the sign change are not valid addresses,
- * and will GP-fault even with LAM enabled if the sign bit is set (see
- * "CR3.LAM_SUP" that can narrow the canonicality check if we ever
- * enable it, but not remove it entirely).
- *
- * So the "overflow into kernel addresses" does not imply some sudden
- * exact boundary at the sign bit, and we can allow a lot of slop on the
- * size check.
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
*
* In fact, we could probably remove the size check entirely, since
* any kernel accesses will be in increasing address order starting
- * at 'ptr', and even if the end might be in kernel space, we'll
- * hit the GP faults for non-canonical accesses before we ever get
- * there.
+ * at 'ptr'.
*
* That's a separate optimization, for now just handle the small
* constant case.
@@ -87,8 +100,9 @@ static inline bool __access_ok(const void __user *ptr, unsigned long size)
if (__builtin_constant_p(size <= PAGE_SIZE) && size <= PAGE_SIZE) {
return valid_user_address(ptr);
} else {
- unsigned long sum = size + (unsigned long)ptr;
- return valid_user_address(sum) && sum >= (unsigned long)ptr;
+ unsigned long sum = size + (__force unsigned long)ptr;
+
+ return valid_user_address(sum) && sum >= (__force unsigned long)ptr;
}
}
#define __access_ok __access_ok
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 761173ccc33c..6c9e5bdd3916 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -56,6 +56,5 @@
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
# define __ARCH_WANT_SYS_CLONE
-# define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c760622..1876b5edd142 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,7 +31,6 @@ enum {
UV_AFFINITY_CPU
};
-extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..2bf9c0e970c3
--- /dev/null
+++ b/arch/x86/include/asm/vdso/getrandom.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret) :
+ "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) :
+ "rcx", "r11", "memory");
+
+ return ret;
+}
+
+extern struct vdso_rng_data vdso_rng_data
+ __attribute__((visibility("hidden")));
+
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
+{
+ if (IS_ENABLED(CONFIG_TIME_NS) && __arch_get_vdso_data()->clock_mode == VDSO_CLOCKMODE_TIMENS)
+ return (void *)&vdso_rng_data + ((void *)&timens_page - (void *)__arch_get_vdso_data());
+ return &vdso_rng_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 8e048ca980df..375a34b0f365 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -14,14 +14,16 @@
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
-#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <clocksource/hyperv_timer.h>
-#define __vdso_data (VVAR(_vdso_data))
-#define __timens_vdso_data (TIMENS(_vdso_data))
+extern struct vdso_data vvar_page
+ __attribute__((visibility("hidden")));
+
+extern struct vdso_data timens_page
+ __attribute__((visibility("hidden")));
#define VDSO_HAS_TIME 1
@@ -61,7 +63,7 @@ extern struct ms_hyperv_tsc_page hvclock_page
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
- return __timens_vdso_data;
+ return &timens_page;
}
#endif
@@ -275,7 +277,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode,
static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
- return __vdso_data;
+ return &vvar_page;
}
static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
@@ -300,7 +302,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
#define vdso_cycles_ok arch_vdso_cycles_ok
/*
- * x86 specific delta calculation.
+ * x86 specific calculation of nanoseconds for the current cycle count
*
* The regular implementation assumes that clocksource reads are globally
* monotonic. The TSC can be slightly off across sockets which can cause
@@ -308,8 +310,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* jump.
*
* Therefore it needs to be verified that @cycles are greater than
- * @last. If not then use @last, which is the base time of the current
- * conversion period.
+ * @vd->cycles_last. If not then use @vd->cycles_last, which is the base
+ * time of the current conversion period.
*
* This variant also uses a custom mask because while the clocksource mask of
* all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
@@ -317,25 +319,36 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* declares everything with the MSB/Sign-bit set as invalid. Therefore the
* effective mask is S64_MAX.
*/
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base)
{
- /*
- * Due to the MSB/Sign-bit being used as invalid marker (see
- * arch_vdso_cycles_valid() above), the effective mask is S64_MAX.
- */
- u64 delta = (cycles - last) & S64_MAX;
+ u64 delta = cycles - vd->cycle_last;
/*
- * Due to the above mentioned TSC wobbles, filter out negative motion.
- * Per the above masking, the effective sign bit is now bit 62.
+ * Negative motion and deltas which can cause multiplication
+ * overflow require special treatment. This check covers both as
+ * negative motion is guaranteed to be greater than @vd::max_cycles
+ * due to unsigned comparison.
+ *
+ * Due to the MSB/Sign-bit being used as invalid marker (see
+ * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that
+ * case is also unlikely and will also take the unlikely path here.
*/
- if (unlikely(delta & (1ULL << 62)))
- return 0;
+ if (unlikely(delta > vd->max_cycles)) {
+ /*
+ * Due to the above mentioned TSC wobbles, filter out
+ * negative motion. Per the above masking, the effective
+ * sign bit is now bit 62.
+ */
+ if (delta & (1ULL << 62))
+ return base >> vd->shift;
+
+ /* Handle multiplication overflow gracefully */
+ return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift);
+ }
- return delta * mult;
+ return ((delta * vd->mult) + base) >> vd->shift;
}
-#define vdso_calc_delta vdso_calc_delta
+#define vdso_calc_ns vdso_calc_ns
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index be199a9b2676..37b4a70559a8 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -2,25 +2,37 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
+#define __VDSO_RND_DATA_OFFSET 640
+#define __VVAR_PAGES 4
+
+#define VDSO_NR_VCLOCK_PAGES 2
+#define VDSO_PAGE_PVCLOCK_OFFSET 0
+#define VDSO_PAGE_HVCLOCK_OFFSET 1
+
#ifndef __ASSEMBLY__
-#include <linux/hrtimer.h>
-#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#include <asm/vgtod.h>
-#include <asm/vvar.h>
-DEFINE_VVAR(struct vdso_data, _vdso_data);
+extern struct vdso_data *vdso_data;
+
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
static __always_inline
struct vdso_data *__x86_get_k_vdso_data(void)
{
- return _vdso_data;
+ return vdso_data;
}
#define __arch_get_k_vdso_data __x86_get_k_vdso_data
+static __always_inline
+struct vdso_rng_data *__x86_get_k_vdso_rng_data(void)
+{
+ return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
+}
+#define __arch_get_k_vdso_rng_data __x86_get_k_vdso_rng_data
+
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 7aa38b2ad8a9..a0ce291abcae 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -14,11 +14,6 @@
#include <uapi/linux/time.h>
-#ifdef BUILD_VDSO32_64
-typedef u64 gtod_long_t;
-#else
-typedef unsigned long gtod_long_t;
-#endif
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h
new file mode 100644
index 000000000000..0950c9535fae
--- /dev/null
+++ b/arch/x86/include/asm/video.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_VIDEO_H
+#define _ASM_X86_VIDEO_H
+
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+struct device;
+
+pgprot_t pgprot_framebuffer(pgprot_t prot,
+ unsigned long vm_start, unsigned long vm_end,
+ unsigned long offset);
+#define pgprot_framebuffer pgprot_framebuffer
+
+bool video_is_primary_device(struct device *dev);
+#define video_is_primary_device video_is_primary_device
+
+#include <asm-generic/video.h>
+
+#endif /* _ASM_X86_VIDEO_H */
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h
index 9e8ac5073ecb..62ee19909903 100644
--- a/arch/x86/include/asm/vm86.h
+++ b/arch/x86/include/asm/vm86.h
@@ -84,7 +84,7 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { }
-#define free_vm86(t) do { } while(0)
+#define free_vm86(task) do { (void)(task); } while(0)
#endif /* CONFIG_VM86 */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index ac9fc51e2b18..c9cf43d5ef23 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -7,51 +7,321 @@
#include <linux/stringify.h>
/*
- * The hypercall definitions differ in the low word of the %edx argument
- * in the following way: the old port base interface uses the port
- * number to distinguish between high- and low bandwidth versions.
+ * VMware hypercall ABI.
+ *
+ * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall)
+ * have up to 6 input and 6 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg2 - Hypercall command
+ * arg3 bits [15:0] - Port number, LB and direction flags
+ *
+ * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB
+ * hypercalls. They also have up to 6 input and 6 output on registers
+ * arguments, with different argument to register mapping:
+ * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3),
+ * %rsi (arg4), %rdi (arg5).
+ *
+ * - High bandwidth (HB) hypercalls are I/O port based only. They have
+ * up to 7 input and 7 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5), %ebp (arg6).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg1 - Hypercall command
+ * arg3 bits [15:0] - Port number, HB and direction flags
+ *
+ * For compatibility purposes, x86_64 systems use only lower 32 bits
+ * for input and output arguments.
+ *
+ * The hypercall definitions differ in the low word of the %edx (arg3)
+ * in the following way: the old I/O port based interface uses the port
+ * number to distinguish between high- and low bandwidth versions, and
+ * uses IN/OUT instructions to define transfer direction.
*
* The new vmcall interface instead uses a set of flags to select
* bandwidth mode and transfer direction. The flags should be loaded
- * into %dx by any user and are automatically replaced by the port
- * number if the VMWARE_HYPERVISOR_PORT method is used.
- *
- * In short, new driver code should strictly use the new definition of
- * %dx content.
+ * into arg3 by any user and are automatically replaced by the port
+ * number if the I/O port method is used.
*/
-/* Old port-based version */
-#define VMWARE_HYPERVISOR_PORT 0x5658
-#define VMWARE_HYPERVISOR_PORT_HB 0x5659
+#define VMWARE_HYPERVISOR_HB BIT(0)
+#define VMWARE_HYPERVISOR_OUT BIT(1)
-/* Current vmcall / vmmcall version */
-#define VMWARE_HYPERVISOR_HB BIT(0)
-#define VMWARE_HYPERVISOR_OUT BIT(1)
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \
+ VMWARE_HYPERVISOR_HB)
-/* The low bandwidth call. The low word of edx is presumed clear. */
-#define VMWARE_HYPERCALL \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
- "inl (%%dx), %%eax", \
- "vmcall", X86_FEATURE_VMCALL, \
- "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U
+#define VMWARE_CMD_GETVERSION 10
+#define VMWARE_CMD_GETHZ 45
+#define VMWARE_CMD_GETVCPU_INFO 68
+#define VMWARE_CMD_STEALCLOCK 91
/*
- * The high bandwidth out call. The low word of edx is presumed to have the
- * HB and OUT bits set.
+ * Hypercall command mask:
+ * bits [6:0] command, range [0, 127]
+ * bits [19:16] sub-command, range [0, 15]
*/
-#define VMWARE_HYPERCALL_HB_OUT \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
- "rep outsb", \
- "vmcall", X86_FEATURE_VMCALL, \
- "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#define VMWARE_CMD_MASK 0xf007fU
+
+#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0)
+#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1)
+
+extern unsigned long vmware_hypercall_slow(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
+
+#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL
+#define VMWARE_TDX_HCALL_FUNC 1
+
+extern unsigned long vmware_tdx_hypercall(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
/*
- * The high bandwidth in call. The low word of edx is presumed to have the
- * HB bit set.
+ * The low bandwidth call. The low word of %edx is presumed to have OUT bit
+ * set. The high word of %edx may contain input data from the caller.
*/
-#define VMWARE_HYPERCALL_HB_IN \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
- "rep insb", \
- "vmcall", X86_FEATURE_VMCALL, \
+#define VMWARE_HYPERCALL \
+ ALTERNATIVE_2("movw %[port], %%dx\n\t" \
+ "inl (%%dx), %%eax", \
+ "vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
+
+static inline
+unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1,
+ unsigned long in3, u32 *out2,
+ u32 *out3, u32 *out4, u32 *out5)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4),
+ "=D" (*out5)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out1,
+ u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+#ifdef CONFIG_X86_64
+#define VMW_BP_CONSTRAINT "r"
+#else
+#define VMW_BP_CONSTRAINT "m"
+#endif
+
+/*
+ * High bandwidth calls are not supported on encrypted memory guests.
+ * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use
+ * low bandwidth hypercall if memory encryption is set.
+ * This assumption simplifies HB hypercall implementation to just I/O port
+ * based approach without alternative patching.
+ */
+static inline
+unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep outsb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep insb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+#undef VMW_BP_CONSTRAINT
+#undef VMWARE_HYPERCALL
+
#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0e73616b82f3..f7fd4369b821 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <uapi/asm/vmx.h>
+#include <asm/trapnr.h>
#include <asm/vmxfeatures.h>
#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f)
@@ -70,6 +71,7 @@
#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING)
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE)
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
@@ -120,19 +122,17 @@
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA 0x00000020
-#define VMX_MISC_ACTIVITY_HLT 0x00000040
-#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100
-#define VMX_MISC_ZERO_LEN_INS 0x40000000
-#define VMX_MISC_MSR_LIST_MULTIPLIER 512
-
/* VMFUNC functions */
#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28)
#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
#define VMFUNC_EPTP_ENTRIES 512
+#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48)
+#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
+#define VMX_BASIC_INOUT BIT_ULL(54)
+#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -143,9 +143,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
}
+static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(53, 50)) >> 50;
+}
+
+static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype)
+{
+ return revision | ((u64)size << 32) | ((u64)memtype << 50);
+}
+
+#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5)
+#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6)
+#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7)
+#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8)
+#define VMX_MISC_INTEL_PT BIT_ULL(14)
+#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15)
+#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28)
+#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29)
+#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30)
+#define VMX_MISC_MSR_LIST_MULTIPLIER 512
+
static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
{
- return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ return vmx_misc & GENMASK_ULL(4, 0);
}
static inline int vmx_misc_cr3_count(u64 vmx_misc)
@@ -225,6 +246,8 @@ enum vmcs_field {
VMREAD_BITMAP_HIGH = 0x00002027,
VMWRITE_BITMAP = 0x00002028,
VMWRITE_BITMAP_HIGH = 0x00002029,
+ VE_INFORMATION_ADDRESS = 0x0000202A,
+ VE_INFORMATION_ADDRESS_HIGH = 0x0000202B,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
ENCLS_EXITING_BITMAP = 0x0000202E,
@@ -374,14 +397,14 @@ enum vmcs_field {
#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
-#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
-#define INTR_TYPE_RESERVED (1 << 8) /* reserved */
-#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
-#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
-#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
-#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
-#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */
+#define INTR_TYPE_EXT_INTR (EVENT_TYPE_EXTINT << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED (EVENT_TYPE_RESERVED << 8) /* reserved */
+#define INTR_TYPE_NMI_INTR (EVENT_TYPE_NMI << 8) /* NMI */
+#define INTR_TYPE_HARD_EXCEPTION (EVENT_TYPE_HWEXC << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR (EVENT_TYPE_SWINT << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION (EVENT_TYPE_PRIV_SWEXC << 8) /* ICE breakpoint */
+#define INTR_TYPE_SOFT_EXCEPTION (EVENT_TYPE_SWEXC << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT (EVENT_TYPE_OTHER << 8) /* other event */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
@@ -504,15 +527,17 @@ enum vmcs_field {
#define VMX_EPTP_PWL_4 0x18ull
#define VMX_EPTP_PWL_5 0x20ull
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
+/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */
#define VMX_EPTP_MT_MASK 0x7ull
-#define VMX_EPTP_MT_WB 0x6ull
-#define VMX_EPTP_MT_UC 0x0ull
+#define VMX_EPTP_MT_WB X86_MEMTYPE_WB
+#define VMX_EPTP_MT_UC X86_MEMTYPE_UC
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_ACCESS_BIT (1ull << 8)
#define VMX_EPT_DIRTY_BIT (1ull << 9)
+#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
@@ -629,4 +654,13 @@ enum vmx_l1d_flush_state {
extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
+struct vmx_ve_information {
+ u32 exit_reason;
+ u32 delivery;
+ u64 exit_qualification;
+ u64 guest_linear_address;
+ u64 guest_physical_address;
+ u16 eptp_index;
+};
+
#endif
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index c6a7eed03914..09b1d7e607c1 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -9,84 +9,85 @@
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit
+ * is not displayed in /proc/cpuinfo at all.
*/
/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */
-#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */
-#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */
+#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */
+#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */
#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */
-#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */
-#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */
+#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */
+#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */
/* EPT/VPID features, scattered to bits 16-23 */
-#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */
+#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */
#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
-#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */
-#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */
+#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */
+#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */
+#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */
/* Aggregated APIC features 24-27 */
-#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */
-#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
+#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */
+#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
/* VM-Functions, shifted to bits 28-31 */
-#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */
+#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */
/* Primary Processor-Based VM-Execution Controls, word 1 */
-#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */
+#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */
#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
-#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */
-#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */
-#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */
-#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */
-#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */
-#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
-#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
-#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */
-#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
-#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
+#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */
+#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */
+#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */
+#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */
+#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */
+#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */
+#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* VM-Exit on reads from CR3 */
+#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */
+#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */
+#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */
#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
-#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
-#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */
-#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/
-#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */
+#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */
+#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */
+#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S}*/
+#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */
#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */
-#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */
-#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */
-#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */
-#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */
+#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */
+#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */
+#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */
+#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */
/* Secondary Processor-Based VM-Execution Controls, word 2 */
#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */
-#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */
-#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */
-#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */
-#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */
-#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */
-#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */
-#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */
+#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. Two-Dimensional Paging */
+#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */
+#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */
+#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */
+#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */
+#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */
+#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */
#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */
#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */
#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */
-#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/
-#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */
-#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */
-#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */
-#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
-#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
+#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND*/
+#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */
+#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */
+#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */
+#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */
+#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */
#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
-#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
-#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */
+#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
-#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
-#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
-#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
-#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
-#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */
-#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */
+#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */
+#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */
+#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */
/* Tertiary Processor-Based VM-Execution Controls, word 3 */
-#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */
+#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
deleted file mode 100644
index 183e98e49ab9..000000000000
--- a/arch/x86/include/asm/vvar.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * vvar.h: Shared vDSO/kernel variable declarations
- * Copyright (c) 2011 Andy Lutomirski
- *
- * A handful of variables are accessible (read-only) from userspace
- * code in the vsyscall page and the vdso. They are declared here.
- * Some other file must define them with DEFINE_VVAR.
- *
- * In normal kernel code, they are used like any other variable.
- * In user code, they are accessed through the VVAR macro.
- *
- * These variables live in a page of kernel data that has an extra RO
- * mapping for userspace. Each variable needs a unique offset within
- * that page; specify that offset with the DECLARE_VVAR macro. (If
- * you mess up, the linker will catch it.)
- */
-
-#ifndef _ASM_X86_VVAR_H
-#define _ASM_X86_VVAR_H
-
-#ifdef EMIT_VVAR
-/*
- * EMIT_VVAR() is used by the kernel linker script to put vvars in the
- * right place. Also, it's used by kernel code to import offsets values.
- */
-#define DECLARE_VVAR(offset, type, name) \
- EMIT_VVAR(name, offset)
-
-#else
-
-extern char __vvar_page;
-
-#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name[CS_BASES] \
- __attribute__((visibility("hidden"))); \
- extern type timens_ ## name[CS_BASES] \
- __attribute__((visibility("hidden"))); \
-
-#define VVAR(name) (vvar_ ## name)
-#define TIMENS(name) (timens_ ## name)
-
-#define DEFINE_VVAR(type, name) \
- type name[CS_BASES] \
- __attribute__((section(".vvar_" #name), aligned(16))) __visible
-
-#endif
-
-/* DECLARE_VVAR(offset, type, name) */
-
-DECLARE_VVAR(128, struct vdso_data, _vdso_data)
-
-#undef DECLARE_VVAR
-
-#endif
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 46b4f1f7f354..422a47746657 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -2,47 +2,15 @@
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
struct word_at_a_time {
const unsigned long one_bits, high_bits;
};
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
-#ifdef CONFIG_64BIT
-
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
-{
- return mask*0x0001020304050608ul >> 56;
-}
-
-#else /* 32-bit case */
-
-/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-static inline long count_masked_bytes(long mask)
-{
- /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
- long a = (0x0ff0001+mask) >> 23;
- /* Fix the 1 for 00 case */
- return a & mask;
-}
-
-#endif
-
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
@@ -56,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
return bits;
}
+#ifdef CONFIG_64BIT
+
+/* Keep the initial has_zero() value for both bitmask and size calc */
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long zero_bytemask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+#define find_zero(bits) (__ffs(bits) >> 3)
+
+#else
+
+/* Create the final mask for both bytemask and size */
static inline unsigned long create_zero_mask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
@@ -65,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline unsigned long find_zero(unsigned long mask)
{
- return count_masked_bytes(mask);
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
}
+#endif
+
/*
* Load an unaligned word from kernel space.
*
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c878616a18b8..213cf5379a5a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -2,8 +2,6 @@
#ifndef _ASM_X86_PLATFORM_H
#define _ASM_X86_PLATFORM_H
-#include <asm/bootparam.h>
-
struct ghcb;
struct mpc_bus;
struct mpc_cpu;
@@ -15,13 +13,15 @@ struct irq_domain;
/**
* struct x86_init_mpparse - platform specific mpparse ops
* @setup_ioapic_ids: platform specific ioapic id override
- * @find_smp_config: find the smp configuration
- * @get_smp_config: get the smp configuration
+ * @find_mptable: Find MPTABLE early to reserve the memory region
+ * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init()
+ * @parse_smp_cfg: Parse the SMP configuration data
*/
struct x86_init_mpparse {
void (*setup_ioapic_ids)(void);
- void (*find_smp_config)(void);
- void (*get_smp_config)(unsigned int early);
+ void (*find_mptable)(void);
+ void (*early_parse_smp_cfg)(void);
+ void (*parse_smp_cfg)(void);
};
/**
@@ -30,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
@@ -148,12 +149,22 @@ struct x86_init_acpi {
* @enc_status_change_finish Notify HV after the encryption status of a range is changed
* @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status
* @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin Begin the two-step process of converting shared memory back
+ * to private. It stops the new conversions from being started
+ * and waits in-flight conversions to finish, if possible.
+ * @enc_kexec_finish Finish the two-step process of converting shared memory to
+ * private. All memory is private after the call when
+ * the function returns.
+ * It is called on only one CPU while the others are shut down
+ * and with interrupts disabled.
*/
struct x86_guest {
- bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
- bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
+ void (*enc_kexec_begin)(void);
+ void (*enc_kexec_finish)(void);
};
/**
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a2dd24947eb8..97771b9d33af 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -39,9 +39,11 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/pgtable.h>
+#include <linux/instrumentation.h>
#include <trace/events/xen.h>
+#include <asm/alternative.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/nospec-branch.h>
@@ -86,11 +88,20 @@ struct xen_dm_op_buf;
* there aren't more than 5 arguments...)
*/
-extern struct { char _entry[32]; } hypercall_page[];
+void xen_hypercall_func(void);
+DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);
-#define __HYPERCALL "call hypercall_page+%c[offset]"
-#define __HYPERCALL_ENTRY(x) \
- [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+#ifdef MODULE
+#define __ADDRESSABLE_xen_hypercall
+#else
+#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall)
+#endif
+
+#define __HYPERCALL \
+ __ADDRESSABLE_xen_hypercall \
+ "call __SCT__xen_hypercall"
+
+#define __HYPERCALL_ENTRY(x) "a" (x)
#ifdef CONFIG_X86_32
#define __HYPERCALL_RETREG "eax"
@@ -148,7 +159,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_0ARG(); \
asm volatile (__HYPERCALL \
: __HYPERCALL_0PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER0); \
(type)__res; \
})
@@ -159,7 +170,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_1ARG(a1); \
asm volatile (__HYPERCALL \
: __HYPERCALL_1PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER1); \
(type)__res; \
})
@@ -170,7 +181,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_2ARG(a1, a2); \
asm volatile (__HYPERCALL \
: __HYPERCALL_2PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER2); \
(type)__res; \
})
@@ -181,7 +192,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_3ARG(a1, a2, a3); \
asm volatile (__HYPERCALL \
: __HYPERCALL_3PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER3); \
(type)__res; \
})
@@ -192,7 +203,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_4ARG(a1, a2, a3, a4); \
asm volatile (__HYPERCALL \
: __HYPERCALL_4PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER4); \
(type)__res; \
})
@@ -206,12 +217,9 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
- if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
- return -EINVAL;
-
- asm volatile(CALL_NOSPEC
+ asm volatile(__HYPERCALL
: __HYPERCALL_5PARAM
- : [thunk_target] "a" (&hypercall_page[call])
+ : __HYPERCALL_ENTRY(call)
: __HYPERCALL_CLOBBER5);
return (long)__res;
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
index e5d182c7373c..92d8f256d096 100644
--- a/arch/x86/include/uapi/asm/amd_hsmp.h
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -50,6 +50,12 @@ enum hsmp_message_ids {
HSMP_GET_METRIC_TABLE_VER, /* 23h Get metrics table version */
HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */
HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */
+ HSMP_SET_XGMI_PSTATE_RANGE, /* 26h Set xGMI P-state range */
+ HSMP_CPU_RAIL_ISO_FREQ_POLICY, /* 27h Get/Set Cpu Iso frequency policy */
+ HSMP_DFC_ENABLE_CTRL, /* 28h Enable/Disable DF C-state */
+ HSMP_GET_RAPL_UNITS = 0x30, /* 30h Get scaling factor for energy */
+ HSMP_GET_RAPL_CORE_COUNTER, /* 31h Get core energy counter value */
+ HSMP_GET_RAPL_PACKAGE_COUNTER, /* 32h Get package energy counter value */
HSMP_MSG_ID_MAX,
};
@@ -65,6 +71,7 @@ enum hsmp_msg_type {
HSMP_RSVD = -1,
HSMP_SET = 0,
HSMP_GET = 1,
+ HSMP_SET_GET = 2,
};
enum hsmp_proto_versions {
@@ -72,7 +79,8 @@ enum hsmp_proto_versions {
HSMP_PROTO_VER3,
HSMP_PROTO_VER4,
HSMP_PROTO_VER5,
- HSMP_PROTO_VER6
+ HSMP_PROTO_VER6,
+ HSMP_PROTO_VER7
};
struct hsmp_msg_desc {
@@ -88,7 +96,8 @@ struct hsmp_msg_desc {
*
* Not supported messages would return -ENOMSG.
*/
-static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
+static const struct hsmp_msg_desc hsmp_msg_desc_table[]
+ __attribute__((unused)) = {
/* RESERVED */
{0, 0, HSMP_RSVD},
@@ -299,7 +308,7 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
* HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
* input: args[0] = power efficiency mode[2:0]
*/
- {1, 0, HSMP_SET},
+ {1, 1, HSMP_SET_GET},
/*
* HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
@@ -324,6 +333,58 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
* output: args[1] = upper 32 bits of the address
*/
{0, 2, HSMP_GET},
+
+ /*
+ * HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0
+ * input: args[0] = min xGMI p-state[15:8] + max xGMI p-state[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1
+ * input: args[0] = set/get policy[31] +
+ * disable/enable independent control[0]
+ * output: args[0] = current policy[0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /*
+ * HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1
+ * input: args[0] = set/get policy[31] + enable/disable DFC[0]
+ * output: args[0] = current policy[0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /* RESERVED(0x29-0x2f) */
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+
+ /*
+ * HSMP_GET_RAPL_UNITS, response_sz = 1
+ * output: args[0] = tu value[19:16] + esu value[12:8]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 1
+ * input: args[0] = apic id[15:0]
+ * output: args[0] = lower 32 bits of energy
+ * output: args[1] = upper 32 bits of energy
+ */
+ {1, 2, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 1
+ * output: args[0] = lower 32 bits of energy
+ * output: args[1] = upper 32 bits of energy
+ */
+ {0, 2, HSMP_GET},
+
};
/* Metrics table (supported only with proto version 6) */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 01d19fc22346..9b82eebd7add 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -2,21 +2,7 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H
-/* setup_data/setup_indirect types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-#define SETUP_PCI 3
-#define SETUP_EFI 4
-#define SETUP_APPLE_PROPERTIES 5
-#define SETUP_JAILHOUSE 6
-#define SETUP_CC_BLOB 7
-#define SETUP_IMA 8
-#define SETUP_RNG_SEED 9
-#define SETUP_ENUM_MAX SETUP_RNG_SEED
-
-#define SETUP_INDIRECT (1<<31)
-#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
+#include <asm/setup_data.h>
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -38,6 +24,7 @@
#define XLF_EFI_KEXEC (1<<4)
#define XLF_5LEVEL (1<<5)
#define XLF_5LEVEL_ENABLED (1<<6)
+#define XLF_MEM_ENCRYPTION (1<<7)
#ifndef __ASSEMBLY__
@@ -48,22 +35,6 @@
#include <asm/ist.h>
#include <video/edid.h>
-/* extensible setup data list node */
-struct setup_data {
- __u64 next;
- __u32 type;
- __u32 len;
- __u8 data[];
-};
-
-/* extensible setup indirect data node */
-struct setup_indirect {
- __u32 type;
- __u32 reserved; /* Reserved, must be set to zero. */
- __u64 len;
- __u64 addr;
-};
-
struct setup_header {
__u8 setup_sects;
__u16 root_flags;
@@ -137,50 +108,10 @@ struct efi_info {
#define E820_MAX_ENTRIES_ZEROPAGE 128
/*
- * The E820 memory region entry of the boot protocol ABI:
- */
-struct boot_e820_entry {
- __u64 addr;
- __u64 size;
- __u32 type;
-} __attribute__((packed));
-
-/*
* Smallest compatible version of jailhouse_setup_data required by this kernel.
*/
#define JAILHOUSE_SETUP_REQUIRED_VERSION 1
-/*
- * The boot loader is passing platform information via this Jailhouse-specific
- * setup data structure.
- */
-struct jailhouse_setup_data {
- struct {
- __u16 version;
- __u16 compatible_version;
- } __attribute__((packed)) hdr;
- struct {
- __u16 pm_timer_address;
- __u16 num_cpus;
- __u64 pci_mmconfig_base;
- __u32 tsc_khz;
- __u32 apic_khz;
- __u8 standard_ioapic;
- __u8 cpu_ids[255];
- } __attribute__((packed)) v1;
- struct {
- __u32 flags;
- } __attribute__((packed)) v2;
-} __attribute__((packed));
-
-/*
- * IMA buffer setup data information from the previous kernel during kexec
- */
-struct ima_setup_data {
- __u64 addr;
- __u64 size;
-} __attribute__((packed));
-
/* The so-called "zeropage" */
struct boot_params {
struct screen_info screen_info; /* 0x000 */
diff --git a/arch/x86/include/uapi/asm/elf.h b/arch/x86/include/uapi/asm/elf.h
new file mode 100644
index 000000000000..468e135fa285
--- /dev/null
+++ b/arch/x86/include/uapi/asm/elf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_ELF_H
+#define _UAPI_ASM_X86_ELF_H
+
+#include <linux/types.h>
+
+struct x86_xfeat_component {
+ __u32 type;
+ __u32 size;
+ __u32 offset;
+ __u32 flags;
+} __packed;
+
+_Static_assert(sizeof(struct x86_xfeat_component) % 4 == 0, "x86_xfeat_component is not aligned");
+
+#endif /* _UAPI_ASM_X86_ELF_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a448d0964fc0..9e75da97bce0 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -7,6 +7,8 @@
*
*/
+#include <linux/const.h>
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/stddef.h>
@@ -40,7 +42,6 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
@@ -49,7 +50,6 @@
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {
#define KVM_RUN_X86_SMM (1 << 0)
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -438,6 +439,8 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
+#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -457,8 +460,13 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
-/* attributes for system fd (group 0) */
-#define KVM_X86_XCOMP_GUEST_SUPP 0
+/* vendor-independent attributes for system fd (group 0) */
+#define KVM_X86_GRP_SYSTEM 0
+# define KVM_X86_XCOMP_GUEST_SUPP 0
+
+/* vendor-specific groups and attributes for system fd */
+#define KVM_X86_GRP_SEV 1
+# define KVM_X86_SEV_VMSA_FEATURES 0
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -526,9 +534,359 @@ struct kvm_pmu_event_filter {
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ __u8 runstate_update_flag;
+ union {
+ __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+ __u64 hva;
+ } shared_info;
+ struct {
+ __u32 send_port;
+ __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+ __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
+#define KVM_XEN_EVTCHN_RESET (1 << 2)
+ /*
+ * Events sent by the guest are either looped back to
+ * the guest itself (potentially on a different port#)
+ * or signalled via an eventfd.
+ */
+ union {
+ struct {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ } port;
+ struct {
+ __u32 port; /* Zero for eventfd */
+ __s32 fd;
+ } eventfd;
+ __u32 padding[4];
+ } deliver;
+ } evtchn;
+ __u32 xen_version;
+ __u64 pad[8];
+ } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+ __u64 hva;
+ __u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
+ __u32 vcpu_id;
+ struct {
+ __u32 port;
+ __u32 priority;
+ __u64 expires_ns;
+ } timer;
+ __u8 vector;
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
+ /* Guest Migration Extension */
+ KVM_SEV_SEND_CANCEL,
+
+ /* Second time is the charm; improved versions of the above ioctls. */
+ KVM_SEV_INIT2,
+
+ /* SNP-specific commands */
+ KVM_SEV_SNP_LAUNCH_START = 100,
+ KVM_SEV_SNP_LAUNCH_UPDATE,
+ KVM_SEV_SNP_LAUNCH_FINISH,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u32 pad0;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_init {
+ __u64 vmsa_features;
+ __u32 flags;
+ __u16 ghcb_version;
+ __u16 pad1;
+ __u32 pad2[8];
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_send_start {
+ __u32 policy;
+ __u32 pad0;
+ __u64 pdh_cert_uaddr;
+ __u32 pdh_cert_len;
+ __u32 pad1;
+ __u64 plat_certs_uaddr;
+ __u32 plat_certs_len;
+ __u32 pad2;
+ __u64 amd_certs_uaddr;
+ __u32 amd_certs_len;
+ __u32 pad3;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad4;
+};
+
+struct kvm_sev_send_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_receive_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 pdh_uaddr;
+ __u32 pdh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_receive_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_snp_launch_start {
+ __u64 policy;
+ __u8 gosvw[16];
+ __u16 flags;
+ __u8 pad0[6];
+ __u64 pad1[4];
+};
+
+/* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 gfn_start;
+ __u64 uaddr;
+ __u64 len;
+ __u8 type;
+ __u8 pad0;
+ __u16 flags;
+ __u32 pad1;
+ __u64 pad2[4];
+};
+
+#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
+#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
+
+struct kvm_sev_snp_launch_finish {
+ __u64 id_block_uaddr;
+ __u64 id_auth_uaddr;
+ __u8 id_block_en;
+ __u8 auth_key_en;
+ __u8 vcek_disabled;
+ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+ __u8 pad0[3];
+ __u16 flags;
+ __u64 pad1[4];
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
/*
* Masked event layout.
* Bits Description
@@ -549,10 +907,10 @@ struct kvm_pmu_event_filter {
((__u64)(!!(exclude)) << 55))
#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8))
-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55))
+ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55))
#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
@@ -560,9 +918,13 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
/* x86-specific KVM_EXIT_HYPERCALL flags. */
-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0)
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
+#define KVM_X86_SEV_VM 2
+#define KVM_X86_SEV_ES_VM 3
+#define KVM_X86_SNP_VM 4
+#define KVM_X86_TDX_VM 5
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6e64b27b2c1e..a1efa7907a0b 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -92,7 +92,7 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
/* MSR_KVM_ASYNC_PF_INT */
-#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0)
/* MSR_KVM_MIGRATION_CONTROL */
#define KVM_MIGRATION_READY (1 << 0)
@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data {
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
#define KVM_PV_EOI_BIT 0
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index db9adc081c5a..cb6b48a7c22b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -8,7 +8,8 @@
/*
* Fields are zero when not available. Also, this struct is shared with
* userspace mcelog and thus must keep existing fields at current offsets.
- * Only add new fields to the end of the structure
+ * Only add new, shared fields to the end of the structure.
+ * Do not add vendor-specific fields.
*/
struct mce {
__u64 status; /* Bank's MCi_STATUS MSR */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 46cdc941f958..ac1e6277212b 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,9 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
#define MAP_ABOVE4G 0x80 /* only map above 4GB */
-/* Flags for map_shadow_stack(2) */
-#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
-
#include <asm-generic/mman.h>
#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index d898432947ff..f1a4adc78272 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -139,6 +139,13 @@
#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */
#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT)
+#ifdef __x86_64__
+#define X86_CR4_FRED_BIT 32 /* enable FRED kernel entry */
+#define X86_CR4_FRED _BITUL(X86_CR4_FRED_BIT)
+#else
+#define X86_CR4_FRED (0)
+#endif
+
/*
* x86-64 Task Priority Register, CR8
*/
diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h
new file mode 100644
index 000000000000..b111b0c18544
--- /dev/null
+++ b/arch/x86/include/uapi/asm/setup_data.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_SETUP_DATA_H
+#define _UAPI_ASM_X86_SETUP_DATA_H
+
+/* setup_data/setup_indirect types */
+#define SETUP_NONE 0
+#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
+#define SETUP_PCI 3
+#define SETUP_EFI 4
+#define SETUP_APPLE_PROPERTIES 5
+#define SETUP_JAILHOUSE 6
+#define SETUP_CC_BLOB 7
+#define SETUP_IMA 8
+#define SETUP_RNG_SEED 9
+#define SETUP_ENUM_MAX SETUP_RNG_SEED
+
+#define SETUP_INDIRECT (1<<31)
+#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/* extensible setup data list node */
+struct setup_data {
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[];
+};
+
+/* extensible setup indirect data node */
+struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+};
+
+/*
+ * The E820 memory region entry of the boot protocol ABI:
+ */
+struct boot_e820_entry {
+ __u64 addr;
+ __u64 size;
+ __u32 type;
+} __attribute__((packed));
+
+/*
+ * The boot loader is passing platform information via this Jailhouse-specific
+ * setup data structure.
+ */
+struct jailhouse_setup_data {
+ struct {
+ __u16 version;
+ __u16 compatible_version;
+ } __attribute__((packed)) hdr;
+ struct {
+ __u16 pm_timer_address;
+ __u16 num_cpus;
+ __u64 pci_mmconfig_base;
+ __u32 tsc_khz;
+ __u32 apic_khz;
+ __u8 standard_ioapic;
+ __u8 cpu_ids[255];
+ } __attribute__((packed)) v1;
+ struct {
+ __u32 flags;
+ } __attribute__((packed)) v2;
+} __attribute__((packed));
+
+/*
+ * IMA buffer setup data information from the previous kernel during kexec
+ */
+struct ima_setup_data {
+ __u64 addr;
+ __u64 size;
+} __attribute__((packed));
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 80e1df482337..1814b413fd57 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -115,6 +115,7 @@
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \