summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/head_32.S3
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S4
-rw-r--r--arch/x86/include/asm/bug.h20
-rw-r--r--arch/x86/include/asm/cpufeatures.h8
-rw-r--r--arch/x86/include/asm/insn.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h16
-rw-r--r--arch/x86/include/asm/paravirt_types.h4
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/svm.h36
-rw-r--r--arch/x86/include/asm/topology.h1
-rw-r--r--arch/x86/include/asm/xen/cpuid.h7
-rw-r--r--arch/x86/kernel/alternative.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c35
-rw-r--r--arch/x86/kernel/cpu/bugs.c204
-rw-r--r--arch/x86/kernel/cpu/common.c79
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c7
-rw-r--r--arch/x86/kernel/cpu/mce/core.c7
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c42
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c59
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c10
-rw-r--r--arch/x86/kernel/e820.c41
-rw-r--r--arch/x86/kernel/fpu/regset.c9
-rw-r--r--arch/x86/kernel/fpu/xstate.c5
-rw-r--r--arch/x86/kernel/kdebugfs.c37
-rw-r--r--arch/x86/kernel/ksysfs.c77
-rw-r--r--arch/x86/kernel/kvm.c13
-rw-r--r--arch/x86/kernel/kvmclock.c3
-rw-r--r--arch/x86/kernel/module.c13
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/ptrace.c4
-rw-r--r--arch/x86/kernel/resource.c23
-rw-r--r--arch/x86/kernel/setup.c34
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kvm/cpuid.c5
-rw-r--r--arch/x86/kvm/emulate.c19
-rw-r--r--arch/x86/kvm/lapic.c7
-rw-r--r--arch/x86/kvm/mmu/mmu.c15
-rw-r--r--arch/x86/kvm/pmu.c7
-rw-r--r--arch/x86/kvm/svm/avic.c93
-rw-r--r--arch/x86/kvm/svm/nested.c26
-rw-r--r--arch/x86/kvm/svm/svm.c104
-rw-r--r--arch/x86/kvm/svm/svm.h15
-rw-r--r--arch/x86/kvm/vmx/nested.c11
-rw-r--r--arch/x86/kvm/vmx/vmx.c29
-rw-r--r--arch/x86/kvm/vmx/vmx.h5
-rw-r--r--arch/x86/kvm/x86.c59
-rw-r--r--arch/x86/kvm/xen.c97
-rw-r--r--arch/x86/lib/memcpy_64.S10
-rw-r--r--arch/x86/lib/memmove_64.S4
-rw-r--r--arch/x86/lib/memset_64.S6
-rw-r--r--arch/x86/lib/retpoline.S2
-rw-r--r--arch/x86/lib/x86-opcode-map.txt111
-rw-r--r--arch/x86/mm/ioremap.c57
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--arch/x86/um/Kconfig1
-rw-r--r--arch/x86/xen/enlighten_hvm.c9
-rw-r--r--arch/x86/xen/vga.c16
62 files changed, 1047 insertions, 490 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660..391c4cac8958 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1638,7 +1638,7 @@ config ARCH_SPARSEMEM_DEFAULT
config ARCH_SELECT_MEMORY_MODEL
def_bool y
- depends on ARCH_SPARSEMEM_ENABLE
+ depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE
config ARCH_MEMORY_PROBE
bool "Enable sysfs memory/probe interface"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 659fad53ca82..3b354eb9516d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_STUB
SYM_FUNC_START(efi32_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
add $0x4, %esp
movl 8(%esp), %esi /* save boot_params pointer */
call efi_main
/* efi_main returns the possibly relocated address of startup_32 */
jmp *%eax
SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
#endif
.text
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fd9441f40457..dea95301196b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -535,7 +535,6 @@ SYM_CODE_END(startup_64)
#ifdef CONFIG_EFI_STUB
.org 0x390
SYM_FUNC_START(efi64_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
and $~0xf, %rsp /* realign the stack */
movq %rdx, %rbx /* save boot_params pointer */
call efi_main
@@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry)
leaq rva(startup_64)(%rax), %rax
jmp *%rax
SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
#endif
.text
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 363699dd7220..837c1e0aa021 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize)
#endif
-
-SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
SYM_FUNC_START_LOCAL(_key_expansion_256a)
pshufd $0b11111111, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
@@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a)
add $0x10, TKEYP
RET
SYM_FUNC_END(_key_expansion_256a)
-SYM_FUNC_END_ALIAS(_key_expansion_128)
+SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
SYM_FUNC_START_LOCAL(_key_expansion_192a)
pshufd $0b01010101, %xmm1, %xmm1
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 84b87538a15d..bab883c0b6fe 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -22,7 +22,7 @@
#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
@@ -31,7 +31,8 @@ do { \
"\t.word %c1" "\t# bug_entry::line\n" \
"\t.word %c2" "\t# bug_entry::flags\n" \
"\t.org 2b+%c3\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (flags), \
"i" (sizeof(struct bug_entry))); \
@@ -39,14 +40,15 @@ do { \
#else /* !CONFIG_DEBUG_BUGVERBOSE */
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
"\t.word %c0" "\t# bug_entry::flags\n" \
"\t.org 2b+%c1\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
@@ -55,7 +57,7 @@ do { \
#else
-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
#endif /* CONFIG_GENERIC_BUG */
@@ -63,8 +65,8 @@ do { \
#define BUG() \
do { \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, 0); \
- unreachable(); \
+ _BUG_FLAGS(ASM_UD2, 0, ""); \
+ __builtin_unreachable(); \
} while (0)
/*
@@ -75,9 +77,9 @@ do { \
*/
#define __WARN_FLAGS(flags) \
do { \
+ __auto_type f = BUGFLAG_WARNING|(flags); \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
- annotate_reachable(); \
+ _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \
instrumentation_end(); \
} while (0)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6db4e2932b3d..f7436fccc076 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -204,7 +204,7 @@
/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -299,9 +299,6 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -390,7 +387,10 @@
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 05a6ab940f45..1b29f58f730f 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
-#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6dcccb304775..ec9830d2aabf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -703,7 +703,6 @@ struct kvm_vcpu_arch {
struct fpu_guest guest_fpu;
u64 xcr0;
- u64 guest_supported_xcr0;
struct kvm_pio_request pio;
void *pio_data;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3faf0f97edb1..a4a39c3e0f19 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -476,6 +476,7 @@
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cc74dc584836..acbaeaf83b61 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -84,7 +84,7 @@
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
#else
jmp *%\reg
#endif
@@ -94,7 +94,7 @@
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
__stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
#else
call *%\reg
#endif
@@ -146,7 +146,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
@@ -176,7 +176,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_AMD)
+ X86_FEATURE_RETPOLINE_LFENCE)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
@@ -188,9 +188,11 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
- SPECTRE_V2_RETPOLINE_GENERIC,
- SPECTRE_V2_RETPOLINE_AMD,
- SPECTRE_V2_IBRS_ENHANCED,
+ SPECTRE_V2_RETPOLINE,
+ SPECTRE_V2_LFENCE,
+ SPECTRE_V2_EIBRS,
+ SPECTRE_V2_EIBRS_RETPOLINE,
+ SPECTRE_V2_EIBRS_LFENCE,
};
/* The indirect branch speculation control variants */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a69012e1903f..e1591467668e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void);
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(pv_ops.op))
+ [paravirt_opptr] "m" (pv_ops.op)
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@@ -316,7 +316,7 @@ int paravirt_disable_iospace(void);
*/
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
+ "call *%[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2c5f12ae7d04..a87e7c33d5ac 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -119,6 +119,8 @@ struct cpuinfo_x86 {
int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
+ /* protected processor identification number */
+ u64 ppin;
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index b00dbc5fac2b..bb2fb78523ce 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -220,6 +220,42 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)
+
+/* AVIC */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF)
+
+#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF)
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+
+/*
+ * 0xff is broadcast, so the max index allowed for physical APIC ID
+ * table is 0xfe. APIC IDs above 0xff are reserved.
+ */
+#define AVIC_MAX_PHYSICAL_ID_COUNT 0xff
+
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
+
struct vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 2f0b6be8eaab..43a89476a522 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+#define topology_ppin(cpu) (cpu_data(cpu).ppin)
extern unsigned int __max_die_per_package;
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..78e667a31d6c 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -100,6 +100,13 @@
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
+/*
+ * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
+ * used to store high bits for the Destination ID. This expands the Destination
+ * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5)
/*
* Leaf 6 (0x40000x05)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5007c3ffe96f..b4470eabf151 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -389,7 +389,7 @@ static int emit_indirect(int op, int reg, u8 *bytes)
*
* CALL *%\reg
*
- * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ * It also tries to inline spectre_v2=retpoline,lfence when size permits.
*/
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
@@ -407,7 +407,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4);
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
- !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
+ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
return -1;
op = insn->opcode.bytes[0];
@@ -438,9 +438,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
}
/*
- * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
+ * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
*/
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
bytes[i++] = 0x0f;
bytes[i++] = 0xae;
bytes[i++] = 0xe8; /* LFENCE */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4edb6f0f628c..0c0b09796ced 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id;
}
-static void amd_detect_ppin(struct cpuinfo_x86 *c)
-{
- unsigned long long val;
-
- if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
- return;
-
- /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
- if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
- goto clear_ppin;
-
- /* PPIN is locked in disabled mode, clear feature bit */
- if ((val & 3UL) == 1UL)
- goto clear_ppin;
-
- /* If PPIN is disabled, try to enable it */
- if (!(val & 2UL)) {
- wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
- rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
- }
-
- /* If PPIN_EN bit is 1, return from here; otherwise fall through */
- if (val & 2UL)
- return;
-
-clear_ppin:
- clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
-}
-
u32 amd_get_nodes_per_socket(void)
{
return nodes_per_socket;
@@ -585,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
* the SME physical address space reduction value.
* If BIOS has not enabled SME then don't advertise the
* SME feature (set in scattered.c).
+ * If the kernel has not enabled SME via any means then
+ * don't advertise the SME feature.
* For SEV: If BIOS has not enabled SEV then don't advertise the
* SEV and SEV_ES feature (set in scattered.c).
*
@@ -607,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
if (IS_ENABLED(CONFIG_X86_32))
goto clear_all;
+ if (!sme_me_mask)
+ setup_clear_cpu_cap(X86_FEATURE_SME);
+
rdmsrl(MSR_K7_HWCR, msr);
if (!(msr & MSR_K7_HWCR_SMMLOCK))
goto clear_sev;
@@ -947,7 +923,6 @@ static void init_amd(struct cpuinfo_x86 *c)
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
- amd_detect_ppin(c);
init_amd_cacheinfo(c);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1c1f218a701d..6296e1ebed1d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -16,6 +16,7 @@
#include <linux/prctl.h>
#include <linux/sched/smt.h>
#include <linux/pgtable.h>
+#include <linux/bpf.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -650,6 +651,32 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif
+#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
+#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+
+#ifdef CONFIG_BPF_SYSCALL
+void unpriv_ebpf_notify(int new_state)
+{
+ if (new_state)
+ return;
+
+ /* Unprivileged eBPF is enabled */
+
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_EIBRS:
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+ break;
+ case SPECTRE_V2_EIBRS_LFENCE:
+ if (sched_smt_active())
+ pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+ break;
+ default:
+ break;
+ }
+}
+#endif
+
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
@@ -664,7 +691,10 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
- SPECTRE_V2_CMD_RETPOLINE_AMD,
+ SPECTRE_V2_CMD_RETPOLINE_LFENCE,
+ SPECTRE_V2_CMD_EIBRS,
+ SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+ SPECTRE_V2_CMD_EIBRS_LFENCE,
};
enum spectre_v2_user_cmd {
@@ -737,6 +767,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+{
+ return (mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE);
+}
+
static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
@@ -804,7 +841,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return;
/*
@@ -824,9 +861,11 @@ set_mode:
static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+ [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
};
static const struct {
@@ -837,8 +876,12 @@ static const struct {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
+ { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "eibrs", SPECTRE_V2_CMD_EIBRS, false },
+ { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
+ { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
};
@@ -875,10 +918,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_EIBRS ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+ pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
+ !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n",
+ mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@@ -887,6 +950,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return cmd;
}
+static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
+{
+ if (!IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("Kernel not compiled with retpoline; no mitigation available!");
+ return SPECTRE_V2_NONE;
+ }
+
+ return SPECTRE_V2_RETPOLINE;
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -907,49 +980,64 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
- mode = SPECTRE_V2_IBRS_ENHANCED;
- /* Force it so VMEXIT will restore correctly */
- x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- goto specv2_set_mode;
+ mode = SPECTRE_V2_EIBRS;
+ break;
}
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_auto;
+
+ mode = spectre_v2_select_retpoline();
break;
- case SPECTRE_V2_CMD_RETPOLINE_AMD:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_amd;
+
+ case SPECTRE_V2_CMD_RETPOLINE_LFENCE:
+ pr_err(SPECTRE_V2_LFENCE_MSG);
+ mode = SPECTRE_V2_LFENCE;
break;
+
case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_generic;
+ mode = SPECTRE_V2_RETPOLINE;
break;
+
case SPECTRE_V2_CMD_RETPOLINE:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_auto;
+ mode = spectre_v2_select_retpoline();
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS:
+ mode = SPECTRE_V2_EIBRS;
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS_LFENCE:
+ mode = SPECTRE_V2_EIBRS_LFENCE;
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS_RETPOLINE:
+ mode = SPECTRE_V2_EIBRS_RETPOLINE;
break;
}
- pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
- return;
-retpoline_auto:
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- retpoline_amd:
- if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
- pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
- goto retpoline_generic;
- }
- mode = SPECTRE_V2_RETPOLINE_AMD;
- setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
- setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
- } else {
- retpoline_generic:
- mode = SPECTRE_V2_RETPOLINE_GENERIC;
+ if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+
+ if (spectre_v2_in_eibrs_mode(mode)) {
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ }
+
+ switch (mode) {
+ case SPECTRE_V2_NONE:
+ case SPECTRE_V2_EIBRS:
+ break;
+
+ case SPECTRE_V2_LFENCE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+ fallthrough;
+
+ case SPECTRE_V2_RETPOLINE:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ break;
}
-specv2_set_mode:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -975,7 +1063,7 @@ specv2_set_mode:
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
+ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
@@ -1045,6 +1133,10 @@ void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
@@ -1684,7 +1776,7 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -1714,6 +1806,27 @@ static char *ibpb_state(void)
return "";
}
+static ssize_t spectre_v2_show_state(char *buf)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+ return sprintf(buf, "Vulnerable: LFENCE\n");
+
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+ return sprintf(buf, "%s%s%s%s%s%s\n",
+ spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ stibp_state(),
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ spectre_v2_module_string());
+}
+
static ssize_t srbds_show_state(char *buf)
{
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
@@ -1739,12 +1852,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
- return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
- spectre_v2_module_string());
+ return spectre_v2_show_state(buf);
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7b8382c11788..64deb7727d00 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -88,6 +88,83 @@ EXPORT_SYMBOL_GPL(get_llc_id);
/* L2 cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
+static struct ppin_info {
+ int feature;
+ int msr_ppin_ctl;
+ int msr_ppin;
+} ppin_info[] = {
+ [X86_VENDOR_INTEL] = {
+ .feature = X86_FEATURE_INTEL_PPIN,
+ .msr_ppin_ctl = MSR_PPIN_CTL,
+ .msr_ppin = MSR_PPIN
+ },
+ [X86_VENDOR_AMD] = {
+ .feature = X86_FEATURE_AMD_PPIN,
+ .msr_ppin_ctl = MSR_AMD_PPIN_CTL,
+ .msr_ppin = MSR_AMD_PPIN
+ },
+};
+
+static const struct x86_cpu_id ppin_cpuids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]),
+ X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]),
+
+ /* Legacy models without CPUID enumeration */
+ X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]),
+
+ {}
+};
+
+static void ppin_init(struct cpuinfo_x86 *c)
+{
+ const struct x86_cpu_id *id;
+ unsigned long long val;
+ struct ppin_info *info;
+
+ id = x86_match_cpu(ppin_cpuids);
+ if (!id)
+ return;
+
+ /*
+ * Testing the presence of the MSR is not enough. Need to check
+ * that the PPIN_CTL allows reading of the PPIN.
+ */
+ info = (struct ppin_info *)id->driver_data;
+
+ if (rdmsrl_safe(info->msr_ppin_ctl, &val))
+ goto clear_ppin;
+
+ if ((val & 3UL) == 1UL) {
+ /* PPIN locked in disabled mode */
+ goto clear_ppin;
+ }
+
+ /* If PPIN is disabled, try to enable */
+ if (!(val & 2UL)) {
+ wrmsrl_safe(info->msr_ppin_ctl, val | 2UL);
+ rdmsrl_safe(info->msr_ppin_ctl, &val);
+ }
+
+ /* Is the enable bit set? */
+ if (val & 2UL) {
+ c->ppin = __rdmsr(info->msr_ppin);
+ set_cpu_cap(c, info->feature);
+ return;
+ }
+
+clear_ppin:
+ clear_cpu_cap(c, info->feature);
+}
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -1655,6 +1732,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
}
+ ppin_init(c);
+
/* Init Machine Check Exception if available. */
mcheck_cpu_init(c);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9f4b508886dd..1940d305db1c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = {
NULL, /* possibly interrupt_enable if supported, see below */
NULL,
};
+ATTRIBUTE_GROUPS(default);
#define to_block(k) container_of(k, struct threshold_block, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)
@@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj);
static struct kobj_type threshold_ktype = {
.sysfs_ops = &threshold_ops,
- .default_attrs = default_attrs,
+ .default_groups = default_groups,
.release = threshold_block_release,
};
@@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
b->threshold_limit = THRESHOLD_MAX;
if (b->interrupt_capable) {
- threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+ default_attrs[2] = &interrupt_enable.attr;
b->interrupt_enable = 1;
} else {
- threshold_ktype.default_attrs[2] = NULL;
+ default_attrs[2] = NULL;
}
INIT_LIST_HEAD(&b->miscj);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..4f1e825033ce 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -138,12 +138,7 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
-
- if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
- m->ppin = __rdmsr(MSR_PPIN);
- else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
- m->ppin = __rdmsr(MSR_AMD_PPIN);
-
+ m->ppin = cpu_data(m->extcpu).ppin;
m->microcode = boot_cpu_data.microcode;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index baafbb37be67..95275a5e57e0 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -470,47 +470,6 @@ void intel_clear_lmce(void)
wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}
-static void intel_ppin_init(struct cpuinfo_x86 *c)
-{
- unsigned long long val;
-
- /*
- * Even if testing the presence of the MSR would be enough, we don't
- * want to risk the situation where other models reuse this MSR for
- * other purposes.
- */
- switch (c->x86_model) {
- case INTEL_FAM6_IVYBRIDGE_X:
- case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_ICELAKE_D:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
-
- if (rdmsrl_safe(MSR_PPIN_CTL, &val))
- return;
-
- if ((val & 3UL) == 1UL) {
- /* PPIN locked in disabled mode */
- return;
- }
-
- /* If PPIN is disabled, try to enable */
- if (!(val & 2UL)) {
- wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
- rdmsrl_safe(MSR_PPIN_CTL, &val);
- }
-
- /* Is the enable bit set? */
- if (val & 2UL)
- set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
- }
-}
-
/*
* Enable additional error logs from the integrated
* memory controller on processors that support this.
@@ -535,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_cmci();
intel_init_lmce();
- intel_ppin_init(c);
intel_imc_init(c);
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 21d1f062895a..4143b1e4c5c6 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,7 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 001808e3901c..7c63a1911fae 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -13,6 +13,30 @@
#include "sgx.h"
/*
+ * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
+ * follow right after the EPC data in the backing storage. In addition to the
+ * visible enclave pages, there's one extra page slot for SECS, before PCMD
+ * structs.
+ */
+static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
+ unsigned long page_index)
+{
+ pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);
+
+ return epc_end_off + page_index * sizeof(struct sgx_pcmd);
+}
+
+/*
+ * Free a page from the backing storage in the given page index.
+ */
+static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
+{
+ struct inode *inode = file_inode(encl->backing);
+
+ shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
+}
+
+/*
* ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
* Pages" in the SDM.
*/
@@ -22,9 +46,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
{
unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
struct sgx_encl *encl = encl_page->encl;
+ pgoff_t page_index, page_pcmd_off;
struct sgx_pageinfo pginfo;
struct sgx_backing b;
- pgoff_t page_index;
+ bool pcmd_page_empty;
+ u8 *pcmd_page;
int ret;
if (secs_page)
@@ -32,14 +58,16 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
else
page_index = PFN_DOWN(encl->size);
+ page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+
ret = sgx_encl_get_backing(encl, page_index, &b);
if (ret)
return ret;
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.contents = (unsigned long)kmap_atomic(b.contents);
- pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) +
- b.pcmd_offset;
+ pcmd_page = kmap_atomic(b.pcmd);
+ pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
if (secs_page)
pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
@@ -55,11 +83,24 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
ret = -EFAULT;
}
- kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
+ memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
+
+ /*
+ * The area for the PCMD in the page was zeroed above. Check if the
+ * whole page is now empty meaning that all PCMD's have been zeroed:
+ */
+ pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
+
+ kunmap_atomic(pcmd_page);
kunmap_atomic((void *)(unsigned long)pginfo.contents);
sgx_encl_put_backing(&b, false);
+ sgx_encl_truncate_backing_page(encl, page_index);
+
+ if (pcmd_page_empty)
+ sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
+
return ret;
}
@@ -410,6 +451,8 @@ void sgx_encl_release(struct kref *ref)
}
kfree(entry);
+ /* Invoke scheduler to prevent soft lockups. */
+ cond_resched();
}
xa_destroy(&encl->page_array);
@@ -577,7 +620,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
struct sgx_backing *backing)
{
- pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5);
+ pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
struct page *contents;
struct page *pcmd;
@@ -585,7 +628,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
if (IS_ERR(contents))
return PTR_ERR(contents);
- pcmd = sgx_encl_get_backing_page(encl, pcmd_index);
+ pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
if (IS_ERR(pcmd)) {
put_page(contents);
return PTR_ERR(pcmd);
@@ -594,9 +637,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
backing->page_index = page_index;
backing->contents = contents;
backing->pcmd = pcmd;
- backing->pcmd_offset =
- (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) *
- sizeof(struct sgx_pcmd);
+ backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
return 0;
}
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 4b41efc9e367..8e4bc6453d26 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void)
{
struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
struct sgx_backing backing[SGX_NR_TO_SCAN];
- struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
- struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
@@ -418,13 +416,7 @@ skip:
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
- section = &sgx_epc_sections[epc_page->section];
- node = section->node;
-
- spin_lock(&node->lock);
- list_add_tail(&epc_page->list, &node->free_page_list);
- spin_unlock(&node->lock);
- atomic_long_inc(&sgx_nr_free_pages);
+ sgx_free_epc_page(epc_page);
}
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index bc0657f0deed..f267205f2d5a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt);
*/
void __init e820__reserve_setup_data(void)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
+ u32 len;
pa_data = boot_params.hdr.setup_data;
if (!pa_data)
@@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void)
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
+ if (!data) {
+ pr_warn("e820: failed to memremap setup_data entry\n");
+ return;
+ }
+
+ len = sizeof(*data);
+ pa_next = data->next;
+
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
@@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void)
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- e820__range_update(((struct setup_indirect *)data->data)->addr,
- ((struct setup_indirect *)data->data)->len,
- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
- e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
- ((struct setup_indirect *)data->data)->len,
- E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ if (data->type == SETUP_INDIRECT) {
+ len += data->len;
+ early_memunmap(data, sizeof(*data));
+ data = early_memremap(pa_data, len);
+ if (!data) {
+ pr_warn("e820: failed to memremap indirect setup_data\n");
+ return;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ e820__range_update(indirect->addr, indirect->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ e820__range_update_kexec(indirect->addr, indirect->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ }
}
- pa_data = data->next;
- early_memunmap(data, sizeof(*data));
+ pa_data = pa_next;
+ early_memunmap(data, len);
}
e820__update_table(e820_table);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 437d7c930c0b..75ffaef8c299 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
struct fpu *fpu = &target->thread.fpu;
- struct user32_fxsr_struct newstate;
+ struct fxregs_state newstate;
int ret;
- BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state));
-
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
/* Copy the state */
memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate));
- /* Clear xmm8..15 */
+ /* Clear xmm8..15 for 32-bit callers */
BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16);
- memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16);
+ if (in_ia32_syscall())
+ memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 02b3ddaf4f75..7c7824ae7862 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
+ if (!guest_fpu)
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 64b6da95af98..e2e89bebcbc3 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no,
static int __init create_setup_data_nodes(struct dentry *parent)
{
+ struct setup_indirect *indirect;
struct setup_data_node *node;
struct setup_data *data;
- int error;
+ u64 pa_data, pa_next;
struct dentry *d;
- u64 pa_data;
+ int error;
+ u32 len;
int no = 0;
d = debugfs_create_dir("setup_data", parent);
@@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent)
error = -ENOMEM;
goto err_dir;
}
-
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- node->paddr = ((struct setup_indirect *)data->data)->addr;
- node->type = ((struct setup_indirect *)data->data)->type;
- node->len = ((struct setup_indirect *)data->data)->len;
+ pa_next = data->next;
+
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(pa_data, len, MEMREMAP_WB);
+ if (!data) {
+ kfree(node);
+ error = -ENOMEM;
+ goto err_dir;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ node->paddr = indirect->addr;
+ node->type = indirect->type;
+ node->len = indirect->len;
+ } else {
+ node->paddr = pa_data;
+ node->type = data->type;
+ node->len = data->len;
+ }
} else {
node->paddr = pa_data;
node->type = data->type;
@@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
}
create_setup_data_node(d, no, node);
- pa_data = data->next;
+ pa_data = pa_next;
memunmap(data);
no++;
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index d0a19121c6a4..257892fcefa7 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
static int __init get_setup_data_size(int nr, size_t *size)
{
- int i = 0;
+ u64 pa_data = boot_params.hdr.setup_data, pa_next;
+ struct setup_indirect *indirect;
struct setup_data *data;
- u64 pa_data = boot_params.hdr.setup_data;
+ int i = 0;
+ u32 len;
while (pa_data) {
data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
if (!data)
return -ENOMEM;
+ pa_next = data->next;
+
if (nr == i) {
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
- *size = ((struct setup_indirect *)data->data)->len;
- else
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(pa_data, len, MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT)
+ *size = indirect->len;
+ else
+ *size = data->len;
+ } else {
*size = data->len;
+ }
memunmap(data);
return 0;
}
- pa_data = data->next;
+ pa_data = pa_next;
memunmap(data);
i++;
}
@@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size)
static ssize_t type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
+ struct setup_indirect *indirect;
+ struct setup_data *data;
int nr, ret;
u64 paddr;
- struct setup_data *data;
+ u32 len;
ret = kobj_to_setup_data_nr(kobj, &nr);
if (ret)
@@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj,
if (!data)
return -ENOMEM;
- if (data->type == SETUP_INDIRECT)
- ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type);
- else
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(paddr, len, MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
+
+ indirect = (struct setup_indirect *)data->data;
+
+ ret = sprintf(buf, "0x%x\n", indirect->type);
+ } else {
ret = sprintf(buf, "0x%x\n", data->type);
+ }
+
memunmap(data);
return ret;
}
@@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp,
char *buf,
loff_t off, size_t count)
{
+ struct setup_indirect *indirect;
+ struct setup_data *data;
int nr, ret = 0;
u64 paddr, len;
- struct setup_data *data;
void *p;
ret = kobj_to_setup_data_nr(kobj, &nr);
@@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp,
if (!data)
return -ENOMEM;
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- paddr = ((struct setup_indirect *)data->data)->addr;
- len = ((struct setup_indirect *)data->data)->len;
+ if (data->type == SETUP_INDIRECT) {
+ len = sizeof(*data) + data->len;
+ memunmap(data);
+ data = memremap(paddr, len, MEMREMAP_WB);
+ if (!data)
+ return -ENOMEM;
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ } else {
+ /*
+ * Even though this is technically undefined, return
+ * the data as though it is a normal setup_data struct.
+ * This will at least allow it to be inspected.
+ */
+ paddr += sizeof(*data);
+ len = data->len;
+ }
} else {
paddr += sizeof(*data);
len = data->len;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a438217cbfac..d77481ecb0d5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -462,19 +462,24 @@ static bool pv_tlb_flush_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ !boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (num_possible_cpus() != 1));
}
static bool pv_ipi_supported(void)
{
- return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
+ (num_possible_cpus() != 1));
}
static bool pv_sched_yield_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ !boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (num_possible_cpus() != 1));
}
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
@@ -619,7 +624,7 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
/* Make sure other vCPUs get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
- if (vcpu_is_preempted(cpu)) {
+ if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
break;
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index a35cbf9107af..c5caa7311bd8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -239,6 +239,9 @@ static void __init kvmclock_init_mem(void)
static int __init kvm_setup_vsyscall_timeinfo(void)
{
+ if (!kvm_para_available() || !kvmclock)
+ return 0;
+
kvmclock_init_mem();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 95fa745e310a..96d7c27b7093 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -273,6 +273,14 @@ int module_finalize(const Elf_Ehdr *hdr,
retpolines = s;
}
+ /*
+ * See alternative_instructions() for the ordering rules between the
+ * various patching types.
+ */
+ if (para) {
+ void *pseg = (void *)para->sh_addr;
+ apply_paravirt(pseg, pseg + para->sh_size);
+ }
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
@@ -290,11 +298,6 @@ int module_finalize(const Elf_Ehdr *hdr,
tseg, tseg + text->sh_size);
}
- if (para) {
- void *pseg = (void *)para->sh_addr;
- apply_paravirt(pseg, pseg + para->sh_size);
- }
-
/* make jump label nops */
jump_label_apply_nops(me);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81d8ef036637..e131d71b3cae 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy)
* without the encryption bit, they don't race each other when flushed
* and potentially end up with the wrong entry being committed to
* memory.
+ *
+ * Test the CPUID bit directly because the machine might've cleared
+ * X86_FEATURE_SME due to cmdline options.
*/
- if (boot_cpu_has(X86_FEATURE_SME))
+ if (cpuid_eax(0x8000001f) & BIT(0))
native_wbinvd();
for (;;) {
/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6d2244c94799..8d2f2f995539 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
- .n = sizeof(struct user_i387_struct) / sizeof(long),
+ .n = sizeof(struct fxregs_state) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
@@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = {
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
- .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
+ .n = sizeof(struct fxregs_state) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 9ae64f9af956..9b9fb7882c20 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dmi.h>
#include <linux/ioport.h>
#include <asm/e820/api.h>
@@ -24,31 +23,11 @@ static void resource_clip(struct resource *res, resource_size_t start,
res->start = end + 1;
}
-/*
- * Some BIOS-es contain a bug where they add addresses which map to
- * system RAM in the PCI host bridge window returned by the ACPI _CRS
- * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when
- * allocating address space"). To avoid this Linux by default excludes
- * E820 reservations when allocating addresses since 2010.
- * In 2019 some systems have shown-up with E820 reservations which cover
- * the entire _CRS returned PCI host bridge window, causing all attempts
- * to assign memory to PCI BARs to fail if Linux uses E820 reservations.
- *
- * Ideally Linux would fully stop using E820 reservations, but then
- * the old systems this was added for will regress.
- * Instead keep the old behavior for old systems, while ignoring the
- * E820 reservations for any systems from now on.
- */
static void remove_e820_regions(struct resource *avail)
{
- int i, year = dmi_get_bios_year();
+ int i;
struct e820_entry *entry;
- if (year >= 2018)
- return;
-
- pr_info_once("PCI: Removing E820 reservations from host bridge windows\n");
-
for (i = 0; i < e820_table->nr_entries; i++) {
entry = &e820_table->entries[i];
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f7a132eb794d..90d7e1788c91 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -369,21 +369,41 @@ static void __init parse_setup_data(void)
static void __init memblock_x86_reserve_range_setup_data(void)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
+ u32 len;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
+ if (!data) {
+ pr_warn("setup: failed to memremap setup_data entry\n");
+ return;
+ }
+
+ len = sizeof(*data);
+ pa_next = data->next;
+
memblock_reserve(pa_data, sizeof(*data) + data->len);
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
- memblock_reserve(((struct setup_indirect *)data->data)->addr,
- ((struct setup_indirect *)data->data)->len);
+ if (data->type == SETUP_INDIRECT) {
+ len += data->len;
+ early_memunmap(data, sizeof(*data));
+ data = early_memremap(pa_data, len);
+ if (!data) {
+ pr_warn("setup: failed to memremap indirect setup_data\n");
+ return;
+ }
- pa_data = data->next;
- early_memunmap(data, sizeof(*data));
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT)
+ memblock_reserve(indirect->addr, indirect->len);
+ }
+
+ pa_data = pa_next;
+ early_memunmap(data, len);
}
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c9d566dcf89a..8143693a7ea6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -659,6 +659,7 @@ static bool do_int3(struct pt_regs *regs)
return res == NOTIFY_STOP;
}
+NOKPROBE_SYMBOL(do_int3);
static void do_int3_user(struct pt_regs *regs)
{
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 494d4d351859..b8f8d268d058 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -282,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
+ u64 guest_supported_xcr0;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
@@ -293,9 +294,11 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- vcpu->arch.guest_supported_xcr0 =
+ guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+ vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0;
+
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5719d8cfdbd9..e86d610dc6b7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -429,8 +429,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
FOP_END
/* Special case for SETcc - 1 instruction per cc */
+
+/*
+ * Depending on .config the SETcc functions look like:
+ *
+ * SETcc %al [3 bytes]
+ * RET [1 byte]
+ * INT3 [1 byte; CONFIG_SLS]
+ *
+ * Which gives possible sizes 4 or 5. When rounded up to the
+ * next power-of-two alignment they become 4 or 8.
+ */
+#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS))
+#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS))
+static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+
#define FOP_SETCC(op) \
- ".align 4 \n\t" \
+ ".align " __stringify(SETCC_ALIGN) " \n\t" \
".type " #op ", @function \n\t" \
#op ": \n\t" \
#op " %al \n\t" \
@@ -1047,7 +1062,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
u8 rc;
- void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+ void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
asm("push %[flags]; popf; " CALL_NOSPEC
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d7e6fde82d25..9322e6340a74 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2306,7 +2306,12 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
apic->irr_pending = true;
apic->isr_count = 1;
} else {
- apic->irr_pending = (apic_search_irr(apic) != -1);
+ /*
+ * Don't clear irr_pending, searching the IRR can race with
+ * updates from the CPU as APICv is still active from hardware's
+ * perspective. The flag will be cleared as appropriate when
+ * KVM injects the interrupt.
+ */
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
}
}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 593093b52395..5628d0ba637e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3565,7 +3565,7 @@ set_root_pgd:
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
- return 0;
+ return r;
}
static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
@@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
+static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
+{
+ /* make sure the token value is not 0 */
+ u32 id = vcpu->arch.apf.id;
+
+ if (id << 12 == 0)
+ vcpu->arch.apf.id = 1;
+
+ return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+}
+
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;
- arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+ arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f614f95acc6b..b1a02993782b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -95,7 +95,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
}
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
- unsigned config, bool exclude_user,
+ u64 config, bool exclude_user,
bool exclude_kernel, bool intr,
bool in_tx, bool in_tx_cp)
{
@@ -181,7 +181,8 @@ static int cmp_u64(const void *a, const void *b)
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
- unsigned config, type = PERF_TYPE_RAW;
+ u64 config;
+ u32 type = PERF_TYPE_RAW;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
bool allow_event = true;
@@ -220,7 +221,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
}
if (type == PERF_TYPE_RAW)
- config = eventsel & X86_RAW_EVENT_MASK;
+ config = eventsel & AMD64_RAW_EVENT_MASK;
if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
return;
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 90364d02f22a..fb3e20791338 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,20 +27,6 @@
#include "irq.h"
#include "svm.h"
-#define SVM_AVIC_DOORBELL 0xc001011b
-
-#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-
-/*
- * 0xff is broadcast, so the max index allowed for physical APIC ID
- * table is 0xfe. APIC IDs above 0xff are reserved.
- */
-#define AVIC_MAX_PHYSICAL_ID_COUNT 255
-
-#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
-#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
-#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
-
/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS 8
#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
@@ -73,12 +59,6 @@ struct amd_svm_iommu_ir {
void *data; /* Storing pointer to struct amd_ir_data */
};
-enum avic_ipi_failure_cause {
- AVIC_IPI_FAILURE_INVALID_INT_TYPE,
- AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
- AVIC_IPI_FAILURE_INVALID_TARGET,
- AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
-};
/* Note:
* This function is called from IOMMU driver to notify
@@ -289,6 +269,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+void avic_ring_doorbell(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point,
+ * which could result in signalling the wrong/previous pCPU. But if
+ * that happens the vCPU is guaranteed to do a VMRUN (after being
+ * migrated) and thus will process pending interrupts, i.e. a doorbell
+ * is not needed (and the spurious one is harmless).
+ */
+ int cpu = READ_ONCE(vcpu->cpu);
+
+ if (cpu != get_cpu())
+ wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
+ put_cpu();
+}
+
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh)
{
@@ -304,8 +300,13 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
kvm_for_each_vcpu(i, vcpu, kvm) {
if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
- icrl & APIC_DEST_MASK))
- kvm_vcpu_wake_up(vcpu);
+ icrl & APIC_DEST_MASK)) {
+ vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ }
}
}
@@ -345,8 +346,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
break;
case AVIC_IPI_FAILURE_INVALID_TARGET:
- WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
- index, vcpu->vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
@@ -669,52 +668,6 @@ void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
-{
- if (!vcpu->arch.apicv_active)
- return -1;
-
- kvm_lapic_set_irr(vec, vcpu->arch.apic);
-
- /*
- * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
- * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
- * the read of guest_mode, which guarantees that either VMRUN will see
- * and process the new vIRR entry, or that the below code will signal
- * the doorbell if the vCPU is already running in the guest.
- */
- smp_mb__after_atomic();
-
- /*
- * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
- * is in the guest. If the vCPU is not in the guest, hardware will
- * automatically process AVIC interrupts at VMRUN.
- */
- if (vcpu->mode == IN_GUEST_MODE) {
- int cpu = READ_ONCE(vcpu->cpu);
-
- /*
- * Note, the vCPU could get migrated to a different pCPU at any
- * point, which could result in signalling the wrong/previous
- * pCPU. But if that happens the vCPU is guaranteed to do a
- * VMRUN (after being migrated) and thus will process pending
- * interrupts, i.e. a doorbell is not needed (and the spurious
- * one is harmless).
- */
- if (cpu != get_cpu())
- wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
- put_cpu();
- } else {
- /*
- * Wake the vCPU if it was blocking. KVM will then detect the
- * pending IRQ when checking if the vCPU has a wake event.
- */
- kvm_vcpu_wake_up(vcpu);
- }
-
- return 0;
-}
-
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
return false;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1218b5a342fc..39d280e7e80e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1457,18 +1457,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
!__nested_vmcb_check_save(vcpu, &save_cached))
goto out_free;
- /*
- * While the nested guest CR3 is already checked and set by
- * KVM_SET_SREGS, it was set when nested state was yet loaded,
- * thus MMU might not be initialized correctly.
- * Set it again to fix this.
- */
-
- ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
- nested_npt_enabled(svm), false);
- if (WARN_ON_ONCE(ret))
- goto out_free;
-
/*
* All checks done, we can enter guest mode. Userspace provides
@@ -1494,6 +1482,20 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
+
+ /*
+ * While the nested guest CR3 is already checked and set by
+ * KVM_SET_SREGS, it was set when nested state was yet loaded,
+ * thus MMU might not be initialized correctly.
+ * Set it again to fix this.
+ */
+
+ ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ nested_npt_enabled(svm), false);
+ if (WARN_ON_ONCE(ret))
+ goto out_free;
+
+
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a290efb272ad..fd3a00c892c7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1585,6 +1585,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 hcr0 = cr0;
+ bool old_paging = is_paging(vcpu);
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
@@ -1601,8 +1602,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
#endif
vcpu->arch.cr0 = cr0;
- if (!npt_enabled)
+ if (!npt_enabled) {
hcr0 |= X86_CR0_PG | X86_CR0_WP;
+ if (old_paging != is_paging(vcpu))
+ svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
+ }
/*
* re-enable caching here because the QEMU bios
@@ -1646,8 +1650,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
svm_flush_tlb(vcpu);
vcpu->arch.cr4 = cr4;
- if (!npt_enabled)
+ if (!npt_enabled) {
cr4 |= X86_CR4_PAE;
+
+ if (!is_paging(vcpu))
+ cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ }
cr4 |= host_cr4_mce;
to_svm(vcpu)->vmcb->save.cr4 = cr4;
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
@@ -2685,8 +2693,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
- if (!msr->host_initiated && !svm->tsc_scaling_enabled)
- return 1;
+
+ if (!svm->tsc_scaling_enabled) {
+
+ if (!msr->host_initiated)
+ return 1;
+ /*
+ * In case TSC scaling is not enabled, always
+ * leave this MSR at the default value.
+ *
+ * Due to bug in qemu 6.2.0, it would try to set
+ * this msr to 0 if tsc scaling is not enabled.
+ * Ignore this value as well.
+ */
+ if (data != 0 && data != svm->tsc_ratio_msr)
+ return 1;
+ break;
+ }
if (data & TSC_RATIO_RSVD)
return 1;
@@ -3291,21 +3314,55 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}
-static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
- int trig_mode, int vector)
+void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vector)
{
- struct kvm_vcpu *vcpu = apic->vcpu;
+ /*
+ * vcpu->arch.apicv_active must be read after vcpu->mode.
+ * Pairs with smp_store_release in vcpu_enter_guest.
+ */
+ bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE);
- if (svm_deliver_avic_intr(vcpu, vector)) {
- kvm_lapic_set_irr(vector, apic);
+ if (!READ_ONCE(vcpu->arch.apicv_active)) {
+ /* Process the interrupt via inject_pending_event */
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
+ return;
+ }
+
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector);
+ if (in_guest_mode) {
+ /*
+ * Signal the doorbell to tell hardware to inject the IRQ. If
+ * the vCPU exits the guest before the doorbell chimes, hardware
+ * will automatically process AVIC interrupts at the next VMRUN.
+ */
+ avic_ring_doorbell(vcpu);
} else {
- trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector);
+ /*
+ * Wake the vCPU if it was blocking. KVM will then detect the
+ * pending IRQ when checking if the vCPU has a wake event.
+ */
+ kvm_vcpu_wake_up(vcpu);
}
}
+static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ kvm_lapic_set_irr(vector, apic);
+
+ /*
+ * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
+ * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
+ * the read of guest_mode. This guarantees that either VMRUN will see
+ * and process the new vIRR entry, or that svm_complete_interrupt_delivery
+ * will signal the doorbell if the CPU has already entered the guest.
+ */
+ smp_mb__after_atomic();
+ svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode, vector);
+}
+
static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3353,11 +3410,13 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_nmi_blocked(vcpu))
+ return 0;
+
/* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return -EBUSY;
-
- return !svm_nmi_blocked(vcpu);
+ return 1;
}
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -3409,9 +3468,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
+
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_interrupt_blocked(vcpu))
+ return 0;
+
/*
* An IRQ must not be injected into L2 if it's supposed to VM-Exit,
* e.g. if the IRQ arrived asynchronously after checking nested events.
@@ -3419,7 +3482,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
return -EBUSY;
- return !svm_interrupt_blocked(vcpu);
+ return 1;
}
static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -4150,11 +4213,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_smi_blocked(vcpu))
+ return 0;
+
/* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
return -EBUSY;
- return !svm_smi_blocked(vcpu);
+ return 1;
}
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
@@ -4248,11 +4314,18 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
* Enter the nested guest now
*/
+ vmcb_mark_all_dirty(svm->vmcb01.ptr);
+
vmcb12 = map.hva;
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+ if (ret)
+ goto unmap_save;
+
+ svm->nested.nested_run_pending = 1;
+
unmap_save:
kvm_vcpu_unmap(vcpu, &map_save, true);
unmap_map:
@@ -4637,6 +4710,7 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
+ kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
if (nrips)
kvm_cpu_cap_set(X86_FEATURE_NRIPS);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 73525353e424..fa98d6844728 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -489,6 +489,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
int read, int write);
+void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vec);
/* nested.c */
@@ -556,17 +558,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
-#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
-#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
-#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
-#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
-#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
-#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
-
-#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -583,12 +574,12 @@ bool svm_check_apicv_inhibit_reasons(ulong bit);
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
-int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void avic_ring_doorbell(struct kvm_vcpu *vcpu);
/* sev.c */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ba34e94049c7..dc822a1d403d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
src = &prev->host_state;
dest = &vmx->loaded_vmcs->host_state;
- vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
- src->fs_base, src->gs_base);
+ vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
dest->ds_sel = src->ds_sel;
@@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr4;
+ unsigned long cr3, cr4;
bool vm_fail;
if (!nested_early_check)
@@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
*/
vmcs_writel(GUEST_RFLAGS, 0);
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6c27bd0c89e1..b730d799c26e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1080,14 +1080,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
-void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
- u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base)
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base)
{
- if (unlikely(cr3 != host->cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- host->cr3 = cr3;
- }
if (unlikely(fs_sel != host->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
@@ -1182,9 +1177,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
- vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(),
- fs_sel, gs_sel, fs_base, gs_base);
-
+ vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
vmx->guest_state_loaded = true;
}
@@ -6791,7 +6784,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr4;
+ unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -6834,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
+ /*
+ * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
+ * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+ * it switches back to the current->mm, which can occur in KVM context
+ * when switching to a temporary mm to patch kernel code, e.g. if KVM
+ * toggles a static key while handling a VM-Exit.
+ */
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -7659,6 +7665,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (ret)
return ret;
+ vmx->nested.nested_run_pending = 1;
vmx->nested.smm.guest_mode = false;
}
return 0;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f2c82e7f38f..9c6bfcd84008 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -374,9 +374,8 @@ int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
-void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
- u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7131d735b1ef..eb4029660bd9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -984,6 +984,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
+static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.guest_fpu.fpstate->user_xfeatures;
+}
+
+#ifdef CONFIG_X86_64
+static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
+{
+ return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC;
+}
+#endif
+
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0 = xcr;
@@ -1003,7 +1015,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
* saving. However, xcr0 bit 0 is always set, even if the
* emulated CPU does not support XSAVE (see kvm_vcpu_reset()).
*/
- valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
+ valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP;
if (xcr0 & ~valid_bits)
return 1;
@@ -2351,10 +2363,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+#ifdef CONFIG_X86_64
static inline int gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
+#endif
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
@@ -3706,8 +3720,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
- if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
- vcpu->arch.guest_supported_xcr0))
+ if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
@@ -3717,8 +3730,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
- if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
- vcpu->arch.guest_supported_xcr0))
+ if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
vcpu->arch.guest_fpu.xfd_err = data;
@@ -4233,6 +4245,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_SYS_ATTRIBUTES:
+ case KVM_CAP_ENABLE_CAP:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -8942,6 +8955,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
return -KVM_EOPNOTSUPP;
+ /*
+ * When tsc is in permanent catchup mode guests won't be able to use
+ * pvclock_read_retry loop to get consistent view of pvclock
+ */
+ if (vcpu->arch.tsc_always_catchup)
+ return -KVM_EOPNOTSUPP;
+
if (!kvm_get_walltime_and_clockread(&ts, &cycle))
return -KVM_EOPNOTSUPP;
@@ -9160,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
likely(!pic_in_kernel(vcpu->kvm));
}
+/* Called within kvm->srcu read side. */
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
@@ -9168,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- /*
- * The call to kvm_ready_for_interrupt_injection() may end up in
- * kvm_xen_has_interrupt() which may require the srcu lock to be
- * held, to protect against changes in the vcpu_info address.
- */
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM;
@@ -9795,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
/*
+ * Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
* userspace.
@@ -9983,7 +9998,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* result in virtual interrupt delivery.
*/
local_irq_disable();
- vcpu->mode = IN_GUEST_MODE;
+
+ /* Store vcpu->apicv_active before vcpu->mode. */
+ smp_store_release(&vcpu->mode, IN_GUEST_MODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -10171,6 +10188,7 @@ out:
return r;
}
+/* Called within kvm->srcu read side. */
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
bool hv_timer;
@@ -10230,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted);
}
+/* Called within kvm->srcu read side. */
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
@@ -10263,14 +10281,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (__xfer_to_guest_mode_work_pending()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
r = xfer_to_guest_mode_handle_work(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (r)
return r;
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-
return r;
}
@@ -10376,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
+ struct kvm *kvm = vcpu->kvm;
int r;
vcpu_load(vcpu);
@@ -10383,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
@@ -10393,7 +10411,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* use before KVM has ever run the vCPU.
*/
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
+
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
goto out;
@@ -10453,8 +10475,9 @@ out:
if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
- kvm_sigset_deactivate(vcpu);
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_sigset_deactivate(vcpu);
vcpu_put(vcpu);
return r;
}
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index bad57535fad0..74be1fda58e3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
+ struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
+ struct kvm_memslots *slots = kvm_memslots(v->kvm);
+ bool atomic = (state == RUNSTATE_runnable);
uint64_t state_entry_time;
- unsigned int offset;
+ int __user *user_state;
+ uint64_t __user *user_times;
kvm_xen_update_runstate(v, state);
if (!vx->runstate_set)
return;
- BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+ if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
+ kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
+ return;
+
+ /* We made sure it fits in a single page */
+ BUG_ON(!ghc->memslot);
+
+ if (atomic)
+ pagefault_disable();
- offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
-#ifdef CONFIG_X86_64
/*
- * The only difference is alignment of uint64_t in 32-bit.
- * So the first field 'state' is accessed directly using
- * offsetof() (where its offset happens to be zero), while the
- * remaining fields which are all uint64_t, start at 'offset'
- * which we tweak here by adding 4.
+ * The only difference between 32-bit and 64-bit versions of the
+ * runstate struct us the alignment of uint64_t in 32-bit, which
+ * means that the 64-bit version has an additional 4 bytes of
+ * padding after the first field 'state'.
+ *
+ * So we use 'int __user *user_state' to point to the state field,
+ * and 'uint64_t __user *user_times' for runstate_entry_time. So
+ * the actual array of time[] in each state starts at user_times[1].
*/
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
+ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
+ user_state = (int __user *)ghc->hva;
+
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+ user_times = (uint64_t __user *)(ghc->hva +
+ offsetof(struct compat_vcpu_runstate_info,
+ state_entry_time));
+#ifdef CONFIG_X86_64
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
offsetof(struct compat_vcpu_runstate_info, time) + 4);
if (v->kvm->arch.xen.long_mode)
- offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+ user_times = (uint64_t __user *)(ghc->hva +
+ offsetof(struct vcpu_runstate_info,
+ state_entry_time));
#endif
/*
* First write the updated state_entry_time at the appropriate
@@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &state_entry_time, offset,
- sizeof(state_entry_time)))
- return;
+ if (__put_user(state_entry_time, user_times))
+ goto out;
smp_wmb();
/*
@@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &vx->current_runstate,
- offsetof(struct vcpu_runstate_info, state),
- sizeof(vx->current_runstate)))
- return;
+ if (__put_user(vx->current_runstate, user_state))
+ goto out;
/*
* Write the actual runstate times immediately after the
@@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &vx->runstate_times[0],
- offset + sizeof(u64),
- sizeof(vx->runstate_times)))
- return;
-
+ if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
+ goto out;
smp_wmb();
/*
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
* runstate_entry_time field.
*/
-
state_entry_time &= ~XEN_RUNSTATE_UPDATE;
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &state_entry_time, offset,
- sizeof(state_entry_time)))
- return;
+ __put_user(state_entry_time, user_times);
+ smp_wmb();
+
+ out:
+ mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+
+ if (atomic)
+ pagefault_enable();
}
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
@@ -443,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache,
data->u.gpa,
@@ -460,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache,
data->u.gpa,
@@ -481,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.runstate_cache,
data->u.gpa,
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 59cf2343f3d9..d0d7b9bc6cad 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
* Output:
* rax original destination
*/
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
movl %edx, %ecx
rep movsb
RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 50ea390df712..d83cba364e31 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -24,7 +24,6 @@
* Output:
* rax: dest
*/
-SYM_FUNC_START_WEAK(memmove)
SYM_FUNC_START(__memmove)
mov %rdi, %rax
@@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove)
13:
RET
SYM_FUNC_END(__memmove)
-SYM_FUNC_END_ALIAS(memmove)
EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index d624f2bc42f1..fc9ffd3ff3b2 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
*
* rax original destination
*/
-SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
movq %r9,%rax
RET
SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
/*
* ISO C memset - set a memory block to a byte value. This function uses
* enhanced rep stosb to override the fast string function.
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 89b3fb244e15..afbdda539b80 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
.endm
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323..d12d1358f96d 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
4c: vrcp14ps/d Vpd,Wpd (66),(ev)
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
62: vpexpandb/w Vx,Wx (66),(ev)
63: vpcompressb/w Wx,Vx (66),(ev)
64: vpblendmd/q Vx,Hx,Wx (66),(ev)
@@ -822,9 +828,9 @@ AVXcode: 3
05: vpermilpd Vx,Wx,Ib (66),(v)
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
0c: vblendps Vx,Hx,Wx,Ib (66)
0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -846,8 +852,8 @@ AVXcode: 3
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
30: kshiftrb/w Vk,Uk,Ib (66),(v)
31: kshiftrd/q Vk,Uk,Ib (66),(v)
32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -871,23 +877,102 @@ AVXcode: 3
51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
cc: sha1rnds4 Vdq,Wdq,Ib
ce: vgf2p8affineqb Vx,Wx,Ib (66)
cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
+EndTable
+
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
EndTable
GrpTable: Grp1
@@ -970,7 +1055,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -987,7 +1072,7 @@ GrpTable: Grp9
3: xrstors
4: xsavec
5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
EndTable
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 026031b3b782..17a492c27306 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -615,6 +615,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr,
static bool memremap_is_setup_data(resource_size_t phys_addr,
unsigned long size)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
u64 paddr, paddr_next;
@@ -627,6 +628,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
data = memremap(paddr, sizeof(*data),
MEMREMAP_WB | MEMREMAP_DEC);
+ if (!data) {
+ pr_warn("failed to memremap setup_data entry\n");
+ return false;
+ }
paddr_next = data->next;
len = data->len;
@@ -636,10 +641,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
return true;
}
- if (data->type == SETUP_INDIRECT &&
- ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
- paddr = ((struct setup_indirect *)data->data)->addr;
- len = ((struct setup_indirect *)data->data)->len;
+ if (data->type == SETUP_INDIRECT) {
+ memunmap(data);
+ data = memremap(paddr, sizeof(*data) + len,
+ MEMREMAP_WB | MEMREMAP_DEC);
+ if (!data) {
+ pr_warn("failed to memremap indirect setup_data\n");
+ return false;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ }
}
memunmap(data);
@@ -660,22 +676,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
unsigned long size)
{
+ struct setup_indirect *indirect;
struct setup_data *data;
u64 paddr, paddr_next;
paddr = boot_params.hdr.setup_data;
while (paddr) {
- unsigned int len;
+ unsigned int len, size;
if (phys_addr == paddr)
return true;
data = early_memremap_decrypted(paddr, sizeof(*data));
+ if (!data) {
+ pr_warn("failed to early memremap setup_data entry\n");
+ return false;
+ }
+
+ size = sizeof(*data);
paddr_next = data->next;
len = data->len;
- early_memunmap(data, sizeof(*data));
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ early_memunmap(data, sizeof(*data));
+ return true;
+ }
+
+ if (data->type == SETUP_INDIRECT) {
+ size += len;
+ early_memunmap(data, sizeof(*data));
+ data = early_memremap_decrypted(paddr, size);
+ if (!data) {
+ pr_warn("failed to early memremap indirect setup_data\n");
+ return false;
+ }
+
+ indirect = (struct setup_indirect *)data->data;
+
+ if (indirect->type != SETUP_INDIRECT) {
+ paddr = indirect->addr;
+ len = indirect->len;
+ }
+ }
+
+ early_memunmap(data, size);
if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
return true;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2b1e266ff95c..0ecb140864b2 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -394,7 +394,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
u8 *prog = *pprog;
#ifdef CONFIG_RETPOLINE
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 40d6a06e41c8..ead7e5b3a975 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -8,6 +8,7 @@ endmenu
config UML_X86
def_bool y
+ select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32
config 64BIT
bool "64-bit kernel" if "$(SUBARCH)" = "x86"
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 6448c5071117..517a9d8d8f94 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -185,8 +185,7 @@ static int xen_cpu_dead_hvm(unsigned int cpu)
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_teardown_timer(cpu);
-
- return 0;
+ return 0;
}
static bool no_vector_callback __initdata;
@@ -248,6 +247,11 @@ static __init bool xen_x2apic_available(void)
return x2apic_supported();
}
+static bool __init msi_ext_dest_id(void)
+{
+ return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID;
+}
+
static __init void xen_hvm_guest_late_init(void)
{
#ifdef CONFIG_XEN_PVH
@@ -310,6 +314,7 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
.init.x2apic_available = xen_x2apic_available,
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
.init.guest_late_init = xen_hvm_guest_late_init,
+ .init.msi_ext_dest_id = msi_ext_dest_id,
.runtime.pin_vcpu = xen_pin_vcpu,
.ignore_nopv = true,
};
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 31b1e3477cb6..14ea32e734d5 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -57,6 +57,14 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+ if (size >= offsetof(struct dom0_vga_console_info,
+ u.vesa_lfb.ext_lfb_base)
+ + sizeof(info->u.vesa_lfb.ext_lfb_base)
+ && info->u.vesa_lfb.ext_lfb_base) {
+ screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
+ screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ }
+
if (info->video_type == XEN_VGATYPE_EFI_LFB) {
screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
break;
@@ -66,14 +74,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
u.vesa_lfb.mode_attrs)
+ sizeof(info->u.vesa_lfb.mode_attrs))
screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
-
- if (size >= offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.ext_lfb_base)
- + sizeof(info->u.vesa_lfb.ext_lfb_base)
- && info->u.vesa_lfb.ext_lfb_base) {
- screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
- screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- }
break;
}
}