summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/entry/entry_64.S16
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/microcode.h1
-rw-r--r--arch/x86/include/asm/microcode_amd.h2
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/cpu/amd.c199
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c4
-rw-r--r--arch/x86/kernel/traps.c18
-rw-r--r--arch/x86/kvm/lapic.c25
-rw-r--r--arch/x86/kvm/svm/svm.c16
-rw-r--r--arch/x86/kvm/vmx/vmenter.S8
-rw-r--r--arch/x86/kvm/vmx/vmx.c62
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h12
-rw-r--r--arch/x86/kvm/x86.c50
19 files changed, 293 insertions, 146 deletions
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 3cf34912abfe..75a343f10e58 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -245,7 +245,7 @@ CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 27759236fd60..0902518e9b93 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -242,7 +242,7 @@ CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 91f6818884fa..43606de22511 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -285,7 +285,15 @@ SYM_FUNC_END(__switch_to_asm)
*/
.pushsection .text, "ax"
SYM_CODE_START(ret_from_fork_asm)
- UNWIND_HINT_REGS
+ /*
+ * This is the start of the kernel stack; even through there's a
+ * register set at the top, the regset isn't necessarily coherent
+ * (consider kthreads) and one cannot unwind further.
+ *
+ * This ensures stack unwinds of kernel threads terminate in a known
+ * good state.
+ */
+ UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR // copy_thread
CALL_DEPTH_ACCOUNT
@@ -295,6 +303,12 @@ SYM_CODE_START(ret_from_fork_asm)
movq %r12, %rcx /* fn_arg */
call ret_from_fork
+ /*
+ * Set the stack state to what is expected for the target function
+ * -- at this point the register set should be a valid user set
+ * and unwind should work normally.
+ */
+ UNWIND_HINT_REGS
jmp swapgs_restore_regs_and_return_to_usermode
SYM_CODE_END(ret_from_fork_asm)
.popsection
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 13bc212cd4bc..e3054e3e46d5 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
KVM_X86_OP(set_cr0)
KVM_X86_OP_OPTIONAL(post_set_cr3)
KVM_X86_OP(is_valid_cr4)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28bd38303d70..3bc146dfd38d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+ bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 320566a0443d..66dbba181bd9 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -5,6 +5,7 @@
#include <asm/cpu.h>
#include <linux/earlycpio.h>
#include <linux/initrd.h>
+#include <asm/microcode_amd.h>
struct ucode_patch {
struct list_head plist;
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index e6662adf3af4..9675c621c1ca 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -48,11 +48,13 @@ extern void __init load_ucode_amd_bsp(unsigned int family);
extern void load_ucode_amd_ap(unsigned int family);
extern int __init save_microcode_in_initrd_amd(unsigned int family);
void reload_ucode_amd(unsigned int cpu);
+extern void amd_check_microcode(void);
#else
static inline void __init load_ucode_amd_bsp(unsigned int family) {}
static inline void load_ucode_amd_ap(unsigned int family) {}
static inline int __init
save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
static inline void reload_ucode_amd(unsigned int cpu) {}
+static inline void amd_check_microcode(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3aedae61af4f..a00a53e15ab7 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -545,6 +545,7 @@
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 571abf808ea3..26ad7ca423e7 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -27,11 +27,6 @@
#include "cpu.h"
-static const int amd_erratum_383[];
-static const int amd_erratum_400[];
-static const int amd_erratum_1054[];
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
@@ -39,6 +34,78 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
*/
static u32 nodes_per_socket = 1;
+/*
+ * AMD errata checking
+ *
+ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+ * have an OSVW id assigned, which it takes as first argument. Both take a
+ * variable number of family-specific model-stepping ranges created by
+ * AMD_MODEL_RANGE().
+ *
+ * Example:
+ *
+ * const int amd_erratum_319[] =
+ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+ */
+
+#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+static const int amd_erratum_400[] =
+ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+ AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+
+static const int amd_erratum_383[] =
+ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
+static const int amd_zenbleed[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+{
+ int osvw_id = *erratum++;
+ u32 range;
+ u32 ms;
+
+ if (osvw_id >= 0 && osvw_id < 65536 &&
+ cpu_has(cpu, X86_FEATURE_OSVW)) {
+ u64 osvw_len;
+
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+ if (osvw_id < osvw_len) {
+ u64 osvw_bits;
+
+ rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+ osvw_bits);
+ return osvw_bits & (1ULL << (osvw_id & 0x3f));
+ }
+ }
+
+ /* OSVW unavailable or ID unknown, match family-model-stepping range */
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
+ while ((range = *erratum++))
+ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+ (ms >= AMD_MODEL_RANGE_START(range)) &&
+ (ms <= AMD_MODEL_RANGE_END(range)))
+ return true;
+
+ return false;
+}
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -916,6 +983,47 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
}
}
+static bool cpu_has_zenbleed_microcode(void)
+{
+ u32 good_rev = 0;
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
+ case 0x60 ... 0x67: good_rev = 0x0860010b; break;
+ case 0x68 ... 0x6f: good_rev = 0x08608105; break;
+ case 0x70 ... 0x7f: good_rev = 0x08701032; break;
+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+
+ default:
+ return false;
+ break;
+ }
+
+ if (boot_cpu_data.microcode < good_rev)
+ return false;
+
+ return true;
+}
+
+static void zenbleed_check(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_amd_erratum(c, amd_zenbleed))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (!cpu_has(c, X86_FEATURE_AVX))
+ return;
+
+ if (!cpu_has_zenbleed_microcode()) {
+ pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
+ msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ } else {
+ msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ }
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -1020,6 +1128,8 @@ static void init_amd(struct cpuinfo_x86 *c)
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
cpu_has(c, X86_FEATURE_AUTOIBRS))
WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+
+ zenbleed_check(c);
}
#ifdef CONFIG_X86_32
@@ -1115,73 +1225,6 @@ static const struct cpu_dev amd_cpu_dev = {
cpu_dev_register(amd_cpu_dev);
-/*
- * AMD errata checking
- *
- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
- * have an OSVW id assigned, which it takes as first argument. Both take a
- * variable number of family-specific model-stepping ranges created by
- * AMD_MODEL_RANGE().
- *
- * Example:
- *
- * const int amd_erratum_319[] =
- * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
- * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
- * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
- */
-
-#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
- ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
-#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
-#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
-#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
-
-static const int amd_erratum_400[] =
- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
- AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
-
-static const int amd_erratum_383[] =
- AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-
-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
-{
- int osvw_id = *erratum++;
- u32 range;
- u32 ms;
-
- if (osvw_id >= 0 && osvw_id < 65536 &&
- cpu_has(cpu, X86_FEATURE_OSVW)) {
- u64 osvw_len;
-
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
- if (osvw_id < osvw_len) {
- u64 osvw_bits;
-
- rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
- osvw_bits);
- return osvw_bits & (1ULL << (osvw_id & 0x3f));
- }
- }
-
- /* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_stepping;
- while ((range = *erratum++))
- if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
- (ms >= AMD_MODEL_RANGE_START(range)) &&
- (ms <= AMD_MODEL_RANGE_END(range)))
- return true;
-
- return false;
-}
-
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask);
static unsigned int amd_msr_dr_addr_masks[] = {
@@ -1235,3 +1278,15 @@ u32 amd_get_highest_perf(void)
return 255;
}
EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+
+static void zenbleed_check_cpu(void *unused)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+ zenbleed_check(c);
+}
+
+void amd_check_microcode(void)
+{
+ on_each_cpu(zenbleed_check_cpu, NULL, 1);
+}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9e2a91830f72..95507448e781 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1150,19 +1150,21 @@ spectre_v2_user_select_mitigation(void)
}
/*
- * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
+ * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP
* is not required.
*
- * Enhanced IBRS also protects against cross-thread branch target
+ * Intel's Enhanced IBRS also protects against cross-thread branch target
* injection in user-mode as the IBRS bit remains always set which
* implicitly enables cross-thread protections. However, in legacy IBRS
* mode, the IBRS bit is set only on kernel entry and cleared on return
- * to userspace. This disables the implicit cross-thread protection,
- * so allow for STIBP to be selected in that case.
+ * to userspace. AMD Automatic IBRS also does not protect userspace.
+ * These modes therefore disable the implicit cross-thread protection,
+ * so allow for STIBP to be selected in those cases.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
return;
/*
@@ -2294,7 +2296,8 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS))
return "";
switch (spectre_v2_user_stibp) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52683fddafaf..0ba1067f4e5f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2287,6 +2287,8 @@ void microcode_check(struct cpuinfo_x86 *prev_info)
perf_check_microcode();
+ amd_check_microcode();
+
store_cpu_caps(&curr_info);
if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5e74610b39e7..c4ec4ca47e11 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1261,10 +1261,10 @@ static void __threshold_remove_blocks(struct threshold_bank *b)
struct threshold_block *pos = NULL;
struct threshold_block *tmp = NULL;
- kobject_del(b->kobj);
+ kobject_put(b->kobj);
list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
- kobject_del(&pos->kobj);
+ kobject_put(b->kobj);
}
static void threshold_remove_bank(struct threshold_bank *bank)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 58b1f208eff5..4a817d20ce3b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -697,9 +697,10 @@ static bool try_fixup_enqcmd_gp(void)
}
static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
- unsigned long error_code, const char *str)
+ unsigned long error_code, const char *str,
+ unsigned long address)
{
- if (fixup_exception(regs, trapnr, error_code, 0))
+ if (fixup_exception(regs, trapnr, error_code, address))
return true;
current->thread.error_code = error_code;
@@ -759,7 +760,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
goto exit;
}
- if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
+ if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
goto exit;
if (error_code)
@@ -1357,17 +1358,20 @@ DEFINE_IDTENTRY(exc_device_not_available)
#define VE_FAULT_STR "VE fault"
-static void ve_raise_fault(struct pt_regs *regs, long error_code)
+static void ve_raise_fault(struct pt_regs *regs, long error_code,
+ unsigned long address)
{
if (user_mode(regs)) {
gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
return;
}
- if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
+ if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code,
+ VE_FAULT_STR, address)) {
return;
+ }
- die_addr(VE_FAULT_STR, regs, error_code, 0);
+ die_addr(VE_FAULT_STR, regs, error_code, address);
}
/*
@@ -1431,7 +1435,7 @@ DEFINE_IDTENTRY(exc_virtualization_exception)
* it successfully, treat it as #GP(0) and handle it.
*/
if (!tdx_handle_virt_exception(regs, &ve))
- ve_raise_fault(regs, 0);
+ ve_raise_fault(regs, 0, ve.gla);
cond_local_irq_disable(regs);
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 113ca9661ab2..a983a16163b1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -637,16 +637,22 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
*max_irr = -1;
for (i = vec = 0; i <= 7; i++, vec += 32) {
+ u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
+
+ irr_val = *p_irr;
pir_val = READ_ONCE(pir[i]);
- irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
+
if (pir_val) {
+ pir_val = xchg(&pir[i], 0);
+
prev_irr_val = irr_val;
- irr_val |= xchg(&pir[i], 0);
- *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
- if (prev_irr_val != irr_val) {
- max_updated_irr =
- __fls(irr_val ^ prev_irr_val) + vec;
- }
+ do {
+ irr_val = prev_irr_val | pir_val;
+ } while (prev_irr_val != irr_val &&
+ !try_cmpxchg(p_irr, &prev_irr_val, irr_val));
+
+ if (prev_irr_val != irr_val)
+ max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
}
if (irr_val)
*max_irr = __fls(irr_val) + vec;
@@ -660,8 +666,11 @@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
- return __kvm_apic_update_irr(pir, apic->regs, max_irr);
+ if (unlikely(!apic->apicv_active && irr_updated))
+ apic->irr_pending = true;
+ return irr_updated;
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d381ad424554..956726d867aa 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1786,6 +1786,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
}
+static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ return true;
+}
+
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3986,14 +3991,8 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
- struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
-
- /*
- * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
- * can't read guest memory (dereference memslots) to decode the WRMSR.
- */
- if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
- nrips && control->next_rip)
+ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
return handle_fastpath_set_msr_irqoff(vcpu);
return EXIT_FASTPATH_NONE;
@@ -4815,6 +4814,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
+ .is_valid_cr0 = svm_is_valid_cr0,
.set_cr0 = svm_set_cr0,
.post_set_cr3 = sev_post_set_cr3,
.is_valid_cr4 = svm_is_valid_cr4,
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 07e927d4d099..be275a0410a8 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -303,10 +303,8 @@ SYM_FUNC_START(vmx_do_nmi_irqoff)
VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)
-
-.section .text, "ax"
-
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
@@ -335,7 +333,7 @@ SYM_FUNC_START(vmread_error_trampoline)
mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
- call vmread_error
+ call vmread_error_trampoline2
/* Zero out @fault, which will be popped into the result register. */
_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
@@ -357,6 +355,8 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
+.section .text, "ax"
+
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0ecf4be2c6af..df461f387e20 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -441,13 +441,23 @@ do { \
pr_warn_ratelimited(fmt); \
} while (0)
-void vmread_error(unsigned long field, bool fault)
+noinline void vmread_error(unsigned long field)
{
- if (fault)
+ vmx_insn_failed("vmread failed: field=%lx\n", field);
+}
+
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
+{
+ if (fault) {
kvm_spurious_fault();
- else
- vmx_insn_failed("vmread failed: field=%lx\n", field);
+ } else {
+ instrumentation_begin();
+ vmread_error(field);
+ instrumentation_end();
+ }
}
+#endif
noinline void vmwrite_error(unsigned long field, unsigned long value)
{
@@ -1503,6 +1513,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long old_rflags;
+ /*
+ * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
+ * is an unrestricted guest in order to mark L2 as needing emulation
+ * if L1 runs L2 as a restricted guest.
+ */
if (is_unrestricted_guest(vcpu)) {
kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
vmx->rflags = rflags;
@@ -3037,6 +3052,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+ /*
+ * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+ * as using VM86 is unnecessary if unrestricted guest is enabled, and
+ * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+ * should VM-Fail and KVM should reject userspace attempts to stuff
+ * CR0.PG=0 when L2 is active.
+ */
+ WARN_ON_ONCE(is_guest_mode(vcpu));
+
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
@@ -3226,6 +3250,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
CPU_BASED_CR3_STORE_EXITING)
+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ if (is_guest_mode(vcpu))
+ return nested_guest_cr0_valid(vcpu, cr0);
+
+ if (to_vmx(vcpu)->nested.vmxon)
+ return nested_host_cr0_valid(vcpu, cr0);
+
+ return true;
+}
+
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3235,7 +3270,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
- if (is_unrestricted_guest(vcpu))
+ if (enable_unrestricted_guest)
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
else {
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
@@ -3263,7 +3298,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
- if (enable_ept && !is_unrestricted_guest(vcpu)) {
+ if (enable_ept && !enable_unrestricted_guest) {
/*
* Ensure KVM has an up-to-date snapshot of the guest's CR3. If
* the below code _enables_ CR3 exiting, vmx_cache_reg() will
@@ -3394,7 +3429,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* this bit, even if host CR4.MCE == 0.
*/
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
- if (is_unrestricted_guest(vcpu))
+ if (enable_unrestricted_guest)
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
@@ -3414,7 +3449,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vcpu->arch.cr4 = cr4;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
- if (!is_unrestricted_guest(vcpu)) {
+ if (!enable_unrestricted_guest) {
if (enable_ept) {
if (!is_paging(vcpu)) {
hw_cr4 &= ~X86_CR4_PAE;
@@ -4651,7 +4686,8 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
if (kvm_vmx->pid_table)
return 0;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
+ pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+ vmx_get_pid_table_order(kvm));
if (!pages)
return -ENOMEM;
@@ -5364,18 +5400,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
- if (!nested_guest_cr0_valid(vcpu, val))
- return 1;
-
if (kvm_set_cr0(vcpu, val))
return 1;
vmcs_writel(CR0_READ_SHADOW, orig_val);
return 0;
} else {
- if (to_vmx(vcpu)->nested.vmxon &&
- !nested_host_cr0_valid(vcpu, val))
- return 1;
-
return kvm_set_cr0(vcpu, val);
}
}
@@ -8203,6 +8232,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+ .is_valid_cr0 = vmx_is_valid_cr0,
.set_cr0 = vmx_set_cr0,
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index ce47dc265f89..33af7b4c6eb4 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -10,7 +10,7 @@
#include "vmcs.h"
#include "../x86.h"
-void vmread_error(unsigned long field, bool fault);
+void vmread_error(unsigned long field);
void vmwrite_error(unsigned long field, unsigned long value);
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
@@ -31,6 +31,13 @@ void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
* void vmread_error_trampoline(unsigned long field, bool fault);
*/
extern unsigned long vmread_error_trampoline;
+
+/*
+ * The second VMREAD error trampoline, called from the assembly trampoline,
+ * exists primarily to enable instrumentation for the VM-Fail path.
+ */
+void vmread_error_trampoline2(unsigned long field, bool fault);
+
#endif
static __always_inline void vmcs_check16(unsigned long field)
@@ -101,8 +108,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
do_fail:
instrumentation_begin();
- WARN_ONCE(1, KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
- pr_warn_ratelimited(KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
+ vmread_error(field);
instrumentation_end();
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a6b9bea62fb8..278dbd37dab2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -906,6 +906,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
}
EXPORT_SYMBOL_GPL(load_pdptrs);
+static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+#ifdef CONFIG_X86_64
+ if (cr0 & 0xffffffff00000000UL)
+ return false;
+#endif
+
+ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+ return false;
+
+ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+ return false;
+
+ return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
+}
+
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
/*
@@ -952,20 +968,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);
- cr0 |= X86_CR0_ET;
-
-#ifdef CONFIG_X86_64
- if (cr0 & 0xffffffff00000000UL)
+ if (!kvm_is_valid_cr0(vcpu, cr0))
return 1;
-#endif
- cr0 &= ~CR0_RESERVED_BITS;
-
- if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
- return 1;
+ cr0 |= X86_CR0_ET;
- if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
- return 1;
+ /* Write to CR0 reserved bits are ignored, even on Intel. */
+ cr0 &= ~CR0_RESERVED_BITS;
#ifdef CONFIG_X86_64
if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
@@ -2172,6 +2181,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
u64 data;
fastpath_t ret = EXIT_FASTPATH_NONE;
+ kvm_vcpu_srcu_read_lock(vcpu);
+
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
@@ -2194,6 +2205,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
if (ret != EXIT_FASTPATH_NONE)
trace_kvm_msr_write(msr, data);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
return ret;
}
EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
@@ -10203,9 +10216,13 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
if (r < 0)
goto out;
if (r) {
- kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
- static_call(kvm_x86_inject_irq)(vcpu, false);
- WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+ int irq = kvm_cpu_get_interrupt(vcpu);
+
+ if (!WARN_ON_ONCE(irq == -1)) {
+ kvm_queue_interrupt(vcpu, irq, false);
+ static_call(kvm_x86_inject_irq)(vcpu, false);
+ WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+ }
}
if (kvm_cpu_has_injectable_intr(vcpu))
static_call(kvm_x86_enable_irq_window)(vcpu);
@@ -11460,7 +11477,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return false;
}
- return kvm_is_valid_cr4(vcpu, sregs->cr4);
+ return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
+ kvm_is_valid_cr0(vcpu, sregs->cr0);
}
static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
@@ -13185,7 +13203,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
bool kvm_arch_has_irq_bypass(void)
{
- return true;
+ return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
}
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,