summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c946
1 files changed, 678 insertions, 268 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f122906ed9f3..4b8138bd4857 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -97,10 +97,10 @@
* vendor module being reloaded with different module parameters.
*/
struct kvm_caps kvm_caps __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_caps);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_caps);
struct kvm_host_values kvm_host __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_host);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_host);
#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
@@ -136,6 +136,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static DEFINE_MUTEX(vendor_module_lock);
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+
struct kvm_x86_ops kvm_x86_ops __read_mostly;
#define KVM_X86_OP(func) \
@@ -152,7 +155,7 @@ module_param(ignore_msrs, bool, 0644);
bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, 0644);
-EXPORT_SYMBOL_GPL(report_ignored_msrs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(report_ignored_msrs);
unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, 0644);
@@ -164,12 +167,9 @@ module_param(kvmclock_periodic_sync, bool, 0444);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, 0644);
-static bool __read_mostly vector_hashing = true;
-module_param(vector_hashing, bool, 0444);
-
bool __read_mostly enable_vmware_backdoor = false;
module_param(enable_vmware_backdoor, bool, 0444);
-EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_vmware_backdoor);
/*
* Flags to manipulate forced emulation behavior (any non-zero value will
@@ -184,7 +184,7 @@ module_param(pi_inject_timer, bint, 0644);
/* Enable/disable PMU virtualization */
bool __read_mostly enable_pmu = true;
-EXPORT_SYMBOL_GPL(enable_pmu);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu);
module_param(enable_pmu, bool, 0444);
bool __read_mostly eager_page_split = true;
@@ -211,7 +211,7 @@ struct kvm_user_return_msrs {
};
u32 __read_mostly kvm_nr_uret_msrs;
-EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_nr_uret_msrs);
static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
static struct kvm_user_return_msrs __percpu *user_return_msrs;
@@ -220,17 +220,26 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
+#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)
+/*
+ * Note, KVM supports exposing PT to the guest, but does not support context
+ * switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping
+ * PT via guest XSTATE would clobber perf state), i.e. KVM doesn't support
+ * IA32_XSS[bit 8] (guests can/must use RDMSR/WRMSR to save/restore PT MSRs).
+ */
+#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_ALL)
+
bool __read_mostly allow_smaller_maxphyaddr = 0;
-EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(allow_smaller_maxphyaddr);
bool __read_mostly enable_apicv = true;
-EXPORT_SYMBOL_GPL(enable_apicv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_apicv);
bool __read_mostly enable_ipiv = true;
-EXPORT_SYMBOL_GPL(enable_ipiv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv);
bool __read_mostly enable_device_posted_irqs = true;
-EXPORT_SYMBOL_GPL(enable_device_posted_irqs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs);
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
@@ -335,7 +344,11 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONTROL,
- MSR_IA32_XFD, MSR_IA32_XFD_ERR,
+ MSR_IA32_XFD, MSR_IA32_XFD_ERR, MSR_IA32_XSS,
+
+ MSR_IA32_U_CET, MSR_IA32_S_CET,
+ MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
+ MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
};
static const u32 msrs_to_save_pmu[] = {
@@ -367,6 +380,7 @@ static const u32 msrs_to_save_pmu[] = {
MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET,
};
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
@@ -614,7 +628,7 @@ int kvm_add_user_return_msr(u32 msr)
kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
return kvm_nr_uret_msrs++;
}
-EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_add_user_return_msr);
int kvm_find_user_return_msr(u32 msr)
{
@@ -626,7 +640,7 @@ int kvm_find_user_return_msr(u32 msr)
}
return -1;
}
-EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_user_return_msr);
static void kvm_user_return_msr_cpu_online(void)
{
@@ -666,7 +680,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
kvm_user_return_register_notifier(msrs);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr);
void kvm_user_return_msr_update_cache(unsigned int slot, u64 value)
{
@@ -675,7 +689,13 @@ void kvm_user_return_msr_update_cache(unsigned int slot, u64 value)
msrs->values[slot].curr = value;
kvm_user_return_register_notifier(msrs);
}
-EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_user_return_msr_update_cache);
+
+u64 kvm_get_user_return_msr(unsigned int slot)
+{
+ return this_cpu_ptr(user_return_msrs)->values[slot].curr;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_user_return_msr);
static void drop_user_return_notifiers(void)
{
@@ -697,7 +717,7 @@ noinstr void kvm_spurious_fault(void)
/* Fault while not rebooting. We want the trace. */
BUG_ON(!kvm_rebooting);
}
-EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spurious_fault);
#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
@@ -802,7 +822,7 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
ex->has_payload = false;
ex->payload = 0;
}
-EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_deliver_exception_payload);
static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector,
bool has_error_code, u32 error_code,
@@ -886,7 +906,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, false, 0);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
@@ -894,7 +914,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
{
kvm_multiple_exception(vcpu, nr, false, 0, true, payload);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
@@ -929,7 +949,7 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
vcpu->arch.exception.has_payload = false;
vcpu->arch.exception.payload = 0;
}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_requeue_exception);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
@@ -940,7 +960,7 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
return 1;
}
-EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_complete_insn_gp);
static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
{
@@ -990,7 +1010,7 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
fault_mmu->inject_page_fault(vcpu, fault);
}
-EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault);
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
@@ -1002,7 +1022,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0);
}
-EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e);
/*
* Checks if cpl <= required_cpl; if true, return true. Otherwise queue
@@ -1024,7 +1044,7 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
kvm_queue_exception(vcpu, UD_VECTOR);
return false;
}
-EXPORT_SYMBOL_GPL(kvm_require_dr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr);
static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
{
@@ -1079,7 +1099,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
return 1;
}
-EXPORT_SYMBOL_GPL(load_pdptrs);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(load_pdptrs);
static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -1132,7 +1152,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon
if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr0);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -1167,19 +1187,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
(is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)))
return 1;
+ if (!(cr0 & X86_CR0_WP) && kvm_is_cr4_bit_set(vcpu, X86_CR4_CET))
+ return 1;
+
kvm_x86_call(set_cr0)(vcpu, cr0);
kvm_post_set_cr0(vcpu, old_cr0, cr0);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr0);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr0);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
-EXPORT_SYMBOL_GPL(kvm_lmsw);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lmsw);
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
{
@@ -1202,7 +1225,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
wrpkru(vcpu->arch.pkru);
}
-EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
@@ -1228,7 +1251,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
}
}
-EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state);
#ifdef CONFIG_X86_64
static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
@@ -1237,7 +1260,7 @@ static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
}
#endif
-static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0 = xcr;
u64 old_xcr0 = vcpu->arch.xcr0;
@@ -1281,6 +1304,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.cpuid_dynamic_bits_dirty = true;
return 0;
}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
{
@@ -1293,7 +1317,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_xsetbv);
static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
@@ -1341,7 +1365,7 @@ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned lon
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr4);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
@@ -1366,13 +1390,16 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
+ if ((cr4 & X86_CR4_CET) && !kvm_is_cr0_bit_set(vcpu, X86_CR0_WP))
+ return 1;
+
kvm_x86_call(set_cr4)(vcpu, cr4);
kvm_post_set_cr4(vcpu, old_cr4, cr4);
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr4);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr4);
static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
{
@@ -1464,7 +1491,7 @@ handle_tlb_flush:
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr3);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr3);
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
@@ -1476,7 +1503,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
vcpu->arch.cr8 = cr8;
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_cr8);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr8);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
@@ -1485,7 +1512,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
else
return vcpu->arch.cr8;
}
-EXPORT_SYMBOL_GPL(kvm_get_cr8);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_cr8);
static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
{
@@ -1510,7 +1537,7 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu)
if (dr7 & DR7_BP_EN_MASK)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
-EXPORT_SYMBOL_GPL(kvm_update_dr7);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_update_dr7);
static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
@@ -1551,7 +1578,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_dr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_dr);
unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
{
@@ -1568,14 +1595,14 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
return vcpu->arch.dr7;
}
}
-EXPORT_SYMBOL_GPL(kvm_get_dr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr);
int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_rcx_read(vcpu);
+ u32 pmc = kvm_rcx_read(vcpu);
u64 data;
- if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
+ if (kvm_pmu_rdpmc(vcpu, pmc, &data)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -1584,7 +1611,7 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
kvm_rdx_write(vcpu, data >> 32);
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdpmc);
/*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
@@ -1723,7 +1750,7 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
return __kvm_valid_efer(vcpu, efer);
}
-EXPORT_SYMBOL_GPL(kvm_valid_efer);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_valid_efer);
static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
@@ -1766,7 +1793,7 @@ void kvm_enable_efer_bits(u64 mask)
{
efer_reserved_bits &= ~mask;
}
-EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_efer_bits);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
{
@@ -1809,7 +1836,7 @@ out:
return allowed;
}
-EXPORT_SYMBOL_GPL(kvm_msr_allowed);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_msr_allowed);
/*
* Write @data into the MSR specified by @index. Select MSR specific fault
@@ -1870,6 +1897,44 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
data = (u32)data;
break;
+ case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
+ return KVM_MSR_RET_UNSUPPORTED;
+ if (!kvm_is_valid_u_s_cet(vcpu, data))
+ return 1;
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ if (!host_initiated)
+ return 1;
+ fallthrough;
+ /*
+ * Note that the MSR emulation here is flawed when a vCPU
+ * doesn't support the Intel 64 architecture. The expected
+ * architectural behavior in this case is that the upper 32
+ * bits do not exist and should always read '0'. However,
+ * because the actual hardware on which the virtual CPU is
+ * running does support Intel 64, XRSTORS/XSAVES in the
+ * guest could observe behavior that violates the
+ * architecture. Intercepting XRSTORS/XSAVES for this
+ * special case isn't deemed worthwhile.
+ */
+ case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ return KVM_MSR_RET_UNSUPPORTED;
+ /*
+ * MSR_IA32_INT_SSP_TAB is not present on processors that do
+ * not support Intel 64 architecture.
+ */
+ if (index == MSR_IA32_INT_SSP_TAB && !guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
+ return KVM_MSR_RET_UNSUPPORTED;
+ if (is_noncanonical_msr_address(data, vcpu))
+ return 1;
+ /* All SSP MSRs except MSR_IA32_INT_SSP_TAB must be 4-byte aligned */
+ if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
+ return 1;
+ break;
}
msr.data = data;
@@ -1898,8 +1963,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
- bool host_initiated)
+static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated)
{
struct msr_data msr;
int ret;
@@ -1914,6 +1979,20 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
return 1;
break;
+ case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
+ return KVM_MSR_RET_UNSUPPORTED;
+ break;
+ case MSR_KVM_INTERNAL_GUEST_SSP:
+ if (!host_initiated)
+ return 1;
+ fallthrough;
+ case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ return KVM_MSR_RET_UNSUPPORTED;
+ break;
}
msr.index = index;
@@ -1925,6 +2004,16 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return ret;
}
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+ return __kvm_set_msr(vcpu, index, data, true);
+}
+
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+ return __kvm_get_msr(vcpu, index, data, true);
+}
+
static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 *data, bool host_initiated)
{
@@ -1932,33 +2021,36 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
__kvm_get_msr);
}
-int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
- return KVM_MSR_RET_FILTERED;
return kvm_get_msr_ignored_check(vcpu, index, data, false);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_read);
-int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
- return KVM_MSR_RET_FILTERED;
return kvm_set_msr_ignored_check(vcpu, index, data, false);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_write);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- return kvm_get_msr_ignored_check(vcpu, index, data, false);
+ if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
+ return KVM_MSR_RET_FILTERED;
+
+ return __kvm_emulate_msr_read(vcpu, index, data);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_read);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- return kvm_set_msr_ignored_check(vcpu, index, data, false);
+ if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
+ return KVM_MSR_RET_FILTERED;
+
+ return __kvm_emulate_msr_write(vcpu, index, data);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_write);
+
static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
{
@@ -1990,6 +2082,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
return complete_fast_msr_access(vcpu);
}
+static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->run->msr.error)
+ kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg,
+ vcpu->run->msr.data);
+
+ return complete_fast_msr_access(vcpu);
+}
+
static u64 kvm_msr_reason(int r)
{
switch (r) {
@@ -2024,55 +2125,82 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
return 1;
}
-int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg,
+ int (*complete_rdmsr)(struct kvm_vcpu *))
{
- u32 ecx = kvm_rcx_read(vcpu);
u64 data;
int r;
- r = kvm_get_msr_with_filter(vcpu, ecx, &data);
+ r = kvm_emulate_msr_read(vcpu, msr, &data);
if (!r) {
- trace_kvm_msr_read(ecx, data);
+ trace_kvm_msr_read(msr, data);
- kvm_rax_write(vcpu, data & -1u);
- kvm_rdx_write(vcpu, (data >> 32) & -1u);
+ if (reg < 0) {
+ kvm_rax_write(vcpu, data & -1u);
+ kvm_rdx_write(vcpu, (data >> 32) & -1u);
+ } else {
+ kvm_register_write(vcpu, reg, data);
+ }
} else {
/* MSR read failed? See if we should ask user space */
- if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
- complete_fast_rdmsr, r))
+ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0,
+ complete_rdmsr, r))
return 0;
- trace_kvm_msr_read_ex(ecx);
+ trace_kvm_msr_read_ex(msr);
}
return kvm_x86_call(complete_emulated_msr)(vcpu, r);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
-int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_rcx_read(vcpu);
- u64 data = kvm_read_edx_eax(vcpu);
- int r;
+ return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1,
+ complete_fast_rdmsr);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr);
+
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ vcpu->arch.cui_rdmsr_imm_reg = reg;
+
+ return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr_imm);
- r = kvm_set_msr_with_filter(vcpu, ecx, data);
+static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ int r;
+ r = kvm_emulate_msr_write(vcpu, msr, data);
if (!r) {
- trace_kvm_msr_write(ecx, data);
+ trace_kvm_msr_write(msr, data);
} else {
/* MSR write failed? See if we should ask user space */
- if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
+ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data,
complete_fast_msr_access, r))
return 0;
/* Signal all other negative errors to userspace */
if (r < 0)
return r;
- trace_kvm_msr_write_ex(ecx, data);
+ trace_kvm_msr_write_ex(msr, data);
}
return kvm_x86_call(complete_emulated_msr)(vcpu, r);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu),
+ kvm_read_edx_eax(vcpu));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr);
+
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr_imm);
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
{
@@ -2084,14 +2212,23 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu)
/* Treat an INVD instruction as a NOP and just skip it. */
return kvm_emulate_as_nop(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_invd);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_invd);
+
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_emulate_invd(vcpu))
+ return EXIT_FASTPATH_EXIT_USERSPACE;
+
+ return EXIT_FASTPATH_REENTER_GUEST;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_invd);
int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
-EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
@@ -2117,13 +2254,13 @@ int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
{
return kvm_emulate_monitor_mwait(vcpu, "MWAIT");
}
-EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_mwait);
int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
{
return kvm_emulate_monitor_mwait(vcpu, "MONITOR");
}
-EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_monitor);
static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
@@ -2133,74 +2270,41 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending();
}
-/*
- * The fast path for frequent and performance sensitive wrmsr emulation,
- * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
- * the latency of virtual IPI by avoiding the expensive bits of transitioning
- * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
- * other cases which must be called after interrupts are enabled on the host.
- */
-static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
-{
- if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
- return 1;
-
- if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
- ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
- ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
- ((u32)(data >> 32) != X2APIC_BROADCAST))
- return kvm_x2apic_icr_write(vcpu->arch.apic, data);
-
- return 1;
-}
-
-static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
-{
- if (!kvm_can_use_hv_timer(vcpu))
- return 1;
-
- kvm_set_lapic_tscdeadline_msr(vcpu, data);
- return 0;
-}
-
-fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
- u32 msr = kvm_rcx_read(vcpu);
- u64 data;
- fastpath_t ret;
- bool handled;
-
- kvm_vcpu_srcu_read_lock(vcpu);
-
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
- data = kvm_read_edx_eax(vcpu);
- handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) ||
+ kvm_x2apic_icr_write_fast(vcpu->arch.apic, data))
+ return EXIT_FASTPATH_NONE;
break;
case MSR_IA32_TSC_DEADLINE:
- data = kvm_read_edx_eax(vcpu);
- handled = !handle_fastpath_set_tscdeadline(vcpu, data);
+ kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
default:
- handled = false;
- break;
+ return EXIT_FASTPATH_NONE;
}
- if (handled) {
- if (!kvm_skip_emulated_instruction(vcpu))
- ret = EXIT_FASTPATH_EXIT_USERSPACE;
- else
- ret = EXIT_FASTPATH_REENTER_GUEST;
- trace_kvm_msr_write(msr, data);
- } else {
- ret = EXIT_FASTPATH_NONE;
- }
+ trace_kvm_msr_write(msr, data);
- kvm_vcpu_srcu_read_unlock(vcpu);
+ if (!kvm_skip_emulated_instruction(vcpu))
+ return EXIT_FASTPATH_EXIT_USERSPACE;
- return ret;
+ return EXIT_FASTPATH_REENTER_GUEST;
+}
+
+fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu),
+ kvm_read_edx_eax(vcpu));
}
-EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr);
+
+fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr_imm);
/*
* Adapt set_msr() to msr_io()'s calling convention
@@ -2566,7 +2670,7 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
return vcpu->arch.l1_tsc_offset +
kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
}
-EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_l1_tsc);
u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
{
@@ -2581,7 +2685,7 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
nested_offset += l2_offset;
return nested_offset;
}
-EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_offset);
u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
{
@@ -2591,7 +2695,7 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
return l1_multiplier;
}
-EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_multiplier);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
{
@@ -3669,7 +3773,7 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_service_local_tlb_flush_requests);
static void record_steal_time(struct kvm_vcpu *vcpu)
{
@@ -3769,6 +3873,67 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
+/*
+ * Returns true if the MSR in question is managed via XSTATE, i.e. is context
+ * switched with the rest of guest FPU state. Note! S_CET is _not_ context
+ * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
+ * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
+ * the value saved/restored via XSTATE is always the host's value. That detail
+ * is _extremely_ important, as the guest's S_CET must _never_ be resident in
+ * hardware while executing in the host. Loading guest values for U_CET and
+ * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
+ * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
+ * privilege levels, i.e. are effectively only consumed by userspace as well.
+ */
+static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ if (!vcpu)
+ return false;
+
+ switch (msr) {
+ case MSR_IA32_U_CET:
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_IBT);
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+ default:
+ return false;
+ }
+}
+
+/*
+ * Lock (and if necessary, re-load) the guest FPU, i.e. XSTATE, and access an
+ * MSR that is managed via XSTATE. Note, the caller is responsible for doing
+ * the initial FPU load, this helper only ensures that guest state is resident
+ * in hardware (the kernel can load its FPU state in IRQ context).
+ */
+static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu,
+ struct msr_data *msr_info,
+ int access)
+{
+ BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W);
+
+ KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm);
+ KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm);
+
+ kvm_fpu_get();
+ if (access == MSR_TYPE_R)
+ rdmsrq(msr_info->index, msr_info->data);
+ else
+ wrmsrq(msr_info->index, msr_info->data);
+ kvm_fpu_put();
+}
+
+static void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W);
+}
+
+static void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R);
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u32 msr = msr_info->index;
@@ -3960,16 +4125,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_XSS:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
- return 1;
- /*
- * KVM supports exposing PT to the guest, but does not support
- * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
- * XSAVES/XRSTORS to save/restore PT MSRs.
- */
- if (data & ~kvm_caps.supported_xss)
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
+ return KVM_MSR_RET_UNSUPPORTED;
+
+ if (data & ~vcpu->arch.guest_supported_xss)
return 1;
+ if (vcpu->arch.ia32_xss == data)
+ break;
vcpu->arch.ia32_xss = data;
vcpu->arch.cpuid_dynamic_bits_dirty = true;
break;
@@ -4153,6 +4315,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.guest_fpu.xfd_err = data;
break;
#endif
+ case MSR_IA32_U_CET:
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ kvm_set_xstate_msr(vcpu, msr_info);
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
@@ -4161,7 +4327,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_msr_common);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_msr_common);
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
@@ -4502,6 +4668,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.guest_fpu.xfd_err;
break;
#endif
+ case MSR_IA32_U_CET:
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ kvm_get_xstate_msr(vcpu, msr_info);
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
@@ -4510,7 +4680,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_msr_common);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_msr_common);
/*
* Read or write a bunch of msrs. All parameters are kernel addresses.
@@ -4522,11 +4692,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
+ bool fpu_loaded = false;
int i;
- for (i = 0; i < msrs->nmsrs; ++i)
+ for (i = 0; i < msrs->nmsrs; ++i) {
+ /*
+ * If userspace is accessing one or more XSTATE-managed MSRs,
+ * temporarily load the guest's FPU state so that the guest's
+ * MSR value(s) is resident in hardware and thus can be accessed
+ * via RDMSR/WRMSR.
+ */
+ if (!fpu_loaded && is_xstate_managed_msr(vcpu, entries[i].index)) {
+ kvm_load_guest_fpu(vcpu);
+ fpu_loaded = true;
+ }
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
+ }
+ if (fpu_loaded)
+ kvm_put_guest_fpu(vcpu);
return i;
}
@@ -4711,6 +4895,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_MEMORY_FAULT_INFO:
case KVM_CAP_X86_GUEST_MODE:
+ case KVM_CAP_ONE_REG:
r = 1;
break;
case KVM_CAP_PRE_FAULT_MEMORY:
@@ -5889,6 +6074,134 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
}
}
+struct kvm_x86_reg_id {
+ __u32 index;
+ __u8 type;
+ __u8 rsvd1;
+ __u8 rsvd2:4;
+ __u8 size:4;
+ __u8 x86;
+};
+
+static int kvm_translate_kvm_reg(struct kvm_vcpu *vcpu,
+ struct kvm_x86_reg_id *reg)
+{
+ switch (reg->index) {
+ case KVM_REG_GUEST_SSP:
+ /*
+ * FIXME: If host-initiated accesses are ever exempted from
+ * ignore_msrs (in kvm_do_msr_access()), drop this manual check
+ * and rely on KVM's standard checks to reject accesses to regs
+ * that don't exist.
+ */
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
+ return -EINVAL;
+
+ reg->type = KVM_X86_REG_TYPE_MSR;
+ reg->index = MSR_KVM_INTERNAL_GUEST_SSP;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val)
+{
+ u64 val;
+
+ if (do_get_msr(vcpu, msr, &val))
+ return -EINVAL;
+
+ if (put_user(val, user_val))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val)
+{
+ u64 val;
+
+ if (get_user(val, user_val))
+ return -EFAULT;
+
+ if (do_set_msr(vcpu, msr, &val))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl,
+ void __user *argp)
+{
+ struct kvm_one_reg one_reg;
+ struct kvm_x86_reg_id *reg;
+ u64 __user *user_val;
+ bool load_fpu;
+ int r;
+
+ if (copy_from_user(&one_reg, argp, sizeof(one_reg)))
+ return -EFAULT;
+
+ if ((one_reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86)
+ return -EINVAL;
+
+ reg = (struct kvm_x86_reg_id *)&one_reg.id;
+ if (reg->rsvd1 || reg->rsvd2)
+ return -EINVAL;
+
+ if (reg->type == KVM_X86_REG_TYPE_KVM) {
+ r = kvm_translate_kvm_reg(vcpu, reg);
+ if (r)
+ return r;
+ }
+
+ if (reg->type != KVM_X86_REG_TYPE_MSR)
+ return -EINVAL;
+
+ if ((one_reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64)
+ return -EINVAL;
+
+ guard(srcu)(&vcpu->kvm->srcu);
+
+ load_fpu = is_xstate_managed_msr(vcpu, reg->index);
+ if (load_fpu)
+ kvm_load_guest_fpu(vcpu);
+
+ user_val = u64_to_user_ptr(one_reg.addr);
+ if (ioctl == KVM_GET_ONE_REG)
+ r = kvm_get_one_msr(vcpu, reg->index, user_val);
+ else
+ r = kvm_set_one_msr(vcpu, reg->index, user_val);
+
+ if (load_fpu)
+ kvm_put_guest_fpu(vcpu);
+ return r;
+}
+
+static int kvm_get_reg_list(struct kvm_vcpu *vcpu,
+ struct kvm_reg_list __user *user_list)
+{
+ u64 nr_regs = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ? 1 : 0;
+ u64 user_nr_regs;
+
+ if (get_user(user_nr_regs, &user_list->n))
+ return -EFAULT;
+
+ if (put_user(nr_regs, &user_list->n))
+ return -EFAULT;
+
+ if (user_nr_regs < nr_regs)
+ return -E2BIG;
+
+ if (nr_regs &&
+ put_user(KVM_X86_REG_KVM(KVM_REG_GUEST_SSP), &user_list->reg[0]))
+ return -EFAULT;
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -6005,6 +6318,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
+ case KVM_GET_ONE_REG:
+ case KVM_SET_ONE_REG:
+ r = kvm_get_set_one_reg(vcpu, ioctl, argp);
+ break;
+ case KVM_GET_REG_LIST:
+ r = kvm_get_reg_list(vcpu, argp);
+ break;
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
@@ -6771,7 +7091,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
kvm_free_msr_filter(old_filter);
- kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
+ /*
+ * Recalc MSR intercepts as userspace may want to intercept accesses to
+ * MSRs that KVM would otherwise pass through to the guest.
+ */
+ kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS);
return 0;
}
@@ -6966,6 +7290,15 @@ set_identity_unlock:
if (irqchip_in_kernel(kvm))
goto create_irqchip_unlock;
+ /*
+ * Disallow an in-kernel I/O APIC if the VM has protected EOIs,
+ * i.e. if KVM can't intercept EOIs and thus can't properly
+ * emulate level-triggered interrupts.
+ */
+ r = -ENOTTY;
+ if (kvm->arch.has_protected_eoi)
+ goto create_irqchip_unlock;
+
r = -EINVAL;
if (kvm->created_vcpus)
goto create_irqchip_unlock;
@@ -7353,6 +7686,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
return;
break;
@@ -7365,6 +7699,24 @@ static void kvm_probe_msr_to_save(u32 msr_index)
if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
return;
break;
+ case MSR_IA32_XSS:
+ if (!kvm_caps.supported_xss)
+ return;
+ break;
+ case MSR_IA32_U_CET:
+ case MSR_IA32_S_CET:
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ return;
+ break;
+ case MSR_IA32_INT_SSP_TAB:
+ if (!kvm_cpu_cap_has(X86_FEATURE_LM))
+ return;
+ fallthrough;
+ case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK))
+ return;
+ break;
default:
break;
}
@@ -7484,7 +7836,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
u64 access = (kvm_x86_call(get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_read);
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
@@ -7495,7 +7847,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
access |= PFERR_WRITE_MASK;
return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_write);
/* uses this to access any guest's mapped memory without checking CPL */
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@ -7581,7 +7933,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
exception);
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_virt);
static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
@@ -7653,7 +8005,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception);
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system);
static int kvm_check_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
@@ -7687,7 +8039,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
return kvm_emulate_instruction(vcpu, emul_type);
}
-EXPORT_SYMBOL_GPL(handle_ud);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_ud);
static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t gpa, bool write)
@@ -8166,7 +8518,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
kvm_emulate_wbinvd_noskip(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wbinvd);
@@ -8353,7 +8705,7 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r;
- r = kvm_get_msr_with_filter(vcpu, msr_index, pdata);
+ r = kvm_emulate_msr_read(vcpu, msr_index, pdata);
if (r < 0)
return X86EMUL_UNHANDLEABLE;
@@ -8376,7 +8728,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r;
- r = kvm_set_msr_with_filter(vcpu, msr_index, data);
+ r = kvm_emulate_msr_write(vcpu, msr_index, data);
if (r < 0)
return X86EMUL_UNHANDLEABLE;
@@ -8396,7 +8748,16 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 *pdata)
{
- return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+ /*
+ * Treat emulator accesses to the current shadow stack pointer as host-
+ * initiated, as they aren't true MSR accesses (SSP is a "just a reg"),
+ * and this API is used only for implicit accesses, i.e. not RDMSR, and
+ * so the index is fully KVM-controlled.
+ */
+ if (unlikely(msr_index == MSR_KVM_INTERNAL_GUEST_SSP))
+ return kvm_msr_read(emul_to_vcpu(ctxt), msr_index, pdata);
+
+ return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata);
}
static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc)
@@ -8470,11 +8831,6 @@ static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt)
return is_smm(emul_to_vcpu(ctxt));
}
-static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt)
-{
- return is_guest_mode(emul_to_vcpu(ctxt));
-}
-
#ifndef CONFIG_KVM_SMM
static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
@@ -8558,7 +8914,6 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible,
.set_nmi_mask = emulator_set_nmi_mask,
.is_smm = emulator_is_smm,
- .is_guest_mode = emulator_is_guest_mode,
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
@@ -8661,7 +9016,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
kvm_set_rflags(vcpu, ctxt->eflags);
}
}
-EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_realmode_interrupt);
static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
u8 ndata, u8 *insn_bytes, u8 insn_size)
@@ -8726,13 +9081,13 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
{
prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
}
-EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_prepare_emulation_failure_exit);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
{
__kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
}
-EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_emulation_failure_exit);
void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa)
{
@@ -8754,7 +9109,7 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa)
run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
run->internal.ndata = ndata;
}
-EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_event_vectoring_exit);
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
@@ -8864,7 +9219,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (unlikely(!r))
return 0;
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+ kvm_pmu_instruction_retired(vcpu);
/*
* rflags is the old, "raw" value of the flags. The new value has
@@ -8878,7 +9233,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
r = kvm_vcpu_do_singlestep(vcpu);
return r;
}
-EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction);
static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
{
@@ -9009,7 +9364,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
return r;
}
-EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_decode_emulated_instruction);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
@@ -9143,7 +9498,14 @@ restart:
ctxt->exception.address = 0;
}
- r = x86_emulate_insn(ctxt);
+ /*
+ * Check L1's instruction intercepts when emulating instructions for
+ * L2, unless KVM is re-emulating a previously decoded instruction,
+ * e.g. to complete userspace I/O, in which case KVM has already
+ * checked the intercepts.
+ */
+ r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) &&
+ !(emulation_type & EMULTYPE_NO_DECODE));
if (r == EMULATION_INTERCEPTED)
return 1;
@@ -9198,9 +9560,9 @@ writeback:
*/
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+ kvm_pmu_instruction_retired(vcpu);
if (ctxt->is_branch)
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+ kvm_pmu_branch_retired(vcpu);
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
@@ -9226,14 +9588,14 @@ int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
{
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len)
{
return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction_from_buffer);
static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
{
@@ -9328,7 +9690,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
ret = kvm_fast_pio_out(vcpu, size, port);
return ret && kvm_skip_emulated_instruction(vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fast_pio);
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
@@ -9651,6 +10013,18 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
return -EIO;
}
+ if (boot_cpu_has(X86_FEATURE_SHSTK) || boot_cpu_has(X86_FEATURE_IBT)) {
+ rdmsrq(MSR_IA32_S_CET, kvm_host.s_cet);
+ /*
+ * Linux doesn't yet support supervisor shadow stacks (SSS), so
+ * KVM doesn't save/restore the associated MSRs, i.e. KVM may
+ * clobber the host values. Yell and refuse to load if SSS is
+ * unexpectedly enabled, e.g. to avoid crashing the host.
+ */
+ if (WARN_ON_ONCE(kvm_host.s_cet & CET_SHSTK_EN))
+ return -EIO;
+ }
+
memset(&kvm_caps, 0, sizeof(kvm_caps));
x86_emulator_cache = kvm_alloc_emulator_cache();
@@ -9678,14 +10052,17 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
}
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ rdmsrq(MSR_IA32_XSS, kvm_host.xss);
+ kvm_caps.supported_xss = kvm_host.xss & KVM_SUPPORTED_XSS;
+ }
+
kvm_caps.supported_quirks = KVM_X86_VALID_QUIRKS;
kvm_caps.inapplicable_quirks = KVM_X86_CONDITIONAL_QUIRKS;
rdmsrq_safe(MSR_EFER, &kvm_host.efer);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrq(MSR_IA32_XSS, kvm_host.xss);
-
kvm_init_pmu_capability(ops->pmu_ops);
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
@@ -9734,6 +10111,16 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+
+ if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+ }
+
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@@ -9760,7 +10147,7 @@ out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
return r;
}
-EXPORT_SYMBOL_GPL(kvm_x86_vendor_init);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_init);
void kvm_x86_vendor_exit(void)
{
@@ -9794,7 +10181,7 @@ void kvm_x86_vendor_exit(void)
kvm_x86_ops.enable_virtualization_cpu = NULL;
mutex_unlock(&vendor_module_lock);
}
-EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_exit);
#ifdef CONFIG_X86_64
static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
@@ -9858,7 +10245,7 @@ bool kvm_apicv_activated(struct kvm *kvm)
{
return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
}
-EXPORT_SYMBOL_GPL(kvm_apicv_activated);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apicv_activated);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
{
@@ -9868,7 +10255,7 @@ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu)
return (vm_reasons | vcpu_reasons) == 0;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_apicv_activated);
static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
enum kvm_apicv_inhibit reason, bool set)
@@ -10044,7 +10431,7 @@ out:
vcpu->run->hypercall.ret = ret;
return 1;
}
-EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(____kvm_emulate_hypercall);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
@@ -10057,7 +10444,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
return __kvm_emulate_hypercall(vcpu, kvm_x86_call(get_cpl)(vcpu),
complete_hypercall_exit);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_hypercall);
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
{
@@ -10500,7 +10887,7 @@ out:
preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
-EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_update_apicv);
static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
@@ -10576,7 +10963,7 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
__kvm_set_or_clear_apicv_inhibit(kvm, reason, set);
up_write(&kvm->arch.apicv_update_lock);
}
-EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit);
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
@@ -10796,13 +11183,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
- /*
- * Recalc MSR intercepts as userspace may want to intercept
- * accesses to MSRs that KVM would otherwise pass through to
- * the guest.
- */
- if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
- kvm_x86_call(recalc_msr_intercepts)(vcpu);
+ if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu))
+ kvm_x86_call(recalc_intercepts)(vcpu);
if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
kvm_x86_call(update_cpu_dirty_logging)(vcpu);
@@ -11135,7 +11517,7 @@ bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return false;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_has_events);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_has_events);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
@@ -11288,7 +11670,7 @@ int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
{
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt_noskip);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
@@ -11299,17 +11681,11 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
*/
return kvm_emulate_halt_noskip(vcpu) && ret;
}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
{
- int ret;
-
- kvm_vcpu_srcu_read_lock(vcpu);
- ret = kvm_emulate_halt(vcpu);
- kvm_vcpu_srcu_read_unlock(vcpu);
-
- if (!ret)
+ if (!kvm_emulate_halt(vcpu))
return EXIT_FASTPATH_EXIT_USERSPACE;
if (kvm_vcpu_running(vcpu))
@@ -11317,7 +11693,7 @@ fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_EXIT_HANDLED;
}
-EXPORT_SYMBOL_GPL(handle_fastpath_hlt);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_hlt);
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
{
@@ -11326,7 +11702,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
KVM_EXIT_AP_RESET_HOLD) && ret;
}
-EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_ap_reset_hold);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
@@ -11837,6 +12213,25 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
int ret;
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) {
+ u64 u_cet, s_cet;
+
+ /*
+ * Check both User and Supervisor on task switches as inter-
+ * privilege level task switches are impacted by CET at both
+ * the current privilege level and the new privilege level, and
+ * that information is not known at this time. The expectation
+ * is that the guest won't require emulation of task switches
+ * while using IBT or Shadow Stacks.
+ */
+ if (__kvm_emulate_msr_read(vcpu, MSR_IA32_U_CET, &u_cet) ||
+ __kvm_emulate_msr_read(vcpu, MSR_IA32_S_CET, &s_cet))
+ goto unhandled_task_switch;
+
+ if ((u_cet | s_cet) & (CET_ENDBR_EN | CET_SHSTK_EN))
+ goto unhandled_task_switch;
+ }
+
init_emulate_ctxt(vcpu);
ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
@@ -11846,19 +12241,21 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
* Report an error userspace if MMIO is needed, as KVM doesn't support
* MMIO during a task switch (or any other complex operation).
*/
- if (ret || vcpu->mmio_needed) {
- vcpu->mmio_needed = false;
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
- }
+ if (ret || vcpu->mmio_needed)
+ goto unhandled_task_switch;
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
return 1;
+
+unhandled_task_switch:
+ vcpu->mmio_needed = false;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
}
-EXPORT_SYMBOL_GPL(kvm_task_switch);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_task_switch);
static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
@@ -12388,6 +12785,42 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvfree(vcpu->arch.cpuid_entries);
}
+static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+ struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
+ u64 xfeatures_mask;
+ int i;
+
+ /*
+ * Guest FPU state is zero allocated and so doesn't need to be manually
+ * cleared on RESET, i.e. during vCPU creation.
+ */
+ if (!init_event || !fpstate)
+ return;
+
+ /*
+ * On INIT, only select XSTATE components are zeroed, most components
+ * are unchanged. Currently, the only components that are zeroed and
+ * supported by KVM are MPX and CET related.
+ */
+ xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) &
+ (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR |
+ XFEATURE_MASK_CET_ALL);
+ if (!xfeatures_mask)
+ return;
+
+ BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX);
+
+ /*
+ * All paths that lead to INIT are required to load the guest's FPU
+ * state (because most paths are buried in KVM_RUN).
+ */
+ kvm_put_guest_fpu(vcpu);
+ for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX)
+ fpstate_clear_xstate_component(fpstate, i);
+ kvm_load_guest_fpu(vcpu);
+}
+
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_cpuid_entry2 *cpuid_0x1;
@@ -12445,22 +12878,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
- struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
-
- /*
- * All paths that lead to INIT are required to load the guest's
- * FPU state (because most paths are buried in KVM_RUN).
- */
- if (init_event)
- kvm_put_guest_fpu(vcpu);
-
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
-
- if (init_event)
- kvm_load_guest_fpu(vcpu);
- }
+ kvm_xstate_reset(vcpu, init_event);
if (!init_event) {
vcpu->arch.smbase = 0x30000;
@@ -12472,7 +12890,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
__kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
- __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
+ kvm_msr_write(vcpu, MSR_IA32_XSS, 0);
}
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
@@ -12538,7 +12956,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (init_event)
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_reset);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
@@ -12550,7 +12968,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
kvm_rip_write(vcpu, 0);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_deliver_sipi_vector);
void kvm_arch_enable_virtualization(void)
{
@@ -12668,7 +13086,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_is_reset_bsp);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
@@ -12832,7 +13250,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
return (void __user *)hva;
}
-EXPORT_SYMBOL_GPL(__x86_set_memory_region);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
@@ -13240,13 +13658,13 @@ unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
kvm_rip_read(vcpu));
}
-EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_linear_rip);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
return kvm_get_linear_rip(vcpu) == linear_rip;
}
-EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_is_linear_rip);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
@@ -13257,7 +13675,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
rflags &= ~X86_EFLAGS_TF;
return rflags;
}
-EXPORT_SYMBOL_GPL(kvm_get_rflags);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_rflags);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
@@ -13272,7 +13690,7 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
__kvm_set_rflags(vcpu, rflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
-EXPORT_SYMBOL_GPL(kvm_set_rflags);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_rflags);
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
@@ -13504,31 +13922,23 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1)
kvm_noncoherent_dma_assignment_start_or_stop(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count))
kvm_noncoherent_dma_assignment_start_or_stop(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
{
return atomic_read(&kvm->arch.noncoherent_dma_count);
}
-EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
-
-bool kvm_vector_hashing_enabled(void)
-{
- return vector_hashing;
-}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_arch_has_noncoherent_dma);
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
-EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
#ifdef CONFIG_KVM_GUEST_MEMFD
/*
@@ -13579,7 +13989,7 @@ int kvm_spec_ctrl_test_value(u64 value)
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spec_ctrl_test_value);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
{
@@ -13604,7 +14014,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
}
vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
}
-EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error);
/*
* Handles kvm_read/write_guest_virt*() result and either injects #PF or returns
@@ -13633,7 +14043,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_memory_failure);
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
{
@@ -13697,7 +14107,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return 1;
}
}
-EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invpcid);
static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
{
@@ -13782,7 +14192,7 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_write);
int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
void *data)
@@ -13820,7 +14230,7 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_read);
static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size)
{
@@ -13908,7 +14318,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
return in ? kvm_sev_es_ins(vcpu, size, port)
: kvm_sev_es_outs(vcpu, size, port);
}
-EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_string_io);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);