diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 946 |
1 files changed, 678 insertions, 268 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f122906ed9f3..4b8138bd4857 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -97,10 +97,10 @@ * vendor module being reloaded with different module parameters. */ struct kvm_caps kvm_caps __read_mostly; -EXPORT_SYMBOL_GPL(kvm_caps); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_caps); struct kvm_host_values kvm_host __read_mostly; -EXPORT_SYMBOL_GPL(kvm_host); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_host); #define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) @@ -136,6 +136,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static DEFINE_MUTEX(vendor_module_lock); +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); + struct kvm_x86_ops kvm_x86_ops __read_mostly; #define KVM_X86_OP(func) \ @@ -152,7 +155,7 @@ module_param(ignore_msrs, bool, 0644); bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, 0644); -EXPORT_SYMBOL_GPL(report_ignored_msrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(report_ignored_msrs); unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, 0644); @@ -164,12 +167,9 @@ module_param(kvmclock_periodic_sync, bool, 0444); static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, 0644); -static bool __read_mostly vector_hashing = true; -module_param(vector_hashing, bool, 0444); - bool __read_mostly enable_vmware_backdoor = false; module_param(enable_vmware_backdoor, bool, 0444); -EXPORT_SYMBOL_GPL(enable_vmware_backdoor); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_vmware_backdoor); /* * Flags to manipulate forced emulation behavior (any non-zero value will @@ -184,7 +184,7 @@ module_param(pi_inject_timer, bint, 0644); /* Enable/disable PMU virtualization */ bool __read_mostly enable_pmu = true; -EXPORT_SYMBOL_GPL(enable_pmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu); module_param(enable_pmu, bool, 0444); bool __read_mostly eager_page_split = true; @@ -211,7 +211,7 @@ struct kvm_user_return_msrs { }; u32 __read_mostly kvm_nr_uret_msrs; -EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_nr_uret_msrs); static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS]; static struct kvm_user_return_msrs __percpu *user_return_msrs; @@ -220,17 +220,26 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs; | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \ | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE) +#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL) +/* + * Note, KVM supports exposing PT to the guest, but does not support context + * switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping + * PT via guest XSTATE would clobber perf state), i.e. KVM doesn't support + * IA32_XSS[bit 8] (guests can/must use RDMSR/WRMSR to save/restore PT MSRs). + */ +#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_ALL) + bool __read_mostly allow_smaller_maxphyaddr = 0; -EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(allow_smaller_maxphyaddr); bool __read_mostly enable_apicv = true; -EXPORT_SYMBOL_GPL(enable_apicv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_apicv); bool __read_mostly enable_ipiv = true; -EXPORT_SYMBOL_GPL(enable_ipiv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv); bool __read_mostly enable_device_posted_irqs = true; -EXPORT_SYMBOL_GPL(enable_device_posted_irqs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs); const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -335,7 +344,11 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, - MSR_IA32_XFD, MSR_IA32_XFD_ERR, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, MSR_IA32_XSS, + + MSR_IA32_U_CET, MSR_IA32_S_CET, + MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, + MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB, }; static const u32 msrs_to_save_pmu[] = { @@ -367,6 +380,7 @@ static const u32 msrs_to_save_pmu[] = { MSR_AMD64_PERF_CNTR_GLOBAL_CTL, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + @@ -614,7 +628,7 @@ int kvm_add_user_return_msr(u32 msr) kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr; return kvm_nr_uret_msrs++; } -EXPORT_SYMBOL_GPL(kvm_add_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_add_user_return_msr); int kvm_find_user_return_msr(u32 msr) { @@ -626,7 +640,7 @@ int kvm_find_user_return_msr(u32 msr) } return -1; } -EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_user_return_msr); static void kvm_user_return_msr_cpu_online(void) { @@ -666,7 +680,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) kvm_user_return_register_notifier(msrs); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr); void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) { @@ -675,7 +689,13 @@ void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) msrs->values[slot].curr = value; kvm_user_return_register_notifier(msrs); } -EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_user_return_msr_update_cache); + +u64 kvm_get_user_return_msr(unsigned int slot) +{ + return this_cpu_ptr(user_return_msrs)->values[slot].curr; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_user_return_msr); static void drop_user_return_notifiers(void) { @@ -697,7 +717,7 @@ noinstr void kvm_spurious_fault(void) /* Fault while not rebooting. We want the trace. */ BUG_ON(!kvm_rebooting); } -EXPORT_SYMBOL_GPL(kvm_spurious_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spurious_fault); #define EXCPT_BENIGN 0 #define EXCPT_CONTRIBUTORY 1 @@ -802,7 +822,7 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, ex->has_payload = false; ex->payload = 0; } -EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_deliver_exception_payload); static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector, bool has_error_code, u32 error_code, @@ -886,7 +906,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) { kvm_multiple_exception(vcpu, nr, false, 0, false, 0); } -EXPORT_SYMBOL_GPL(kvm_queue_exception); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, @@ -894,7 +914,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, { kvm_multiple_exception(vcpu, nr, false, 0, true, payload); } -EXPORT_SYMBOL_GPL(kvm_queue_exception_p); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_p); static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code, unsigned long payload) @@ -929,7 +949,7 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, vcpu->arch.exception.has_payload = false; vcpu->arch.exception.payload = 0; } -EXPORT_SYMBOL_GPL(kvm_requeue_exception); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_requeue_exception); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) { @@ -940,7 +960,7 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) return 1; } -EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_complete_insn_gp); static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err) { @@ -990,7 +1010,7 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, fault_mmu->inject_page_fault(vcpu, fault); } -EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault); void kvm_inject_nmi(struct kvm_vcpu *vcpu) { @@ -1002,7 +1022,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { kvm_multiple_exception(vcpu, nr, true, error_code, false, 0); } -EXPORT_SYMBOL_GPL(kvm_queue_exception_e); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e); /* * Checks if cpl <= required_cpl; if true, return true. Otherwise queue @@ -1024,7 +1044,7 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) kvm_queue_exception(vcpu, UD_VECTOR); return false; } -EXPORT_SYMBOL_GPL(kvm_require_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr); static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) { @@ -1079,7 +1099,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) return 1; } -EXPORT_SYMBOL_GPL(load_pdptrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(load_pdptrs); static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { @@ -1132,7 +1152,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS) kvm_mmu_reset_context(vcpu); } -EXPORT_SYMBOL_GPL(kvm_post_set_cr0); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr0); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { @@ -1167,19 +1187,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) (is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE))) return 1; + if (!(cr0 & X86_CR0_WP) && kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) + return 1; + kvm_x86_call(set_cr0)(vcpu, cr0); kvm_post_set_cr0(vcpu, old_cr0, cr0); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr0); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr0); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); } -EXPORT_SYMBOL_GPL(kvm_lmsw); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lmsw); void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { @@ -1202,7 +1225,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) wrpkru(vcpu->arch.pkru); } -EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { @@ -1228,7 +1251,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } } -EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state); #ifdef CONFIG_X86_64 static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) @@ -1237,7 +1260,7 @@ static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) } #endif -static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0 = xcr; u64 old_xcr0 = vcpu->arch.xcr0; @@ -1281,6 +1304,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.cpuid_dynamic_bits_dirty = true; return 0; } +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) { @@ -1293,7 +1317,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_xsetbv); static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1341,7 +1365,7 @@ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned lon kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } -EXPORT_SYMBOL_GPL(kvm_post_set_cr4); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1366,13 +1390,16 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } + if ((cr4 & X86_CR4_CET) && !kvm_is_cr0_bit_set(vcpu, X86_CR0_WP)) + return 1; + kvm_x86_call(set_cr4)(vcpu, cr4); kvm_post_set_cr4(vcpu, old_cr4, cr4); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr4); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr4); static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid) { @@ -1464,7 +1491,7 @@ handle_tlb_flush: return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr3); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr3); int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { @@ -1476,7 +1503,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) vcpu->arch.cr8 = cr8; return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr8); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr8); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { @@ -1485,7 +1512,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) else return vcpu->arch.cr8; } -EXPORT_SYMBOL_GPL(kvm_get_cr8); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_cr8); static void kvm_update_dr0123(struct kvm_vcpu *vcpu) { @@ -1510,7 +1537,7 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu) if (dr7 & DR7_BP_EN_MASK) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } -EXPORT_SYMBOL_GPL(kvm_update_dr7); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_update_dr7); static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { @@ -1551,7 +1578,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) return 0; } -EXPORT_SYMBOL_GPL(kvm_set_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_dr); unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) { @@ -1568,14 +1595,14 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) return vcpu->arch.dr7; } } -EXPORT_SYMBOL_GPL(kvm_get_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr); int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); + u32 pmc = kvm_rcx_read(vcpu); u64 data; - if (kvm_pmu_rdpmc(vcpu, ecx, &data)) { + if (kvm_pmu_rdpmc(vcpu, pmc, &data)) { kvm_inject_gp(vcpu, 0); return 1; } @@ -1584,7 +1611,7 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) kvm_rdx_write(vcpu, data >> 32); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdpmc); /* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM @@ -1723,7 +1750,7 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) return __kvm_valid_efer(vcpu, efer); } -EXPORT_SYMBOL_GPL(kvm_valid_efer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_valid_efer); static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { @@ -1766,7 +1793,7 @@ void kvm_enable_efer_bits(u64 mask) { efer_reserved_bits &= ~mask; } -EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_efer_bits); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) { @@ -1809,7 +1836,7 @@ out: return allowed; } -EXPORT_SYMBOL_GPL(kvm_msr_allowed); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_msr_allowed); /* * Write @data into the MSR specified by @index. Select MSR specific fault @@ -1870,6 +1897,44 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, data = (u32)data; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT)) + return KVM_MSR_RET_UNSUPPORTED; + if (!kvm_is_valid_u_s_cet(vcpu, data)) + return 1; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + if (!host_initiated) + return 1; + fallthrough; + /* + * Note that the MSR emulation here is flawed when a vCPU + * doesn't support the Intel 64 architecture. The expected + * architectural behavior in this case is that the upper 32 + * bits do not exist and should always read '0'. However, + * because the actual hardware on which the virtual CPU is + * running does support Intel 64, XRSTORS/XSAVES in the + * guest could observe behavior that violates the + * architecture. Intercepting XRSTORS/XSAVES for this + * special case isn't deemed worthwhile. + */ + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return KVM_MSR_RET_UNSUPPORTED; + /* + * MSR_IA32_INT_SSP_TAB is not present on processors that do + * not support Intel 64 architecture. + */ + if (index == MSR_IA32_INT_SSP_TAB && !guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) + return KVM_MSR_RET_UNSUPPORTED; + if (is_noncanonical_msr_address(data, vcpu)) + return 1; + /* All SSP MSRs except MSR_IA32_INT_SSP_TAB must be 4-byte aligned */ + if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4)) + return 1; + break; } msr.data = data; @@ -1898,8 +1963,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, - bool host_initiated) +static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { struct msr_data msr; int ret; @@ -1914,6 +1979,20 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)) return 1; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT)) + return KVM_MSR_RET_UNSUPPORTED; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + if (!host_initiated) + return 1; + fallthrough; + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return KVM_MSR_RET_UNSUPPORTED; + break; } msr.index = index; @@ -1925,6 +2004,16 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, return ret; } +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + return __kvm_set_msr(vcpu, index, data, true); +} + +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) +{ + return __kvm_get_msr(vcpu, index, data, true); +} + static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated) { @@ -1932,33 +2021,36 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, __kvm_get_msr); } -int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { - if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return KVM_MSR_RET_FILTERED; return kvm_get_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_read); -int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { - if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return KVM_MSR_RET_FILTERED; return kvm_set_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_write); -int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { - return kvm_get_msr_ignored_check(vcpu, index, data, false); + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) + return KVM_MSR_RET_FILTERED; + + return __kvm_emulate_msr_read(vcpu, index, data); } -EXPORT_SYMBOL_GPL(kvm_get_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_read); -int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { - return kvm_set_msr_ignored_check(vcpu, index, data, false); + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) + return KVM_MSR_RET_FILTERED; + + return __kvm_emulate_msr_write(vcpu, index, data); } -EXPORT_SYMBOL_GPL(kvm_set_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_write); + static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu) { @@ -1990,6 +2082,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) return complete_fast_msr_access(vcpu); } +static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu) +{ + if (!vcpu->run->msr.error) + kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg, + vcpu->run->msr.data); + + return complete_fast_msr_access(vcpu); +} + static u64 kvm_msr_reason(int r) { switch (r) { @@ -2024,55 +2125,82 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index, return 1; } -int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg, + int (*complete_rdmsr)(struct kvm_vcpu *)) { - u32 ecx = kvm_rcx_read(vcpu); u64 data; int r; - r = kvm_get_msr_with_filter(vcpu, ecx, &data); + r = kvm_emulate_msr_read(vcpu, msr, &data); if (!r) { - trace_kvm_msr_read(ecx, data); + trace_kvm_msr_read(msr, data); - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); + if (reg < 0) { + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { + kvm_register_write(vcpu, reg, data); + } } else { /* MSR read failed? See if we should ask user space */ - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0, - complete_fast_rdmsr, r)) + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0, + complete_rdmsr, r)) return 0; - trace_kvm_msr_read_ex(ecx); + trace_kvm_msr_read_ex(msr); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); -int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); - u64 data = kvm_read_edx_eax(vcpu); - int r; + return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1, + complete_fast_rdmsr); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr); + +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + vcpu->arch.cui_rdmsr_imm_reg = reg; + + return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr_imm); - r = kvm_set_msr_with_filter(vcpu, ecx, data); +static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + int r; + r = kvm_emulate_msr_write(vcpu, msr, data); if (!r) { - trace_kvm_msr_write(ecx, data); + trace_kvm_msr_write(msr, data); } else { /* MSR write failed? See if we should ask user space */ - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data, + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data, complete_fast_msr_access, r)) return 0; /* Signal all other negative errors to userspace */ if (r < 0) return r; - trace_kvm_msr_write_ex(ecx, data); + trace_kvm_msr_write_ex(msr, data); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); } -EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); + +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr); + +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr_imm); int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { @@ -2084,14 +2212,23 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu) /* Treat an INVD instruction as a NOP and just skip it. */ return kvm_emulate_as_nop(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_invd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_invd); + +fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu) +{ + if (!kvm_emulate_invd(vcpu)) + return EXIT_FASTPATH_EXIT_USERSPACE; + + return EXIT_FASTPATH_REENTER_GUEST; +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_invd); int kvm_handle_invalid_op(struct kvm_vcpu *vcpu) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -EXPORT_SYMBOL_GPL(kvm_handle_invalid_op); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invalid_op); static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn) @@ -2117,13 +2254,13 @@ int kvm_emulate_mwait(struct kvm_vcpu *vcpu) { return kvm_emulate_monitor_mwait(vcpu, "MWAIT"); } -EXPORT_SYMBOL_GPL(kvm_emulate_mwait); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_mwait); int kvm_emulate_monitor(struct kvm_vcpu *vcpu) { return kvm_emulate_monitor_mwait(vcpu, "MONITOR"); } -EXPORT_SYMBOL_GPL(kvm_emulate_monitor); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_monitor); static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { @@ -2133,74 +2270,41 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } -/* - * The fast path for frequent and performance sensitive wrmsr emulation, - * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces - * the latency of virtual IPI by avoiding the expensive bits of transitioning - * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the - * other cases which must be called after interrupts are enabled on the host. - */ -static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data) -{ - if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic)) - return 1; - - if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) && - ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && - ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && - ((u32)(data >> 32) != X2APIC_BROADCAST)) - return kvm_x2apic_icr_write(vcpu->arch.apic, data); - - return 1; -} - -static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) -{ - if (!kvm_can_use_hv_timer(vcpu)) - return 1; - - kvm_set_lapic_tscdeadline_msr(vcpu, data); - return 0; -} - -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - u32 msr = kvm_rcx_read(vcpu); - u64 data; - fastpath_t ret; - bool handled; - - kvm_vcpu_srcu_read_lock(vcpu); - switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): - data = kvm_read_edx_eax(vcpu); - handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) || + kvm_x2apic_icr_write_fast(vcpu->arch.apic, data)) + return EXIT_FASTPATH_NONE; break; case MSR_IA32_TSC_DEADLINE: - data = kvm_read_edx_eax(vcpu); - handled = !handle_fastpath_set_tscdeadline(vcpu, data); + kvm_set_lapic_tscdeadline_msr(vcpu, data); break; default: - handled = false; - break; + return EXIT_FASTPATH_NONE; } - if (handled) { - if (!kvm_skip_emulated_instruction(vcpu)) - ret = EXIT_FASTPATH_EXIT_USERSPACE; - else - ret = EXIT_FASTPATH_REENTER_GUEST; - trace_kvm_msr_write(msr, data); - } else { - ret = EXIT_FASTPATH_NONE; - } + trace_kvm_msr_write(msr, data); - kvm_vcpu_srcu_read_unlock(vcpu); + if (!kvm_skip_emulated_instruction(vcpu)) + return EXIT_FASTPATH_EXIT_USERSPACE; - return ret; + return EXIT_FASTPATH_REENTER_GUEST; +} + +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) +{ + return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); } -EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr); + +fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr_imm); /* * Adapt set_msr() to msr_io()'s calling convention @@ -2566,7 +2670,7 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio); } -EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_l1_tsc); u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier) { @@ -2581,7 +2685,7 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier) nested_offset += l2_offset; return nested_offset; } -EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_offset); u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier) { @@ -2591,7 +2695,7 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier) return l1_multiplier; } -EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_multiplier); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset) { @@ -3669,7 +3773,7 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) kvm_vcpu_flush_tlb_guest(vcpu); } -EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_service_local_tlb_flush_requests); static void record_steal_time(struct kvm_vcpu *vcpu) { @@ -3769,6 +3873,67 @@ static void record_steal_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } +/* + * Returns true if the MSR in question is managed via XSTATE, i.e. is context + * switched with the rest of guest FPU state. Note! S_CET is _not_ context + * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. + * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, + * the value saved/restored via XSTATE is always the host's value. That detail + * is _extremely_ important, as the guest's S_CET must _never_ be resident in + * hardware while executing in the host. Loading guest values for U_CET and + * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to + * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower + * privilege levels, i.e. are effectively only consumed by userspace as well. + */ +static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) +{ + if (!vcpu) + return false; + + switch (msr) { + case MSR_IA32_U_CET: + return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) || + guest_cpu_cap_has(vcpu, X86_FEATURE_IBT); + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK); + default: + return false; + } +} + +/* + * Lock (and if necessary, re-load) the guest FPU, i.e. XSTATE, and access an + * MSR that is managed via XSTATE. Note, the caller is responsible for doing + * the initial FPU load, this helper only ensures that guest state is resident + * in hardware (the kernel can load its FPU state in IRQ context). + */ +static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, + struct msr_data *msr_info, + int access) +{ + BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W); + + KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm); + KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm); + + kvm_fpu_get(); + if (access == MSR_TYPE_R) + rdmsrq(msr_info->index, msr_info->data); + else + wrmsrq(msr_info->index, msr_info->data); + kvm_fpu_put(); +} + +static void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W); +} + +static void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R); +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr = msr_info->index; @@ -3960,16 +4125,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_XSS: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) - return 1; - /* - * KVM supports exposing PT to the guest, but does not support - * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than - * XSAVES/XRSTORS to save/restore PT MSRs. - */ - if (data & ~kvm_caps.supported_xss) + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return KVM_MSR_RET_UNSUPPORTED; + + if (data & ~vcpu->arch.guest_supported_xss) return 1; + if (vcpu->arch.ia32_xss == data) + break; vcpu->arch.ia32_xss = data; vcpu->arch.cpuid_dynamic_bits_dirty = true; break; @@ -4153,6 +4315,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.guest_fpu.xfd_err = data; break; #endif + case MSR_IA32_U_CET: + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + kvm_set_xstate_msr(vcpu, msr_info); + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); @@ -4161,7 +4327,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } return 0; } -EXPORT_SYMBOL_GPL(kvm_set_msr_common); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_msr_common); static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { @@ -4502,6 +4668,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.guest_fpu.xfd_err; break; #endif + case MSR_IA32_U_CET: + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + kvm_get_xstate_msr(vcpu, msr_info); + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); @@ -4510,7 +4680,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } return 0; } -EXPORT_SYMBOL_GPL(kvm_get_msr_common); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_msr_common); /* * Read or write a bunch of msrs. All parameters are kernel addresses. @@ -4522,11 +4692,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { + bool fpu_loaded = false; int i; - for (i = 0; i < msrs->nmsrs; ++i) + for (i = 0; i < msrs->nmsrs; ++i) { + /* + * If userspace is accessing one or more XSTATE-managed MSRs, + * temporarily load the guest's FPU state so that the guest's + * MSR value(s) is resident in hardware and thus can be accessed + * via RDMSR/WRMSR. + */ + if (!fpu_loaded && is_xstate_managed_msr(vcpu, entries[i].index)) { + kvm_load_guest_fpu(vcpu); + fpu_loaded = true; + } if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; + } + if (fpu_loaded) + kvm_put_guest_fpu(vcpu); return i; } @@ -4711,6 +4895,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IRQFD_RESAMPLE: case KVM_CAP_MEMORY_FAULT_INFO: case KVM_CAP_X86_GUEST_MODE: + case KVM_CAP_ONE_REG: r = 1; break; case KVM_CAP_PRE_FAULT_MEMORY: @@ -5889,6 +6074,134 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, } } +struct kvm_x86_reg_id { + __u32 index; + __u8 type; + __u8 rsvd1; + __u8 rsvd2:4; + __u8 size:4; + __u8 x86; +}; + +static int kvm_translate_kvm_reg(struct kvm_vcpu *vcpu, + struct kvm_x86_reg_id *reg) +{ + switch (reg->index) { + case KVM_REG_GUEST_SSP: + /* + * FIXME: If host-initiated accesses are ever exempted from + * ignore_msrs (in kvm_do_msr_access()), drop this manual check + * and rely on KVM's standard checks to reject accesses to regs + * that don't exist. + */ + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return -EINVAL; + + reg->type = KVM_X86_REG_TYPE_MSR; + reg->index = MSR_KVM_INTERNAL_GUEST_SSP; + break; + default: + return -EINVAL; + } + return 0; +} + +static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) +{ + u64 val; + + if (do_get_msr(vcpu, msr, &val)) + return -EINVAL; + + if (put_user(val, user_val)) + return -EFAULT; + + return 0; +} + +static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) +{ + u64 val; + + if (get_user(val, user_val)) + return -EFAULT; + + if (do_set_msr(vcpu, msr, &val)) + return -EINVAL; + + return 0; +} + +static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, + void __user *argp) +{ + struct kvm_one_reg one_reg; + struct kvm_x86_reg_id *reg; + u64 __user *user_val; + bool load_fpu; + int r; + + if (copy_from_user(&one_reg, argp, sizeof(one_reg))) + return -EFAULT; + + if ((one_reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86) + return -EINVAL; + + reg = (struct kvm_x86_reg_id *)&one_reg.id; + if (reg->rsvd1 || reg->rsvd2) + return -EINVAL; + + if (reg->type == KVM_X86_REG_TYPE_KVM) { + r = kvm_translate_kvm_reg(vcpu, reg); + if (r) + return r; + } + + if (reg->type != KVM_X86_REG_TYPE_MSR) + return -EINVAL; + + if ((one_reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64) + return -EINVAL; + + guard(srcu)(&vcpu->kvm->srcu); + + load_fpu = is_xstate_managed_msr(vcpu, reg->index); + if (load_fpu) + kvm_load_guest_fpu(vcpu); + + user_val = u64_to_user_ptr(one_reg.addr); + if (ioctl == KVM_GET_ONE_REG) + r = kvm_get_one_msr(vcpu, reg->index, user_val); + else + r = kvm_set_one_msr(vcpu, reg->index, user_val); + + if (load_fpu) + kvm_put_guest_fpu(vcpu); + return r; +} + +static int kvm_get_reg_list(struct kvm_vcpu *vcpu, + struct kvm_reg_list __user *user_list) +{ + u64 nr_regs = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ? 1 : 0; + u64 user_nr_regs; + + if (get_user(user_nr_regs, &user_list->n)) + return -EFAULT; + + if (put_user(nr_regs, &user_list->n)) + return -EFAULT; + + if (user_nr_regs < nr_regs) + return -E2BIG; + + if (nr_regs && + put_user(KVM_X86_REG_KVM(KVM_REG_GUEST_SSP), &user_list->reg[0])) + return -EFAULT; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -6005,6 +6318,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } + case KVM_GET_ONE_REG: + case KVM_SET_ONE_REG: + r = kvm_get_set_one_reg(vcpu, ioctl, argp); + break; + case KVM_GET_REG_LIST: + r = kvm_get_reg_list(vcpu, argp); + break; case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -6771,7 +7091,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, kvm_free_msr_filter(old_filter); - kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); + /* + * Recalc MSR intercepts as userspace may want to intercept accesses to + * MSRs that KVM would otherwise pass through to the guest. + */ + kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS); return 0; } @@ -6966,6 +7290,15 @@ set_identity_unlock: if (irqchip_in_kernel(kvm)) goto create_irqchip_unlock; + /* + * Disallow an in-kernel I/O APIC if the VM has protected EOIs, + * i.e. if KVM can't intercept EOIs and thus can't properly + * emulate level-triggered interrupts. + */ + r = -ENOTTY; + if (kvm->arch.has_protected_eoi) + goto create_irqchip_unlock; + r = -EINVAL; if (kvm->created_vcpus) goto create_irqchip_unlock; @@ -7353,6 +7686,7 @@ static void kvm_probe_msr_to_save(u32 msr_index) case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) return; break; @@ -7365,6 +7699,24 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR)) return; break; + case MSR_IA32_XSS: + if (!kvm_caps.supported_xss) + return; + break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + return; + break; + case MSR_IA32_INT_SSP_TAB: + if (!kvm_cpu_cap_has(X86_FEATURE_LM)) + return; + fallthrough; + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK)) + return; + break; default: break; } @@ -7484,7 +7836,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u64 access = (kvm_x86_call(get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } -EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_read); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception) @@ -7495,7 +7847,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, access |= PFERR_WRITE_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } -EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_write); /* uses this to access any guest's mapped memory without checking CPL */ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, @@ -7581,7 +7933,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -EXPORT_SYMBOL_GPL(kvm_read_guest_virt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_virt); static int emulator_read_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, @@ -7653,7 +8005,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } -EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system); static int kvm_check_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len) @@ -7687,7 +8039,7 @@ int handle_ud(struct kvm_vcpu *vcpu) return kvm_emulate_instruction(vcpu, emul_type); } -EXPORT_SYMBOL_GPL(handle_ud); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_ud); static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t gpa, bool write) @@ -8166,7 +8518,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) kvm_emulate_wbinvd_noskip(vcpu); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wbinvd); @@ -8353,7 +8705,7 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt, struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); + r = kvm_emulate_msr_read(vcpu, msr_index, pdata); if (r < 0) return X86EMUL_UNHANDLEABLE; @@ -8376,7 +8728,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_set_msr_with_filter(vcpu, msr_index, data); + r = kvm_emulate_msr_write(vcpu, msr_index, data); if (r < 0) return X86EMUL_UNHANDLEABLE; @@ -8396,7 +8748,16 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { - return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); + /* + * Treat emulator accesses to the current shadow stack pointer as host- + * initiated, as they aren't true MSR accesses (SSP is a "just a reg"), + * and this API is used only for implicit accesses, i.e. not RDMSR, and + * so the index is fully KVM-controlled. + */ + if (unlikely(msr_index == MSR_KVM_INTERNAL_GUEST_SSP)) + return kvm_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); + + return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); } static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc) @@ -8470,11 +8831,6 @@ static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt) return is_smm(emul_to_vcpu(ctxt)); } -static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt) -{ - return is_guest_mode(emul_to_vcpu(ctxt)); -} - #ifndef CONFIG_KVM_SMM static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) { @@ -8558,7 +8914,6 @@ static const struct x86_emulate_ops emulate_ops = { .guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible, .set_nmi_mask = emulator_set_nmi_mask, .is_smm = emulator_is_smm, - .is_guest_mode = emulator_is_guest_mode, .leave_smm = emulator_leave_smm, .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, @@ -8661,7 +9016,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) kvm_set_rflags(vcpu, ctxt->eflags); } } -EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_realmode_interrupt); static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, u8 ndata, u8 *insn_bytes, u8 insn_size) @@ -8726,13 +9081,13 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, { prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); } -EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_prepare_emulation_failure_exit); void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) { __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); } -EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_emulation_failure_exit); void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) { @@ -8754,7 +9109,7 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; run->internal.ndata = ndata; } -EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_event_vectoring_exit); static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { @@ -8864,7 +9219,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) if (unlikely(!r)) return 0; - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); + kvm_pmu_instruction_retired(vcpu); /* * rflags is the old, "raw" value of the flags. The new value has @@ -8878,7 +9233,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) r = kvm_vcpu_do_singlestep(vcpu); return r; } -EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction); static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu) { @@ -9009,7 +9364,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, return r; } -EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_decode_emulated_instruction); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len) @@ -9143,7 +9498,14 @@ restart: ctxt->exception.address = 0; } - r = x86_emulate_insn(ctxt); + /* + * Check L1's instruction intercepts when emulating instructions for + * L2, unless KVM is re-emulating a previously decoded instruction, + * e.g. to complete userspace I/O, in which case KVM has already + * checked the intercepts. + */ + r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) && + !(emulation_type & EMULTYPE_NO_DECODE)); if (r == EMULATION_INTERCEPTED) return 1; @@ -9198,9 +9560,9 @@ writeback: */ if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); + kvm_pmu_instruction_retired(vcpu); if (ctxt->is_branch) - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED); + kvm_pmu_branch_retired(vcpu); kvm_rip_write(vcpu, ctxt->eip); if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) r = kvm_vcpu_do_singlestep(vcpu); @@ -9226,14 +9588,14 @@ int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -EXPORT_SYMBOL_GPL(kvm_emulate_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len) { return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); } -EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction_from_buffer); static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) { @@ -9328,7 +9690,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) ret = kvm_fast_pio_out(vcpu, size, port); return ret && kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_fast_pio); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fast_pio); static int kvmclock_cpu_down_prep(unsigned int cpu) { @@ -9651,6 +10013,18 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) return -EIO; } + if (boot_cpu_has(X86_FEATURE_SHSTK) || boot_cpu_has(X86_FEATURE_IBT)) { + rdmsrq(MSR_IA32_S_CET, kvm_host.s_cet); + /* + * Linux doesn't yet support supervisor shadow stacks (SSS), so + * KVM doesn't save/restore the associated MSRs, i.e. KVM may + * clobber the host values. Yell and refuse to load if SSS is + * unexpectedly enabled, e.g. to avoid crashing the host. + */ + if (WARN_ON_ONCE(kvm_host.s_cet & CET_SHSTK_EN)) + return -EIO; + } + memset(&kvm_caps, 0, sizeof(kvm_caps)); x86_emulator_cache = kvm_alloc_emulator_cache(); @@ -9678,14 +10052,17 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0; } + + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + rdmsrq(MSR_IA32_XSS, kvm_host.xss); + kvm_caps.supported_xss = kvm_host.xss & KVM_SUPPORTED_XSS; + } + kvm_caps.supported_quirks = KVM_X86_VALID_QUIRKS; kvm_caps.inapplicable_quirks = KVM_X86_CONDITIONAL_QUIRKS; rdmsrq_safe(MSR_EFER, &kvm_host.efer); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrq(MSR_IA32_XSS, kvm_host.xss); - kvm_init_pmu_capability(ops->pmu_ops); if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) @@ -9734,6 +10111,16 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) kvm_caps.supported_xss = 0; + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + + if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + } + if (kvm_caps.has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9760,7 +10147,7 @@ out_free_x86_emulator_cache: kmem_cache_destroy(x86_emulator_cache); return r; } -EXPORT_SYMBOL_GPL(kvm_x86_vendor_init); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_init); void kvm_x86_vendor_exit(void) { @@ -9794,7 +10181,7 @@ void kvm_x86_vendor_exit(void) kvm_x86_ops.enable_virtualization_cpu = NULL; mutex_unlock(&vendor_module_lock); } -EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_exit); #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, @@ -9858,7 +10245,7 @@ bool kvm_apicv_activated(struct kvm *kvm) { return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0); } -EXPORT_SYMBOL_GPL(kvm_apicv_activated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apicv_activated); bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu) { @@ -9868,7 +10255,7 @@ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu) return (vm_reasons | vcpu_reasons) == 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_apicv_activated); static void set_or_clear_apicv_inhibit(unsigned long *inhibits, enum kvm_apicv_inhibit reason, bool set) @@ -10044,7 +10431,7 @@ out: vcpu->run->hypercall.ret = ret; return 1; } -EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(____kvm_emulate_hypercall); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { @@ -10057,7 +10444,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) return __kvm_emulate_hypercall(vcpu, kvm_x86_call(get_cpl)(vcpu), complete_hypercall_exit); } -EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_hypercall); static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { @@ -10500,7 +10887,7 @@ out: preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_update_apicv); static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) { @@ -10576,7 +10963,7 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, __kvm_set_or_clear_apicv_inhibit(kvm, reason, set); up_write(&kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit); static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { @@ -10796,13 +11183,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_APF_READY, vcpu)) kvm_check_async_pf_completion(vcpu); - /* - * Recalc MSR intercepts as userspace may want to intercept - * accesses to MSRs that KVM would otherwise pass through to - * the guest. - */ - if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu)) - kvm_x86_call(recalc_msr_intercepts)(vcpu); + if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu)) + kvm_x86_call(recalc_intercepts)(vcpu); if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu)) kvm_x86_call(update_cpu_dirty_logging)(vcpu); @@ -11135,7 +11517,7 @@ bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return false; } -EXPORT_SYMBOL_GPL(kvm_vcpu_has_events); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_has_events); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { @@ -11288,7 +11670,7 @@ int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) { return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); } -EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt_noskip); int kvm_emulate_halt(struct kvm_vcpu *vcpu) { @@ -11299,17 +11681,11 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) */ return kvm_emulate_halt_noskip(vcpu) && ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_halt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) { - int ret; - - kvm_vcpu_srcu_read_lock(vcpu); - ret = kvm_emulate_halt(vcpu); - kvm_vcpu_srcu_read_unlock(vcpu); - - if (!ret) + if (!kvm_emulate_halt(vcpu)) return EXIT_FASTPATH_EXIT_USERSPACE; if (kvm_vcpu_running(vcpu)) @@ -11317,7 +11693,7 @@ fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_EXIT_HANDLED; } -EXPORT_SYMBOL_GPL(handle_fastpath_hlt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_hlt); int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) { @@ -11326,7 +11702,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_ap_reset_hold); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) { @@ -11837,6 +12213,25 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; int ret; + if (kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) { + u64 u_cet, s_cet; + + /* + * Check both User and Supervisor on task switches as inter- + * privilege level task switches are impacted by CET at both + * the current privilege level and the new privilege level, and + * that information is not known at this time. The expectation + * is that the guest won't require emulation of task switches + * while using IBT or Shadow Stacks. + */ + if (__kvm_emulate_msr_read(vcpu, MSR_IA32_U_CET, &u_cet) || + __kvm_emulate_msr_read(vcpu, MSR_IA32_S_CET, &s_cet)) + goto unhandled_task_switch; + + if ((u_cet | s_cet) & (CET_ENDBR_EN | CET_SHSTK_EN)) + goto unhandled_task_switch; + } + init_emulate_ctxt(vcpu); ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, @@ -11846,19 +12241,21 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, * Report an error userspace if MMIO is needed, as KVM doesn't support * MMIO during a task switch (or any other complex operation). */ - if (ret || vcpu->mmio_needed) { - vcpu->mmio_needed = false; - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + if (ret || vcpu->mmio_needed) + goto unhandled_task_switch; kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); return 1; + +unhandled_task_switch: + vcpu->mmio_needed = false; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; } -EXPORT_SYMBOL_GPL(kvm_task_switch); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_task_switch); static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -12388,6 +12785,42 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvfree(vcpu->arch.cpuid_entries); } +static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) +{ + struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; + u64 xfeatures_mask; + int i; + + /* + * Guest FPU state is zero allocated and so doesn't need to be manually + * cleared on RESET, i.e. during vCPU creation. + */ + if (!init_event || !fpstate) + return; + + /* + * On INIT, only select XSTATE components are zeroed, most components + * are unchanged. Currently, the only components that are zeroed and + * supported by KVM are MPX and CET related. + */ + xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) & + (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR | + XFEATURE_MASK_CET_ALL); + if (!xfeatures_mask) + return; + + BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); + + /* + * All paths that lead to INIT are required to load the guest's FPU + * state (because most paths are buried in KVM_RUN). + */ + kvm_put_guest_fpu(vcpu); + for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) + fpstate_clear_xstate_component(fpstate, i); + kvm_load_guest_fpu(vcpu); +} + void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_cpuid_entry2 *cpuid_0x1; @@ -12445,22 +12878,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) { - struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; - - /* - * All paths that lead to INIT are required to load the guest's - * FPU state (because most paths are buried in KVM_RUN). - */ - if (init_event) - kvm_put_guest_fpu(vcpu); - - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS); - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); - - if (init_event) - kvm_load_guest_fpu(vcpu); - } + kvm_xstate_reset(vcpu, init_event); if (!init_event) { vcpu->arch.smbase = 0x30000; @@ -12472,7 +12890,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); - __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); + kvm_msr_write(vcpu, MSR_IA32_XSS, 0); } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ @@ -12538,7 +12956,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } -EXPORT_SYMBOL_GPL(kvm_vcpu_reset); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_reset); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { @@ -12550,7 +12968,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); kvm_rip_write(vcpu, 0); } -EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_deliver_sipi_vector); void kvm_arch_enable_virtualization(void) { @@ -12668,7 +13086,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; } -EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_is_reset_bsp); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { @@ -12832,7 +13250,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, return (void __user *)hva; } -EXPORT_SYMBOL_GPL(__x86_set_memory_region); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__x86_set_memory_region); void kvm_arch_pre_destroy_vm(struct kvm *kvm) { @@ -13240,13 +13658,13 @@ unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + kvm_rip_read(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_get_linear_rip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_linear_rip); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) { return kvm_get_linear_rip(vcpu) == linear_rip; } -EXPORT_SYMBOL_GPL(kvm_is_linear_rip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_is_linear_rip); unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) { @@ -13257,7 +13675,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) rflags &= ~X86_EFLAGS_TF; return rflags; } -EXPORT_SYMBOL_GPL(kvm_get_rflags); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_rflags); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { @@ -13272,7 +13690,7 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) __kvm_set_rflags(vcpu, rflags); kvm_make_request(KVM_REQ_EVENT, vcpu); } -EXPORT_SYMBOL_GPL(kvm_set_rflags); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_rflags); static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) { @@ -13504,31 +13922,23 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm) if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1) kvm_noncoherent_dma_assignment_start_or_stop(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) { if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count)) kvm_noncoherent_dma_assignment_start_or_stop(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) { return atomic_read(&kvm->arch.noncoherent_dma_count); } -EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); - -bool kvm_vector_hashing_enabled(void) -{ - return vector_hashing; -} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_arch_has_noncoherent_dma); bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { return (vcpu->arch.msr_kvm_poll_control & 1) == 0; } -EXPORT_SYMBOL_GPL(kvm_arch_no_poll); #ifdef CONFIG_KVM_GUEST_MEMFD /* @@ -13579,7 +13989,7 @@ int kvm_spec_ctrl_test_value(u64 value) return ret; } -EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spec_ctrl_test_value); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code) { @@ -13604,7 +14014,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c } vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault); } -EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error); /* * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns @@ -13633,7 +14043,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, return 0; } -EXPORT_SYMBOL_GPL(kvm_handle_memory_failure); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_memory_failure); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) { @@ -13697,7 +14107,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) return 1; } } -EXPORT_SYMBOL_GPL(kvm_handle_invpcid); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invpcid); static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) { @@ -13782,7 +14192,7 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, return 0; } -EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_write); int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, void *data) @@ -13820,7 +14230,7 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, return 0; } -EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_read); static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size) { @@ -13908,7 +14318,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, return in ? kvm_sev_es_ins(vcpu, size, port) : kvm_sev_es_outs(vcpu, size, port); } -EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_string_io); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |