diff options
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/tdx.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/common.h | 69 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/main.c | 48 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/posted_intr.c | 18 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/tdx.c | 238 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/tdx.h | 20 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 93 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 102 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/x86_ops.h | 11 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 28 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/seamcall.S | 3 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/tdx.c | 8 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/tdx.h | 1 |
15 files changed, 513 insertions, 149 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ddb08581c64d..3a6373fc58a1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -606,8 +606,15 @@ struct kvm_pmu { struct kvm_pmu_ops; enum { - KVM_DEBUGREG_BP_ENABLED = 1, - KVM_DEBUGREG_WONT_EXIT = 2, + KVM_DEBUGREG_BP_ENABLED = BIT(0), + KVM_DEBUGREG_WONT_EXIT = BIT(1), + /* + * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by + * hardware on exit from or enter to guest. KVM needn't switch them. + * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM + * exit, host values need to be restored. + */ + KVM_DEBUGREG_AUTO_SWITCH = BIT(2), }; struct kvm_mtrr { @@ -2328,6 +2335,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +void kvm_user_return_msr_update_cache(unsigned int index, u64 val); static inline bool kvm_is_supported_user_return_msr(u32 msr) { diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index e38e7558c02f..bb4a3bc26219 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -165,6 +165,7 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level) return level - 1; } +u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args); u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2); u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2); diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index 7a592467a044..a298ac795c09 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -3,9 +3,78 @@ #define __KVM_X86_VMX_COMMON_H #include <linux/kvm_host.h> +#include <asm/posted_intr.h> #include "mmu.h" +union vmx_exit_reason { + struct { + u32 basic : 16; + u32 reserved16 : 1; + u32 reserved17 : 1; + u32 reserved18 : 1; + u32 reserved19 : 1; + u32 reserved20 : 1; + u32 reserved21 : 1; + u32 reserved22 : 1; + u32 reserved23 : 1; + u32 reserved24 : 1; + u32 reserved25 : 1; + u32 bus_lock_detected : 1; + u32 enclave_mode : 1; + u32 smi_pending_mtf : 1; + u32 smi_from_vmx_root : 1; + u32 reserved30 : 1; + u32 failed_vmentry : 1; + }; + u32 full; +}; + +struct vcpu_vt { + /* Posted interrupt descriptor */ + struct pi_desc pi_desc; + + /* Used if this vCPU is waiting for PI notification wakeup. */ + struct list_head pi_wakeup_list; + + union vmx_exit_reason exit_reason; + + unsigned long exit_qualification; + u32 exit_intr_info; + + /* + * If true, guest state has been loaded into hardware, and host state + * saved into vcpu_{vt,vmx,tdx}. If false, host state is loaded into + * hardware. + */ + bool guest_state_loaded; + +#ifdef CONFIG_X86_64 + u64 msr_host_kernel_gs_base; +#endif + + unsigned long host_debugctlmsr; +}; + +#ifdef CONFIG_KVM_INTEL_TDX + +static __always_inline bool is_td(struct kvm *kvm) +{ + return kvm->arch.vm_type == KVM_X86_TDX_VM; +} + +static __always_inline bool is_td_vcpu(struct kvm_vcpu *vcpu) +{ + return is_td(vcpu->kvm); +} + +#else + +static inline bool is_td(struct kvm *kvm) { return false; } +static inline bool is_td_vcpu(struct kvm_vcpu *vcpu) { return false; } + +#endif + static inline bool vt_is_tdx_private_gpa(struct kvm *kvm, gpa_t gpa) { /* For TDX the direct mask is the shared mask. */ diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index ec8223ee9d28..c0497ed0c9be 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -10,6 +10,10 @@ #include "tdx.h" #include "tdx_arch.h" +#ifdef CONFIG_KVM_INTEL_TDX +static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt)); +#endif + static void vt_disable_virtualization_cpu(void) { /* Note, TDX *and* VMX need to be disabled if TDX is enabled. */ @@ -141,6 +145,42 @@ static void vt_update_cpu_dirty_logging(struct kvm_vcpu *vcpu) vmx_update_cpu_dirty_logging(vcpu); } +static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu) +{ + if (is_td_vcpu(vcpu)) { + tdx_prepare_switch_to_guest(vcpu); + return; + } + + vmx_prepare_switch_to_guest(vcpu); +} + +static void vt_vcpu_put(struct kvm_vcpu *vcpu) +{ + if (is_td_vcpu(vcpu)) { + tdx_vcpu_put(vcpu); + return; + } + + vmx_vcpu_put(vcpu); +} + +static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu) +{ + if (is_td_vcpu(vcpu)) + return tdx_vcpu_pre_run(vcpu); + + return vmx_vcpu_pre_run(vcpu); +} + +static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) +{ + if (is_td_vcpu(vcpu)) + return tdx_vcpu_run(vcpu, force_immediate_exit); + + return vmx_vcpu_run(vcpu, force_immediate_exit); +} + static void vt_flush_tlb_all(struct kvm_vcpu *vcpu) { if (is_td_vcpu(vcpu)) { @@ -245,9 +285,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_free = vt_vcpu_free, .vcpu_reset = vt_vcpu_reset, - .prepare_switch_to_guest = vmx_prepare_switch_to_guest, + .prepare_switch_to_guest = vt_prepare_switch_to_guest, .vcpu_load = vt_vcpu_load, - .vcpu_put = vmx_vcpu_put, + .vcpu_put = vt_vcpu_put, .update_exception_bitmap = vmx_update_exception_bitmap, .get_feature_msr = vmx_get_feature_msr, @@ -281,8 +321,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .flush_tlb_gva = vt_flush_tlb_gva, .flush_tlb_guest = vt_flush_tlb_guest, - .vcpu_pre_run = vmx_vcpu_pre_run, - .vcpu_run = vmx_vcpu_run, + .vcpu_pre_run = vt_vcpu_pre_run, + .vcpu_run = vt_vcpu_run, .handle_exit = vmx_handle_exit, .skip_emulated_instruction = vmx_skip_emulated_instruction, .update_emulated_instruction = vmx_update_emulated_instruction, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ed8a3cb53961..99f02972cd74 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -275,7 +275,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, { struct vmcs_host_state *dest, *src; - if (unlikely(!vmx->guest_state_loaded)) + if (unlikely(!vmx->vt.guest_state_loaded)) return; src = &prev->host_state; @@ -425,7 +425,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, * tables also changed, but KVM should not treat EPT Misconfig * VM-Exits as writes. */ - WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION); + WARN_ON_ONCE(vmx->vt.exit_reason.basic != EXIT_REASON_EPT_VIOLATION); /* * PML Full and EPT Violation VM-Exits both use bit 12 to report @@ -4622,7 +4622,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { /* update exit information fields: */ vmcs12->vm_exit_reason = vm_exit_reason; - if (to_vmx(vcpu)->exit_reason.enclave_mode) + if (vmx_get_exit_reason(vcpu).enclave_mode) vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE; vmcs12->exit_qualification = exit_qualification; @@ -6126,7 +6126,7 @@ fail: * nested VM-Exit. Pass the original exit reason, i.e. don't hardcode * EXIT_REASON_VMFUNC as the exit reason. */ - nested_vmx_vmexit(vcpu, vmx->exit_reason.full, + nested_vmx_vmexit(vcpu, vmx->vt.exit_reason.full, vmx_get_intr_info(vcpu), vmx_get_exit_qual(vcpu)); return 1; @@ -6571,7 +6571,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - union vmx_exit_reason exit_reason = vmx->exit_reason; + union vmx_exit_reason exit_reason = vmx->vt.exit_reason; unsigned long exit_qual; u32 exit_intr_info; diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index ec08fa3caf43..5696e0f9f924 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -33,7 +33,7 @@ static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock); static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { - return &(to_vmx(vcpu)->pi_desc); + return &(to_vt(vcpu)->pi_desc); } static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new) @@ -53,7 +53,7 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new) void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); struct pi_desc old, new; unsigned long flags; unsigned int dest; @@ -90,7 +90,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) */ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) { raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); - list_del(&vmx->pi_wakeup_list); + list_del(&vt->pi_wakeup_list); raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); } @@ -146,14 +146,14 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm) static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); struct pi_desc old, new; unsigned long flags; local_irq_save(flags); raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); - list_add_tail(&vmx->pi_wakeup_list, + list_add_tail(&vt->pi_wakeup_list, &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu)); raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); @@ -220,13 +220,13 @@ void pi_wakeup_handler(void) int cpu = smp_processor_id(); struct list_head *wakeup_list = &per_cpu(wakeup_vcpus_on_cpu, cpu); raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, cpu); - struct vcpu_vmx *vmx; + struct vcpu_vt *vt; raw_spin_lock(spinlock); - list_for_each_entry(vmx, wakeup_list, pi_wakeup_list) { + list_for_each_entry(vt, wakeup_list, pi_wakeup_list) { - if (pi_test_on(&vmx->pi_desc)) - kvm_vcpu_wake_up(&vmx->vcpu); + if (pi_test_on(&vt->pi_desc)) + kvm_vcpu_wake_up(vt_to_vcpu(vt)); } raw_spin_unlock(spinlock); } diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 277f5decde9c..a6388eb95988 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -2,7 +2,9 @@ #include <linux/cleanup.h> #include <linux/cpu.h> #include <asm/cpufeature.h> +#include <asm/fpu/xcr.h> #include <linux/misc_cgroup.h> +#include <linux/mmu_context.h> #include <asm/tdx.h> #include "capabilities.h" #include "mmu.h" @@ -12,6 +14,9 @@ #include "vmx.h" #include "mmu/spte.h" #include "common.h" +#include "posted_intr.h" +#include <trace/events/kvm.h> +#include "trace.h" #pragma GCC poison to_vmx @@ -102,6 +107,44 @@ static u32 tdx_set_guest_phys_addr_bits(const u32 eax, int addr_bits) return (eax & ~GENMASK(23, 16)) | (addr_bits & 0xff) << 16; } +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) + +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) +{ + return entry->function == 7 && entry->index == 0 && + (entry->ebx & TDX_FEATURE_TSX); +} + +static void clear_tsx(struct kvm_cpuid_entry2 *entry) +{ + entry->ebx &= ~TDX_FEATURE_TSX; +} + +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) +{ + return entry->function == 7 && entry->index == 0 && + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); +} + +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) +{ + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); +} + +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) +{ + if (has_tsx(entry)) + clear_tsx(entry); + + if (has_waitpkg(entry)) + clear_waitpkg(entry); +} + +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) +{ + return has_tsx(entry) || has_waitpkg(entry); +} + #define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1) static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char idx) @@ -125,6 +168,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i */ if (entry->function == 0x80000008) entry->eax = tdx_set_guest_phys_addr_bits(entry->eax, 0xff); + + tdx_clear_unsupported_cpuid(entry); } static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, @@ -585,6 +630,7 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX; + vcpu->arch.switch_db_regs = KVM_DEBUGREG_AUTO_SWITCH; vcpu->arch.cr0_guest_owned_bits = -1ul; vcpu->arch.cr4_guest_owned_bits = -1ul; @@ -627,6 +673,74 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) local_irq_enable(); } +/* + * Compared to vmx_prepare_switch_to_guest(), there is not much to do + * as SEAMCALL/SEAMRET calls take care of most of save and restore. + */ +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) +{ + struct vcpu_vt *vt = to_vt(vcpu); + + if (vt->guest_state_loaded) + return; + + if (likely(is_64bit_mm(current->mm))) + vt->msr_host_kernel_gs_base = current->thread.gsbase; + else + vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); + + vt->host_debugctlmsr = get_debugctlmsr(); + + vt->guest_state_loaded = true; +} + +struct tdx_uret_msr { + u32 msr; + unsigned int slot; + u64 defval; +}; + +static struct tdx_uret_msr tdx_uret_msrs[] = { + {.msr = MSR_SYSCALL_MASK, .defval = 0x20200 }, + {.msr = MSR_STAR,}, + {.msr = MSR_LSTAR,}, + {.msr = MSR_TSC_AUX,}, +}; + +static void tdx_user_return_msr_update_cache(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tdx_uret_msrs); i++) + kvm_user_return_msr_update_cache(tdx_uret_msrs[i].slot, + tdx_uret_msrs[i].defval); +} + +static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu) +{ + struct vcpu_vt *vt = to_vt(vcpu); + struct vcpu_tdx *tdx = to_tdx(vcpu); + + if (!vt->guest_state_loaded) + return; + + ++vcpu->stat.host_state_reload; + wrmsrl(MSR_KERNEL_GS_BASE, vt->msr_host_kernel_gs_base); + + if (tdx->guest_entered) { + tdx_user_return_msr_update_cache(); + tdx->guest_entered = false; + } + + vt->guest_state_loaded = false; +} + +void tdx_vcpu_put(struct kvm_vcpu *vcpu) +{ + vmx_vcpu_pi_put(vcpu); + tdx_prepare_switch_to_host(vcpu); +} + void tdx_vcpu_free(struct kvm_vcpu *vcpu) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); @@ -660,6 +774,102 @@ void tdx_vcpu_free(struct kvm_vcpu *vcpu) tdx->state = VCPU_TD_STATE_UNINITIALIZED; } +int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu) +{ + if (unlikely(to_tdx(vcpu)->state != VCPU_TD_STATE_INITIALIZED || + to_kvm_tdx(vcpu->kvm)->state != TD_STATE_RUNNABLE)) + return -EINVAL; + + return 1; +} + +static noinstr void tdx_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + guest_state_enter_irqoff(); + + tdx->vp_enter_ret = tdh_vp_enter(&tdx->vp, &tdx->vp_enter_args); + + guest_state_exit_irqoff(); +} + +#define TDX_REGS_AVAIL_SET (BIT_ULL(VCPU_EXREG_EXIT_INFO_1) | \ + BIT_ULL(VCPU_EXREG_EXIT_INFO_2) | \ + BIT_ULL(VCPU_REGS_RAX) | \ + BIT_ULL(VCPU_REGS_RBX) | \ + BIT_ULL(VCPU_REGS_RCX) | \ + BIT_ULL(VCPU_REGS_RDX) | \ + BIT_ULL(VCPU_REGS_RBP) | \ + BIT_ULL(VCPU_REGS_RSI) | \ + BIT_ULL(VCPU_REGS_RDI) | \ + BIT_ULL(VCPU_REGS_R8) | \ + BIT_ULL(VCPU_REGS_R9) | \ + BIT_ULL(VCPU_REGS_R10) | \ + BIT_ULL(VCPU_REGS_R11) | \ + BIT_ULL(VCPU_REGS_R12) | \ + BIT_ULL(VCPU_REGS_R13) | \ + BIT_ULL(VCPU_REGS_R14) | \ + BIT_ULL(VCPU_REGS_R15)) + +static void tdx_load_host_xsave_state(struct kvm_vcpu *vcpu) +{ + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); + + /* + * All TDX hosts support PKRU; but even if they didn't, + * vcpu->arch.host_pkru would be 0 and the wrpkru would be + * skipped. + */ + if (vcpu->arch.host_pkru != 0) + wrpkru(vcpu->arch.host_pkru); + + if (kvm_host.xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0)) + xsetbv(XCR_XFEATURE_ENABLED_MASK, kvm_host.xcr0); + + /* + * Likewise, even if a TDX hosts didn't support XSS both arms of + * the comparison would be 0 and the wrmsrl would be skipped. + */ + if (kvm_host.xss != (kvm_tdx->xfam & kvm_caps.supported_xss)) + wrmsrl(MSR_IA32_XSS, kvm_host.xss); +} + +#define TDX_DEBUGCTL_PRESERVED (DEBUGCTLMSR_BTF | \ + DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI | \ + DEBUGCTLMSR_FREEZE_IN_SMM) + +fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); + + /* + * force_immediate_exit requires vCPU entering for events injection with + * an immediately exit followed. But The TDX module doesn't guarantee + * entry, it's already possible for KVM to _think_ it completely entry + * to the guest without actually having done so. + * Since KVM never needs to force an immediate exit for TDX, and can't + * do direct injection, just warn on force_immediate_exit. + */ + WARN_ON_ONCE(force_immediate_exit); + + trace_kvm_entry(vcpu, force_immediate_exit); + + tdx_vcpu_enter_exit(vcpu); + + if (vt->host_debugctlmsr & ~TDX_DEBUGCTL_PRESERVED) + update_debugctlmsr(vt->host_debugctlmsr); + + tdx_load_host_xsave_state(vcpu); + tdx->guest_entered = true; + + vcpu->arch.regs_avail &= TDX_REGS_AVAIL_SET; + + trace_kvm_exit(vcpu, KVM_ISA_VMX); + + return EXIT_FASTPATH_NONE; +} void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level) { @@ -1084,6 +1294,9 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, if (!entry) continue; + if (tdx_unsupported_cpuid(entry)) + return -EINVAL; + copy_cnt++; value = &td_params->cpuid_values[i]; @@ -2113,7 +2326,25 @@ static int __init __do_tdx_bringup(void) static int __init __tdx_bringup(void) { const struct tdx_sys_info_td_conf *td_conf; - int r; + int r, i; + + for (i = 0; i < ARRAY_SIZE(tdx_uret_msrs); i++) { + /* + * Check if MSRs (tdx_uret_msrs) can be saved/restored + * before returning to user space. + * + * this_cpu_ptr(user_return_msrs)->registered isn't checked + * because the registration is done at vcpu runtime by + * tdx_user_return_msr_update_cache(). + */ + tdx_uret_msrs[i].slot = kvm_find_user_return_msr(tdx_uret_msrs[i].msr); + if (tdx_uret_msrs[i].slot == -1) { + /* If any MSR isn't supported, it is a KVM bug */ + pr_err("MSR %x isn't included by kvm_find_user_return_msr\n", + tdx_uret_msrs[i].msr); + return -EIO; + } + } /* * Enabling TDX requires enabling hardware virtualization first, @@ -2230,6 +2461,11 @@ int __init tdx_bringup(void) goto success_disable_tdx; } + if (!cpu_feature_enabled(X86_FEATURE_OSXSAVE)) { + pr_err("tdx: OSXSAVE is required for TDX\n"); + goto success_disable_tdx; + } + if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) { pr_err("tdx: MOVDIR64B is required for TDX\n"); goto success_disable_tdx; diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h index 5f34b79d16dd..0a54d286e380 100644 --- a/arch/x86/kvm/vmx/tdx.h +++ b/arch/x86/kvm/vmx/tdx.h @@ -6,6 +6,8 @@ #include "tdx_errno.h" #ifdef CONFIG_KVM_INTEL_TDX +#include "common.h" + int tdx_bringup(void); void tdx_cleanup(void); @@ -45,28 +47,23 @@ enum vcpu_tdx_state { struct vcpu_tdx { struct kvm_vcpu vcpu; + struct vcpu_vt vt; + struct tdx_module_args vp_enter_args; struct tdx_vp vp; struct list_head cpu_list; + u64 vp_enter_ret; + enum vcpu_tdx_state state; + bool guest_entered; }; void tdh_vp_rd_failed(struct vcpu_tdx *tdx, char *uclass, u32 field, u64 err); void tdh_vp_wr_failed(struct vcpu_tdx *tdx, char *uclass, char *op, u32 field, u64 val, u64 err); -static inline bool is_td(struct kvm *kvm) -{ - return kvm->arch.vm_type == KVM_X86_TDX_VM; -} - -static inline bool is_td_vcpu(struct kvm_vcpu *vcpu) -{ - return is_td(vcpu->kvm); -} - static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field) { u64 err, data; @@ -176,9 +173,6 @@ struct vcpu_tdx { struct kvm_vcpu vcpu; }; -static inline bool is_td(struct kvm *kvm) { return false; } -static inline bool is_td_vcpu(struct kvm_vcpu *vcpu) { return false; } - #endif #endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 39d55d638899..e7e451209306 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1282,6 +1282,7 @@ void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); struct vmcs_host_state *host_state; #ifdef CONFIG_X86_64 int cpu = raw_smp_processor_id(); @@ -1310,7 +1311,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) if (vmx->nested.need_vmcs12_to_shadow_sync) nested_sync_vmcs12_to_shadow(vcpu); - if (vmx->guest_state_loaded) + if (vt->guest_state_loaded) return; host_state = &vmx->loaded_vmcs->host_state; @@ -1331,12 +1332,12 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) fs_sel = current->thread.fsindex; gs_sel = current->thread.gsindex; fs_base = current->thread.fsbase; - vmx->msr_host_kernel_gs_base = current->thread.gsbase; + vt->msr_host_kernel_gs_base = current->thread.gsbase; } else { savesegment(fs, fs_sel); savesegment(gs, gs_sel); fs_base = read_msr(MSR_FS_BASE); - vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); + vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); @@ -1348,14 +1349,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) #endif vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); - vmx->guest_state_loaded = true; + vt->guest_state_loaded = true; } static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) { struct vmcs_host_state *host_state; - if (!vmx->guest_state_loaded) + if (!vmx->vt.guest_state_loaded) return; host_state = &vmx->loaded_vmcs->host_state; @@ -1383,10 +1384,10 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) #endif invalidate_tss_limit(); #ifdef CONFIG_X86_64 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->vt.msr_host_kernel_gs_base); #endif load_fixmap_gdt(raw_smp_processor_id()); - vmx->guest_state_loaded = false; + vmx->vt.guest_state_loaded = false; vmx->guest_uret_msrs_loaded = false; } @@ -1394,7 +1395,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { preempt_disable(); - if (vmx->guest_state_loaded) + if (vmx->vt.guest_state_loaded) rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); preempt_enable(); return vmx->msr_guest_kernel_gs_base; @@ -1403,7 +1404,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { preempt_disable(); - if (vmx->guest_state_loaded) + if (vmx->vt.guest_state_loaded) wrmsrl(MSR_KERNEL_GS_BASE, data); preempt_enable(); vmx->msr_guest_kernel_gs_base = data; @@ -1699,7 +1700,7 @@ int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * so that guest userspace can't DoS the guest simply by triggering * emulation (enclaves are CPL3 only). */ - if (to_vmx(vcpu)->exit_reason.enclave_mode) { + if (vmx_get_exit_reason(vcpu).enclave_mode) { kvm_queue_exception(vcpu, UD_VECTOR); return X86EMUL_PROPAGATE_FAULT; } @@ -1714,7 +1715,7 @@ int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { - union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason; + union vmx_exit_reason exit_reason = vmx_get_exit_reason(vcpu); unsigned long rip, orig_rip; u32 instr_len; @@ -4273,7 +4274,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, */ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); @@ -4284,11 +4285,11 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (!vcpu->arch.apic->apicv_active) return -1; - if (pi_test_and_set_pir(vector, &vmx->pi_desc)) + if (pi_test_and_set_pir(vector, &vt->pi_desc)) return 0; /* If a previous notification has sent the IPI, nothing to do. */ - if (pi_test_and_set_on(&vmx->pi_desc)) + if (pi_test_and_set_on(&vt->pi_desc)) return 0; /* @@ -4764,7 +4765,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write16(GUEST_INTR_STATUS, 0); vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); - vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->vt.pi_desc))); } if (vmx_can_use_ipiv(&vmx->vcpu)) { @@ -4877,8 +4878,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR * or POSTED_INTR_WAKEUP_VECTOR. */ - vmx->pi_desc.nv = POSTED_INTR_VECTOR; - __pi_set_sn(&vmx->pi_desc); + vmx->vt.pi_desc.nv = POSTED_INTR_VECTOR; + __pi_set_sn(&vmx->vt.pi_desc); } void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -6064,7 +6065,7 @@ static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) * VM-Exits. Unconditionally set the flag here and leave the handling to * vmx_handle_exit(). */ - to_vmx(vcpu)->exit_reason.bus_lock_detected = true; + to_vt(vcpu)->exit_reason.bus_lock_detected = true; return 1; } @@ -6162,9 +6163,9 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, { struct vcpu_vmx *vmx = to_vmx(vcpu); - *reason = vmx->exit_reason.full; + *reason = vmx->vt.exit_reason.full; *info1 = vmx_get_exit_qual(vcpu); - if (!(vmx->exit_reason.failed_vmentry)) { + if (!(vmx->vt.exit_reason.failed_vmentry)) { *info2 = vmx->idt_vectoring_info; *intr_info = vmx_get_intr_info(vcpu); if (is_exception_with_error_code(*intr_info)) @@ -6460,7 +6461,7 @@ void dump_vmcs(struct kvm_vcpu *vcpu) static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); - union vmx_exit_reason exit_reason = vmx->exit_reason; + union vmx_exit_reason exit_reason = vmx_get_exit_reason(vcpu); u32 vectoring_info = vmx->idt_vectoring_info; u16 exit_handler_index; @@ -6626,7 +6627,7 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) * Exit to user space when bus lock detected to inform that there is * a bus lock in guest. */ - if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { + if (vmx_get_exit_reason(vcpu).bus_lock_detected) { if (ret > 0) vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; @@ -6905,22 +6906,22 @@ static void vmx_set_rvi(int vector) int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); int max_irr; bool got_posted_interrupt; if (KVM_BUG_ON(!enable_apicv, vcpu->kvm)) return -EIO; - if (pi_test_on(&vmx->pi_desc)) { - pi_clear_on(&vmx->pi_desc); + if (pi_test_on(&vt->pi_desc)) { + pi_clear_on(&vt->pi_desc); /* * IOMMU can write to PID.ON, so the barrier matters even on UP. * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); got_posted_interrupt = - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); + kvm_apic_update_irr(vcpu, vt->pi_desc.pir, &max_irr); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); got_posted_interrupt = false; @@ -6962,10 +6963,10 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vt *vt = to_vt(vcpu); - pi_clear_on(&vmx->pi_desc); - memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); + pi_clear_on(&vt->pi_desc); + memset(vt->pi_desc.pir, 0, sizeof(vt->pi_desc.pir)); } void vmx_do_interrupt_irqoff(unsigned long entry); @@ -7030,9 +7031,9 @@ void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return; - if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) + if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXTERNAL_INTERRUPT) handle_external_interrupt_irqoff(vcpu, vmx_get_intr_info(vcpu)); - else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) + else if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXCEPTION_NMI) handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu)); } @@ -7263,10 +7264,10 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, * the fastpath even, all other exits must use the slow path. */ if (is_guest_mode(vcpu) && - to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER) + vmx_get_exit_reason(vcpu).basic != EXIT_REASON_PREEMPTION_TIMER) return EXIT_FASTPATH_NONE; - switch (to_vmx(vcpu)->exit_reason.basic) { + switch (vmx_get_exit_reason(vcpu).basic) { case EXIT_REASON_MSR_WRITE: return handle_fastpath_set_msr_irqoff(vcpu); case EXIT_REASON_PREEMPTION_TIMER: @@ -7313,15 +7314,15 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_enable_fb_clear(vmx); if (unlikely(vmx->fail)) { - vmx->exit_reason.full = 0xdead; + vmx->vt.exit_reason.full = 0xdead; goto out; } - vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON); - if (likely(!vmx->exit_reason.failed_vmentry)) + vmx->vt.exit_reason.full = vmcs_read32(VM_EXIT_REASON); + if (likely(!vmx_get_exit_reason(vcpu).failed_vmentry)) vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI && + if ((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXCEPTION_NMI && is_nmi(vmx_get_intr_info(vcpu))) { kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); if (cpu_feature_enabled(X86_FEATURE_FRED)) @@ -7353,12 +7354,12 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) if (unlikely(vmx->emulation_required)) { vmx->fail = 0; - vmx->exit_reason.full = EXIT_REASON_INVALID_STATE; - vmx->exit_reason.failed_vmentry = 1; + vmx->vt.exit_reason.full = EXIT_REASON_INVALID_STATE; + vmx->vt.exit_reason.failed_vmentry = 1; kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1); - vmx->exit_qualification = ENTRY_FAIL_DEFAULT; + vmx->vt.exit_qualification = ENTRY_FAIL_DEFAULT; kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2); - vmx->exit_intr_info = 0; + vmx->vt.exit_intr_info = 0; return EXIT_FASTPATH_NONE; } @@ -7461,7 +7462,7 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) * checking. */ if (vmx->nested.nested_run_pending && - !vmx->exit_reason.failed_vmentry) + !vmx_get_exit_reason(vcpu).failed_vmentry) ++vcpu->stat.nested_run; vmx->nested.nested_run_pending = 0; @@ -7470,12 +7471,12 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) if (unlikely(vmx->fail)) return EXIT_FASTPATH_NONE; - if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY)) + if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY)) kvm_machine_check(); trace_kvm_exit(vcpu, KVM_ISA_VMX); - if (unlikely(vmx->exit_reason.failed_vmentry)) + if (unlikely(vmx_get_exit_reason(vcpu).failed_vmentry)) return EXIT_FASTPATH_NONE; vmx->loaded_vmcs->launched = 1; @@ -7507,7 +7508,7 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu) BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); vmx = to_vmx(vcpu); - INIT_LIST_HEAD(&vmx->pi_wakeup_list); + INIT_LIST_HEAD(&vmx->vt.pi_wakeup_list); err = -ENOMEM; @@ -7605,7 +7606,7 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu) if (vmx_can_use_ipiv(vcpu)) WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id], - __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID); + __pa(&vmx->vt.pi_desc) | PID_TABLE_ENTRY_VALID); return 0; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a4ee44ffaede..e635199901e2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -17,6 +17,7 @@ #include "../cpuid.h" #include "run_flags.h" #include "../mmu.h" +#include "common.h" #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) @@ -68,29 +69,6 @@ struct pt_desc { struct pt_ctx guest; }; -union vmx_exit_reason { - struct { - u32 basic : 16; - u32 reserved16 : 1; - u32 reserved17 : 1; - u32 reserved18 : 1; - u32 reserved19 : 1; - u32 reserved20 : 1; - u32 reserved21 : 1; - u32 reserved22 : 1; - u32 reserved23 : 1; - u32 reserved24 : 1; - u32 reserved25 : 1; - u32 bus_lock_detected : 1; - u32 enclave_mode : 1; - u32 smi_pending_mtf : 1; - u32 smi_from_vmx_root : 1; - u32 reserved30 : 1; - u32 failed_vmentry : 1; - }; - u32 full; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -231,20 +209,10 @@ struct nested_vmx { struct vcpu_vmx { struct kvm_vcpu vcpu; + struct vcpu_vt vt; u8 fail; u8 x2apic_msr_bitmap_mode; - /* - * If true, host state has been stored in vmx->loaded_vmcs for - * the CPU registers that only need to be switched when transitioning - * to/from the kernel, and the registers have been loaded with guest - * values. If false, host state is loaded in the CPU registers - * and vmx->loaded_vmcs->host_state is invalid. - */ - bool guest_state_loaded; - - unsigned long exit_qualification; - u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -257,7 +225,6 @@ struct vcpu_vmx { struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS]; bool guest_uret_msrs_loaded; #ifdef CONFIG_X86_64 - u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif @@ -298,14 +265,6 @@ struct vcpu_vmx { int vpid; bool emulation_required; - union vmx_exit_reason exit_reason; - - /* Posted interrupt descriptor */ - struct pi_desc pi_desc; - - /* Used if this vCPU is waiting for PI notification wakeup. */ - struct list_head pi_wakeup_list; - /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; @@ -359,6 +318,43 @@ struct kvm_vmx { u64 *pid_table; }; +static __always_inline struct vcpu_vt *to_vt(struct kvm_vcpu *vcpu) +{ + return &(container_of(vcpu, struct vcpu_vmx, vcpu)->vt); +} + +static __always_inline struct kvm_vcpu *vt_to_vcpu(struct vcpu_vt *vt) +{ + return &(container_of(vt, struct vcpu_vmx, vt)->vcpu); +} + +static __always_inline union vmx_exit_reason vmx_get_exit_reason(struct kvm_vcpu *vcpu) +{ + return to_vt(vcpu)->exit_reason; +} + +static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) +{ + struct vcpu_vt *vt = to_vt(vcpu); + + if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1) && + !WARN_ON_ONCE(is_td_vcpu(vcpu))) + vt->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + return vt->exit_qualification; +} + +static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_vt *vt = to_vt(vcpu); + + if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2) && + !WARN_ON_ONCE(is_td_vcpu(vcpu))) + vt->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + return vt->exit_intr_info; +} + void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, struct loaded_vmcs *buddy); int allocate_vpid(void); @@ -649,26 +645,6 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); -static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1)) - vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - return vmx->exit_qualification; -} - -static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2)) - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - return vmx->exit_intr_info; -} - struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); void free_vmcs(struct vmcs *vmcs); int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index f47d739051cf..cd18e9b1e124 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -131,6 +131,10 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp); int tdx_vcpu_create(struct kvm_vcpu *vcpu); void tdx_vcpu_free(struct kvm_vcpu *vcpu); void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu); +fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit); +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); +void tdx_vcpu_put(struct kvm_vcpu *vcpu); int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); @@ -157,6 +161,13 @@ static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOP static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; } static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {} static inline void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {} +static inline int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; } +static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) +{ + return EXIT_FASTPATH_NONE; +} +static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {} +static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {} static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f3292ccdca4..1ef83f24449d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -636,6 +636,15 @@ static void kvm_user_return_msr_cpu_online(void) } } +static void kvm_user_return_register_notifier(struct kvm_user_return_msrs *msrs) +{ + if (!msrs->registered) { + msrs->urn.on_user_return = kvm_on_user_return; + user_return_notifier_register(&msrs->urn); + msrs->registered = true; + } +} + int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) { struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); @@ -649,15 +658,20 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) return 1; msrs->values[slot].curr = value; - if (!msrs->registered) { - msrs->urn.on_user_return = kvm_on_user_return; - user_return_notifier_register(&msrs->urn); - msrs->registered = true; - } + kvm_user_return_register_notifier(msrs); return 0; } EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); +void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) +{ + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); + + msrs->values[slot].curr = value; + kvm_user_return_register_notifier(msrs); +} +EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache); + static void drop_user_return_notifiers(void) { struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); @@ -10971,7 +10985,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_fpu.xfd_err) wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err); - if (unlikely(vcpu->arch.switch_db_regs)) { + if (unlikely(vcpu->arch.switch_db_regs && + !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) { set_debugreg(0, 7); set_debugreg(vcpu->arch.eff_db[0], 0); set_debugreg(vcpu->arch.eff_db[1], 1); @@ -11023,6 +11038,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); + WARN_ON(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH); kvm_x86_call(sync_dirty_debug_regs)(vcpu); kvm_update_dr0123(vcpu); kvm_update_dr7(vcpu); diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S index 5b1f2286aea9..6854c52c374b 100644 --- a/arch/x86/virt/vmx/tdx/seamcall.S +++ b/arch/x86/virt/vmx/tdx/seamcall.S @@ -41,6 +41,9 @@ SYM_FUNC_START(__seamcall_ret) TDX_MODULE_CALL host=1 ret=1 SYM_FUNC_END(__seamcall_ret) +/* KVM requires non-instrumentable __seamcall_saved_ret() for TDH.VP.ENTER */ +.section .noinstr.text, "ax" + /* * __seamcall_saved_ret() - Host-side interface functions to SEAM software * (the P-SEAMLDR or the TDX module), with saving output registers to the diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 8eaaf31a4187..f5e2a937c1e7 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1517,6 +1517,14 @@ static void tdx_clflush_page(struct page *page) clflush_cache_range(page_to_virt(page), PAGE_SIZE); } +noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args) +{ + args->rcx = tdx_tdvpr_pa(td); + + return __seamcall_saved_ret(TDH_VP_ENTER, args); +} +EXPORT_SYMBOL_GPL(tdh_vp_enter); + u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) { struct tdx_module_args args = { diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index ed7152f81a6d..82bb82be8567 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -14,6 +14,7 @@ /* * TDX module SEAMCALL leaf functions */ +#define TDH_VP_ENTER 0 #define TDH_MNG_ADDCX 1 #define TDH_MEM_PAGE_ADD 2 #define TDH_MEM_SEPT_ADD 3 |