diff options
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx.c | 419 |
1 files changed, 233 insertions, 186 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6970249c09fc..7c3522a989d0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -200,6 +200,8 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; + unsigned long vmcs_host_cr3; /* May not match real cr3 */ + unsigned long vmcs_host_cr4; /* May not match real cr4 */ struct list_head loaded_vmcss_on_cpu_link; }; @@ -484,6 +486,14 @@ struct nested_vmx { u64 nested_vmx_cr4_fixed1; u64 nested_vmx_vmcs_enum; u64 nested_vmx_vmfunc_controls; + + /* SMM related state */ + struct { + /* in VMX operation on SMM entry? */ + bool vmxon; + /* in guest mode on SMM entry? */ + bool guest_mode; + } smm; }; #define POSTED_INTR_ON 0 @@ -600,8 +610,6 @@ struct vcpu_vmx { int gs_ldt_reload_needed; int fs_reload_needed; u64 msr_host_bndcfgs; - unsigned long vmcs_host_cr3; /* May not match real cr3 */ - unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { int vm86_active; @@ -900,16 +908,13 @@ static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); static bool vmx_xsaves_supported(void); -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); -static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); -static int alloc_identity_pagetable(struct kvm *kvm); static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, @@ -1598,18 +1603,15 @@ static inline void vpid_sync_context(int vpid) static inline void ept_sync_global(void) { - if (cpu_has_vmx_invept_global()) - __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); + __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); } static inline void ept_sync_context(u64 eptp) { - if (enable_ept) { - if (cpu_has_vmx_invept_context()) - __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); - else - ept_sync_global(); - } + if (cpu_has_vmx_invept_context()) + __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); + else + ept_sync_global(); } static __always_inline void vmcs_check16(unsigned long field) @@ -2202,46 +2204,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) struct pi_desc old, new; unsigned int dest; - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + /* + * In case of hot-plug or hot-unplug, we may have to undo + * vmx_vcpu_pi_put even if there is no assigned device. And we + * always keep PI.NDST up to date for simplicity: it makes the + * code easier, and CPU migration is not a fast path. + */ + if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) + return; + + /* + * First handle the simple case where no cmpxchg is necessary; just + * allow posting non-urgent interrupts. + * + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change + * PI.NDST: pi_post_block will do it for us and the wakeup_handler + * expects the VCPU to be on the blocked_vcpu_list that matches + * PI.NDST. + */ + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || + vcpu->cpu == cpu) { + pi_clear_sn(pi_desc); return; + } + /* The full case. */ do { old.control = new.control = pi_desc->control; - /* - * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there - * are two possible cases: - * 1. After running 'pre_block', context switch - * happened. For this case, 'sn' was set in - * vmx_vcpu_put(), so we need to clear it here. - * 2. After running 'pre_block', we were blocked, - * and woken up by some other guy. For this case, - * we don't need to do anything, 'pi_post_block' - * will do everything for us. However, we cannot - * check whether it is case #1 or case #2 here - * (maybe, not needed), so we also clear sn here, - * I think it is not a big deal. - */ - if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { - if (vcpu->cpu != cpu) { - dest = cpu_physical_id(cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - } + dest = cpu_physical_id(cpu); - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; - /* Allow posting non-urgent interrupts */ new.sn = 0; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); } static void decache_tsc_multiplier(struct vcpu_vmx *vmx) @@ -2833,8 +2833,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_PML; vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; } - } else - vmx->nested.nested_vmx_ept_caps = 0; + } if (cpu_has_vmx_vmfunc()) { vmx->nested.nested_vmx_secondary_ctls_high |= @@ -2843,8 +2842,9 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * Advertise EPTP switching unconditionally * since we emulate it */ - vmx->nested.nested_vmx_vmfunc_controls = - VMX_VMFUNC_EPTP_SWITCHING; + if (enable_ept) + vmx->nested.nested_vmx_vmfunc_controls = + VMX_VMFUNC_EPTP_SWITCHING; } /* @@ -2858,8 +2858,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_ENABLE_VPID; vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; - } else - vmx->nested.nested_vmx_vpid_caps = 0; + } if (enable_unrestricted_guest) vmx->nested.nested_vmx_secondary_ctls_high |= @@ -3546,7 +3545,8 @@ static int hardware_enable(void) wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); } kvm_cpu_vmxon(phys_addr); - ept_sync_global(); + if (enable_ept) + ept_sync_global(); return 0; } @@ -3659,8 +3659,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_RDSEED | - SECONDARY_EXEC_RDRAND | + SECONDARY_EXEC_RDSEED_EXITING | + SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_ENABLE_VMFUNC; @@ -3681,14 +3681,25 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, + &vmx_capability.ept, &vmx_capability.vpid); + if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { /* CR3 accesses and invlpg don't need to cause VM Exits when EPT enabled */ _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING | CPU_BASED_INVLPG_EXITING); - rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, - vmx_capability.ept, vmx_capability.vpid); + } else if (vmx_capability.ept) { + vmx_capability.ept = 0; + pr_warn_once("EPT CAP should not exist if not support " + "1-setting enable EPT VM-execution control\n"); + } + if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && + vmx_capability.vpid) { + vmx_capability.vpid = 0; + pr_warn_once("VPID CAP should not exist if not support " + "1-setting enable VPID VM-execution control\n"); } min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; @@ -4783,18 +4794,18 @@ static int init_rmode_identity_map(struct kvm *kvm) kvm_pfn_t identity_map_pfn; u32 tmp; - if (!enable_ept) - return 0; - /* Protect kvm->arch.ept_identity_pagetable_done. */ mutex_lock(&kvm->slots_lock); if (likely(kvm->arch.ept_identity_pagetable_done)) goto out2; + if (!kvm->arch.ept_identity_map_addr) + kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; - r = alloc_identity_pagetable(kvm); + r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + kvm->arch.ept_identity_map_addr, PAGE_SIZE); if (r < 0) goto out2; @@ -4866,20 +4877,6 @@ out: return r; } -static int alloc_identity_pagetable(struct kvm *kvm) -{ - /* Called with kvm->slots_lock held. */ - - int r = 0; - - BUG_ON(kvm->arch.ept_identity_pagetable_done); - - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm->arch.ept_identity_map_addr, PAGE_SIZE); - - return r; -} - static int allocate_vpid(void) { int vpid; @@ -5178,12 +5175,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) */ cr3 = __read_cr3(); vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->host_state.vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; /* Save the most likely value for this task's CR4 in the VMCS. */ cr4 = cr4_read_shadow(); vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->host_state.vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 @@ -5284,13 +5281,13 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) static bool vmx_rdrand_supported(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_RDRAND; + SECONDARY_EXEC_RDRAND_EXITING; } static bool vmx_rdseed_supported(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_RDSEED; + SECONDARY_EXEC_RDSEED_EXITING; } static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) @@ -5384,30 +5381,30 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (vmx_rdrand_supported()) { bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); if (rdrand_enabled) - exec_control &= ~SECONDARY_EXEC_RDRAND; + exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; if (nested) { if (rdrand_enabled) vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_RDRAND; + SECONDARY_EXEC_RDRAND_EXITING; else vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_RDRAND; + ~SECONDARY_EXEC_RDRAND_EXITING; } } if (vmx_rdseed_supported()) { bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); if (rdseed_enabled) - exec_control &= ~SECONDARY_EXEC_RDSEED; + exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; if (nested) { if (rdseed_enabled) vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_RDSEED; + SECONDARY_EXEC_RDSEED_EXITING; else vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_RDSEED; + ~SECONDARY_EXEC_RDSEED_EXITING; } } @@ -5428,7 +5425,7 @@ static void ept_set_mmio_spte_mask(void) /* * Sets up the vmcs for emulated real mode. */ -static int vmx_vcpu_setup(struct vcpu_vmx *vmx) +static void vmx_vcpu_setup(struct vcpu_vmx *vmx) { #ifdef CONFIG_X86_64 unsigned long a; @@ -5541,8 +5538,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); } - - return 0; } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -5606,6 +5601,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); + if (kvm_mpx_supported()) + vmcs_write64(GUEST_BNDCFGS, 0); setup_msrs(vmx); @@ -5621,9 +5618,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (kvm_vcpu_apicv_active(vcpu)) - memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); - if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); @@ -5917,8 +5911,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) cr2 = vmcs_readl(EXIT_QUALIFICATION); /* EPT won't cause page fault directly */ WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); - return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0, - true); + return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); } ex_no = intr_info & INTR_INFO_VECTOR_MASK; @@ -6752,16 +6745,14 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ept() || !cpu_has_vmx_ept_4levels() || - !cpu_has_vmx_ept_mt_wb()) { + !cpu_has_vmx_ept_mt_wb() || + !cpu_has_vmx_invept_global()) enable_ept = 0; - enable_unrestricted_guest = 0; - enable_ept_ad_bits = 0; - } if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) enable_ept_ad_bits = 0; - if (!cpu_has_vmx_unrestricted_guest()) + if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) enable_unrestricted_guest = 0; if (!cpu_has_vmx_flexpriority()) @@ -6781,8 +6772,13 @@ static __init int hardware_setup(void) if (enable_ept && !cpu_has_vmx_ept_2m_page()) kvm_disable_largepages(); - if (!cpu_has_vmx_ple()) + if (!cpu_has_vmx_ple()) { ple_gap = 0; + ple_window = 0; + ple_window_grow = 0; + ple_window_max = 0; + ple_window_shrink = 0; + } if (!cpu_has_vmx_apicv()) { enable_apicv = 0; @@ -8420,9 +8416,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); case EXIT_REASON_RDRAND: - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND); + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING); case EXIT_REASON_RDSEED: - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED); + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING); case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: @@ -9273,15 +9269,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { vmcs_writel(HOST_CR3, cr3); - vmx->host_state.vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; } cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { vmcs_writel(HOST_CR4, cr4); - vmx->host_state.vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; } /* When single-stepping over STI and MOV SS, we must clear the @@ -9480,7 +9476,6 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) vmx->loaded_vmcs = vmcs; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); - vcpu->cpu = cpu; put_cpu(); } @@ -9561,11 +9556,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; - err = vmx_vcpu_setup(vmx); + vmx_vcpu_setup(vmx); vmx_vcpu_put(&vmx->vcpu); put_cpu(); - if (err) - goto free_vmcs; if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { err = alloc_apic_access_page(kvm); if (err) @@ -9573,9 +9566,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) } if (enable_ept) { - if (!kvm->arch.ept_identity_map_addr) - kvm->arch.ept_identity_map_addr = - VMX_EPT_IDENTITY_PAGETABLE_ADDR; err = init_rmode_identity_map(kvm); if (err) goto free_vmcs; @@ -9591,6 +9581,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + /* + * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR + * or POSTED_INTR_WAKEUP_VECTOR. + */ + vmx->pi_desc.nv = POSTED_INTR_VECTOR; + vmx->pi_desc.sn = 1; + return &vmx->vcpu; free_vmcs: @@ -9839,7 +9836,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, WARN_ON(!is_guest_mode(vcpu)); - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && + !to_vmx(vcpu)->nested.nested_run_pending) { vmcs12->vm_exit_intr_error_code = fault->error_code; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | @@ -11291,7 +11289,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, /* Same as above - no reason to call set_cr4_guest_host_mask(). */ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); - kvm_set_cr4(vcpu, vmcs12->host_cr4); + vmx_set_cr4(vcpu, vmcs12->host_cr4); nested_ept_uninit_mmu_context(vcpu); @@ -11322,6 +11320,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); + vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF); + vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF); /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) @@ -11418,8 +11418,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, leave_guest_mode(vcpu); if (likely(!vmx->fail)) { - prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, - exit_qualification); + if (exit_reason == -1) + sync_vmcs12(vcpu, vmcs12); + else + prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, + exit_qualification); if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr, vmcs12->vm_exit_msr_store_count)) @@ -11483,7 +11486,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (enable_shadow_vmcs) + if (enable_shadow_vmcs && exit_reason != -1) vmx->nested.sync_shadow_vmcs = true; /* in case we halted in L2 */ @@ -11507,12 +11510,13 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; } - trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, - vmcs12->exit_qualification, - vmcs12->idt_vectoring_info_field, - vmcs12->vm_exit_intr_info, - vmcs12->vm_exit_intr_error_code, - KVM_ISA_VMX); + if (exit_reason != -1) + trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, + vmcs12->exit_qualification, + vmcs12->idt_vectoring_info_field, + vmcs12->vm_exit_intr_info, + vmcs12->vm_exit_intr_error_code, + KVM_ISA_VMX); load_vmcs12_host_state(vcpu, vmcs12); @@ -11704,6 +11708,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +static void __pi_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + + do { + old.control = new.control = pi_desc->control; + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the VCPU is blocked\n"); + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); + + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + vcpu->pre_pcpu = -1; + } +} + /* * This routine does the following things for vCPU which is going * to be blocked if VT-d PI is enabled. @@ -11719,7 +11754,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, */ static int pi_pre_block(struct kvm_vcpu *vcpu) { - unsigned long flags; unsigned int dest; struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -11729,34 +11763,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) !kvm_vcpu_apicv_active(vcpu)) return 0; - vcpu->pre_pcpu = vcpu->cpu; - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); + WARN_ON(irqs_disabled()); + local_irq_disable(); + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { + vcpu->pre_pcpu = vcpu->cpu; + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + } do { old.control = new.control = pi_desc->control; - /* - * We should not block the vCPU if - * an interrupt is posted for it. - */ - if (pi_test_on(pi_desc) == 1) { - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - - return 1; - } - WARN((pi_desc->sn == 1), "Warning: SN field of posted-interrupts " "is set before blocking\n"); @@ -11778,10 +11798,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) /* set 'NV' to 'wakeup vector' */ new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); + } while (cmpxchg64(&pi_desc->control, old.control, + new.control) != old.control); - return 0; + /* We should not block the vCPU if an interrupt is posted for it. */ + if (pi_test_on(pi_desc) == 1) + __pi_post_block(vcpu); + + local_irq_enable(); + return (vcpu->pre_pcpu == -1); } static int vmx_pre_block(struct kvm_vcpu *vcpu) @@ -11797,44 +11822,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) static void pi_post_block(struct kvm_vcpu *vcpu) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - unsigned long flags; - - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (vcpu->pre_pcpu == -1) return; - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(vcpu->cpu); - - if (x2apic_enabled()) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; - - /* Allow posting non-urgent interrupts */ - new.sn = 0; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg(&pi_desc->control, old.control, - new.control) != old.control); - - if(vcpu->pre_pcpu != -1) { - spin_lock_irqsave( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock_irqrestore( - &per_cpu(blocked_vcpu_on_cpu_lock, - vcpu->pre_pcpu), flags); - vcpu->pre_pcpu = -1; - } + WARN_ON(irqs_disabled()); + local_irq_disable(); + __pi_post_block(vcpu); + local_irq_enable(); } static void vmx_post_block(struct kvm_vcpu *vcpu) @@ -11945,6 +11939,54 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEATURE_CONTROL_LMCE; } +static int vmx_smi_allowed(struct kvm_vcpu *vcpu) +{ + /* we need a nested vmexit to enter SMM, postpone if run is pending */ + if (to_vmx(vcpu)->nested.nested_run_pending) + return 0; + return 1; +} + +static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmx->nested.smm.guest_mode = is_guest_mode(vcpu); + if (vmx->nested.smm.guest_mode) + nested_vmx_vmexit(vcpu, -1, 0, 0); + + vmx->nested.smm.vmxon = vmx->nested.vmxon; + vmx->nested.vmxon = false; + return 0; +} + +static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int ret; + + if (vmx->nested.smm.vmxon) { + vmx->nested.vmxon = true; + vmx->nested.smm.vmxon = false; + } + + if (vmx->nested.smm.guest_mode) { + vcpu->arch.hflags &= ~HF_SMM_MASK; + ret = enter_vmx_non_root_mode(vcpu, false); + vcpu->arch.hflags |= HF_SMM_MASK; + if (ret) + return ret; + + vmx->nested.smm.guest_mode = false; + } + return 0; +} + +static int enable_smi_window(struct kvm_vcpu *vcpu) +{ + return 0; +} + static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -12070,6 +12112,11 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { #endif .setup_mce = vmx_setup_mce, + + .smi_allowed = vmx_smi_allowed, + .pre_enter_smm = vmx_pre_enter_smm, + .pre_leave_smm = vmx_pre_leave_smm, + .enable_smi_window = enable_smi_window, }; static int __init vmx_init(void) |