diff options
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 136 |
1 files changed, 97 insertions, 39 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 9c941535f78c..dc822a1d403d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include <asm/mmu_context.h> #include "cpuid.h" +#include "evmcs.h" #include "hyperv.h" #include "mmu.h" #include "nested.h" @@ -269,7 +270,13 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) vmx_sync_vmcs_host_state(vmx, prev); put_cpu(); - vmx_register_cache_reset(vcpu); + vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET; + + /* + * All lazily updated registers will be reloaded from VMCS12 on both + * vmentry and vmexit. + */ + vcpu->arch.regs_dirty = 0; } /* @@ -391,9 +398,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, static void nested_ept_new_eptp(struct kvm_vcpu *vcpu) { - kvm_init_shadow_ept_mmu(vcpu, - to_vmx(vcpu)->nested.msrs.ept_caps & - VMX_EPT_EXECUTE_ONLY_BIT, + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT; + int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps); + + kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level, nested_ept_ad_enabled(vcpu), nested_ept_get_eptp(vcpu)); } @@ -591,6 +600,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, int msr; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap; + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; struct kvm_host_map *map = &vmx->nested.msr_bitmap_map; /* Nothing to do if the MSR bitmap is not in use. */ @@ -598,6 +608,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return false; + /* + * MSR bitmap update can be skipped when: + * - MSR bitmap for L1 hasn't changed. + * - Nested hypervisor (L1) is attempting to launch the same L2 as + * before. + * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature + * and tells KVM (L0) there were no changes in MSR bitmap for L2. + */ + if (!vmx->nested.force_msr_bitmap_recalc && evmcs && + evmcs->hv_enlightenments_control.msr_bitmap && + evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP) + return true; + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) return false; @@ -664,6 +687,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); + vmx->nested.force_msr_bitmap_recalc = false; + return true; } @@ -1095,7 +1120,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, * must not be dereferenced. */ if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) && - CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { + CC(!load_pdptrs(vcpu, cr3))) { *entry_failure_code = ENTRY_FAIL_PDPTE; return -EINVAL; } @@ -1104,7 +1129,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, kvm_mmu_new_pgd(vcpu, cr3); vcpu->arch.cr3 = cr3; - kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); + kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3); /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */ kvm_init_mmu(vcpu); @@ -2021,10 +2046,13 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( * Clean fields data can't be used on VMLAUNCH and when we switch * between different L2 guests as KVM keeps a single VMCS12 per L1. */ - if (from_launch || evmcs_gpa_changed) + if (from_launch || evmcs_gpa_changed) { vmx->nested.hv_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + vmx->nested.force_msr_bitmap_recalc = true; + } + return EVMPTRLD_SUCCEEDED; } @@ -3145,7 +3173,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) * the guest CR3 might be restored prior to setting the nested * state which can lead to a load of wrong PDPTRs. */ - if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))) + if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3))) return false; } @@ -3504,10 +3532,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (evmptrld_status == EVMPTRLD_ERROR) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; - } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) { - return nested_vmx_failInvalid(vcpu); } + kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + + if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) + return nested_vmx_failInvalid(vcpu); + if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) && vmx->nested.current_vmptr == INVALID_GPA)) return nested_vmx_failInvalid(vcpu); @@ -3603,7 +3634,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && (vmcs12->guest_rflags & X86_EFLAGS_IF))) { vmx->nested.nested_run_pending = 0; - return kvm_vcpu_halt(vcpu); + return kvm_emulate_halt_noskip(vcpu); } break; case GUEST_ACTIVITY_WAIT_SIPI: @@ -4826,18 +4857,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; /* - * We should allocate a shadow vmcs for vmcs01 only when L1 - * executes VMXON and free it when L1 executes VMXOFF. - * As it is invalid to execute VMXON twice, we shouldn't reach - * here when vmcs01 already have an allocated shadow vmcs. + * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it + * when L1 executes VMXOFF or the vCPU is forced out of nested + * operation. VMXON faults if the CPU is already post-VMXON, so it + * should be impossible to already have an allocated shadow VMCS. KVM + * doesn't support virtualization of VMCS shadowing, so vmcs01 should + * always be the loaded VMCS. */ - WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); + if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs)) + return loaded_vmcs->shadow_vmcs; + + loaded_vmcs->shadow_vmcs = alloc_vmcs(true); + if (loaded_vmcs->shadow_vmcs) + vmcs_clear(loaded_vmcs->shadow_vmcs); - if (!loaded_vmcs->shadow_vmcs) { - loaded_vmcs->shadow_vmcs = alloc_vmcs(true); - if (loaded_vmcs->shadow_vmcs) - vmcs_clear(loaded_vmcs->shadow_vmcs); - } return loaded_vmcs->shadow_vmcs; } @@ -5074,27 +5107,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - /* - * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, - * any VMREAD sets the ALU flags for VMfailInvalid. - */ - if (vmx->nested.current_vmptr == INVALID_GPA || - (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) - return nested_vmx_failInvalid(vcpu); - /* Decode instruction info and find the field to read */ field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); - if (offset < 0) - return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { + /* + * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, + * any VMREAD sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == INVALID_GPA || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) + return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + offset = get_vmcs12_field_offset(field); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + /* Read the field, zero-extended to a u64 value */ + value = vmcs12_read_any(vmcs12, field, offset); + } else { + /* + * Hyper-V TLFS (as of 6.0b) explicitly states, that while an + * enlightened VMCS is active VMREAD/VMWRITE instructions are + * unsupported. Unfortunately, certain versions of Windows 11 + * don't comply with this requirement which is not enforced in + * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a + * workaround, as misbehaving guests will panic on VM-Fail. + * Note, enlightened VMCS is incompatible with shadow VMCS so + * all VMREADs from L2 should go to L1. + */ + if (WARN_ON_ONCE(is_guest_mode(vcpu))) + return nested_vmx_failInvalid(vcpu); - /* Read the field, zero-extended to a u64 value */ - value = vmcs12_read_any(vmcs12, field, offset); + offset = evmcs_field_offset(field, NULL); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + /* Read the field, zero-extended to a u64 value */ + value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset); + } /* * Now copy part of this value to register or memory, as requested. @@ -5189,7 +5244,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); + offset = get_vmcs12_field_offset(field); if (offset < 0) return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -5258,6 +5313,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) vmx->nested.need_vmcs12_to_shadow_sync = true; } vmx->nested.dirty_vmcs12 = true; + vmx->nested.force_msr_bitmap_recalc = true; } /* Emulate the VMPTRLD instruction */ @@ -6393,6 +6449,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; + vmx->nested.force_msr_bitmap_recalc = true; ret = nested_vmx_enter_non_root_mode(vcpu, false); if (ret) goto error_guest_mode; @@ -6435,7 +6492,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) max_idx = 0; for (i = 0; i < nr_vmcs12_fields; i++) { /* The vmcs12 table is very, very sparsely populated. */ - if (!vmcs_field_to_offset_table[i]) + if (!vmcs12_field_offsets[i]) continue; idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i)); @@ -6744,6 +6801,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { + .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, .hv_timer_pending = nested_vmx_preemption_timer_pending, .triple_fault = nested_vmx_triple_fault, |