summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx')
-rw-r--r--arch/x86/kvm/vmx/capabilities.h1
-rw-r--r--arch/x86/kvm/vmx/evmcs.c3
-rw-r--r--arch/x86/kvm/vmx/evmcs.h8
-rw-r--r--arch/x86/kvm/vmx/nested.c418
-rw-r--r--arch/x86/kvm/vmx/nested.h11
-rw-r--r--arch/x86/kvm/vmx/vmcs.h13
-rw-r--r--arch/x86/kvm/vmx/vmcs12.c1
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h10
-rw-r--r--arch/x86/kvm/vmx/vmx.c225
-rw-r--r--arch/x86/kvm/vmx/vmx.h21
10 files changed, 359 insertions, 352 deletions
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index aa0e7872fcc9..4705ad55abb5 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,7 +12,6 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
-extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 41f24661af04..896b2a50b4aa 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -319,6 +319,9 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
if (unlikely(!assist_page.enlighten_vmentry))
return false;
+ if (unlikely(!evmptr_is_valid(assist_page.current_nested_vmcs)))
+ return false;
+
*evmcs_gpa = assist_page.current_nested_vmcs;
return true;
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index bd41d9462355..2ec9b46f0d0c 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -197,6 +197,14 @@ static inline void evmcs_load(u64 phys_addr) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
+#define EVMPTR_INVALID (-1ULL)
+#define EVMPTR_MAP_PENDING (-2ULL)
+
+static inline bool evmptr_is_valid(u64 evmptr)
+{
+ return evmptr != EVMPTR_INVALID && evmptr != EVMPTR_MAP_PENDING;
+}
+
enum nested_evmptrld_status {
EVMPTRLD_DISABLED,
EVMPTRLD_SUCCEEDED,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6058a65a6ede..1a52134b0c42 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -173,9 +173,13 @@ static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
| X86_EFLAGS_ZF);
get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
/*
- * We don't need to force a shadow sync because
- * VM_INSTRUCTION_ERROR is not shadowed
+ * We don't need to force sync to shadow VMCS because
+ * VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
+ * fields and thus must be synced.
*/
+ if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
+ to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;
+
return kvm_skip_emulated_instruction(vcpu);
}
@@ -187,7 +191,8 @@ static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
* failValid writes the error number to the current VMCS, which
* can't be done if there isn't a current VMCS.
*/
- if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
+ if (vmx->nested.current_vmptr == -1ull &&
+ !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
return nested_vmx_failInvalid(vcpu);
return nested_vmx_failValid(vcpu, vm_instruction_error);
@@ -221,12 +226,12 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!vmx->nested.hv_evmcs)
- return;
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
+ vmx->nested.hv_evmcs = NULL;
+ }
- kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
- vmx->nested.hv_evmcs_vmptr = 0;
- vmx->nested.hv_evmcs = NULL;
+ vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
}
static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
@@ -346,16 +351,21 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
vmcs12->guest_physical_address = fault->address;
}
+static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
+{
+ kvm_init_shadow_ept_mmu(vcpu,
+ to_vmx(vcpu)->nested.msrs.ept_caps &
+ VMX_EPT_EXECUTE_ONLY_BIT,
+ nested_ept_ad_enabled(vcpu),
+ nested_ept_get_eptp(vcpu));
+}
+
static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- kvm_init_shadow_ept_mmu(vcpu,
- to_vmx(vcpu)->nested.msrs.ept_caps &
- VMX_EPT_EXECUTE_ONLY_BIT,
- nested_ept_ad_enabled(vcpu),
- nested_ept_get_eptp(vcpu));
+ nested_ept_new_eptp(vcpu);
vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
@@ -1058,54 +1068,13 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
}
/*
- * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
- * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
- * enable VPID for L2 (implying it expects a TLB flush on VMX transitions).
- * Here's why.
- *
- * If EPT is enabled by L0 a sync is never needed:
- * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there
- * cannot be unsync'd SPTEs for either L1 or L2.
- *
- * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter
- * VM-Enter as VM-Enter isn't required to invalidate guest-physical mappings
- * (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush
- * stale guest-physical mappings for L2 from the TLB. And as above, L0 isn't
- * shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit.
- *
- * If EPT is disabled by L0:
- * - if VPID is enabled by L1 (for L2), the situation is similar to when L1
- * enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't
- * required to invalidate linear mappings (EPT is disabled so there are
- * no combined or guest-physical mappings), i.e. L1 can't rely on the
- * (virtual) CPU to flush stale linear mappings for either L2 or itself (L1).
- *
- * - however if VPID is disabled by L1, then a sync is needed as L1 expects all
- * linear mappings (EPT is disabled so there are no combined or guest-physical
- * mappings) to be invalidated on both VM-Enter and VM-Exit.
- *
- * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which
- * additionally checks that L2 has been assigned a VPID (when EPT is disabled).
- * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect
- * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2
- * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has
- * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1
- * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't
- * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush
- * stale TLB entries, at which point L0 will sync L2's MMU.
- */
-static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
-{
- return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu));
-}
-
-/*
* Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are
* emulating VM-Entry into a guest with EPT enabled. On failure, the expected
* Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
* @entry_failure_code.
*/
-static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
+static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
+ bool nested_ept, bool reload_pdptrs,
enum vm_entry_failure_code *entry_failure_code)
{
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
@@ -1117,27 +1086,20 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
* If PAE paging and EPT are both on, CR3 is not used by the CPU and
* must not be dereferenced.
*/
- if (!nested_ept && is_pae_paging(vcpu) &&
- (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
- if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
- *entry_failure_code = ENTRY_FAIL_PDPTE;
- return -EINVAL;
- }
+ if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
+ CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+ *entry_failure_code = ENTRY_FAIL_PDPTE;
+ return -EINVAL;
}
- /*
- * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
- * flushes are handled by nested_vmx_transition_tlb_flush(). See
- * nested_vmx_transition_mmu_sync for details on skipping the MMU sync.
- */
if (!nested_ept)
- kvm_mmu_new_pgd(vcpu, cr3, true,
- !nested_vmx_transition_mmu_sync(vcpu));
+ kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
- kvm_init_mmu(vcpu, false);
+ /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
+ kvm_init_mmu(vcpu);
return 0;
}
@@ -1170,17 +1132,28 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
- * If VPID is disabled, linear and combined mappings are flushed on
- * VM-Enter/VM-Exit, and guest-physical mappings are valid only for
- * their associated EPTP.
+ * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
+ * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
+ * full TLB flush from the guest's perspective. This is required even
+ * if VPID is disabled in the host as KVM may need to synchronize the
+ * MMU in response to the guest TLB flush.
+ *
+ * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
+ * EPT is a special snowflake, as guest-physical mappings aren't
+ * flushed on VPID invalidations, including VM-Enter or VM-Exit with
+ * VPID disabled. As a result, KVM _never_ needs to sync nEPT
+ * entries on VM-Enter because L1 can't rely on VM-Enter to flush
+ * those mappings.
*/
- if (!enable_vpid)
+ if (!nested_cpu_has_vpid(vmcs12)) {
+ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
return;
+ }
+
+ /* L2 should never have a VPID if VPID is disabled. */
+ WARN_ON(!enable_vpid);
/*
- * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
- * for *all* contexts to be flushed on VM-Enter/VM-Exit.
- *
* If VPID is enabled and used by vmc12, but L2 does not have a unique
* TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
* a VPID for L2, flush the current context as the effective ASID is
@@ -1192,13 +1165,12 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
*
* If a TLB flush isn't required due to any of the above, and vpid12 is
* changing then the new "virtual" VPID (vpid12) will reuse the same
- * "real" VPID (vpid02), and so needs to be sync'd. There is no direct
+ * "real" VPID (vpid02), and so needs to be flushed. There's no direct
* mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
- * all nested vCPUs.
+ * all nested vCPUs. Remember, a flush on VM-Enter does not invalidate
+ * guest-physical mappings, so there is no need to sync the nEPT MMU.
*/
- if (!nested_cpu_has_vpid(vmcs12)) {
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- } else if (!nested_has_guest_tlb_tag(vcpu)) {
+ if (!nested_has_guest_tlb_tag(vcpu)) {
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
} else if (is_vmenter &&
vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
@@ -1586,7 +1558,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
vmcs_load(vmx->loaded_vmcs->vmcs);
}
-static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
+static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
{
struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
@@ -1595,7 +1567,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
vmcs12->tpr_threshold = evmcs->tpr_threshold;
vmcs12->guest_rip = evmcs->guest_rip;
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
vmcs12->guest_rsp = evmcs->guest_rsp;
vmcs12->guest_rflags = evmcs->guest_rflags;
@@ -1603,23 +1575,23 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
evmcs->guest_interruptibility_info;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
vmcs12->cpu_based_vm_exec_control =
evmcs->cpu_based_vm_exec_control;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
vmcs12->exception_bitmap = evmcs->exception_bitmap;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
vmcs12->vm_entry_intr_info_field =
evmcs->vm_entry_intr_info_field;
@@ -1629,7 +1601,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
evmcs->vm_entry_instruction_len;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
@@ -1649,7 +1621,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
vmcs12->host_tr_selector = evmcs->host_tr_selector;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
vmcs12->pin_based_vm_exec_control =
evmcs->pin_based_vm_exec_control;
@@ -1658,18 +1630,18 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
evmcs->secondary_vm_exec_control;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
vmcs12->msr_bitmap = evmcs->msr_bitmap;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
vmcs12->guest_es_base = evmcs->guest_es_base;
vmcs12->guest_cs_base = evmcs->guest_cs_base;
@@ -1709,14 +1681,14 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
vmcs12->tsc_offset = evmcs->tsc_offset;
vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
@@ -1728,7 +1700,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
vmcs12->guest_dr7 = evmcs->guest_dr7;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
vmcs12->host_fs_base = evmcs->host_fs_base;
vmcs12->host_gs_base = evmcs->host_gs_base;
@@ -1738,13 +1710,13 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
vmcs12->host_rsp = evmcs->host_rsp;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
vmcs12->ept_pointer = evmcs->ept_pointer;
vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
}
- if (unlikely(!(evmcs->hv_clean_fields &
+ if (unlikely(!(hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
@@ -1799,10 +1771,10 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
* vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
*/
- return 0;
+ return;
}
-static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
+static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
{
struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
@@ -1962,7 +1934,7 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
- return 0;
+ return;
}
/*
@@ -1979,13 +1951,13 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
if (likely(!vmx->nested.enlightened_vmcs_enabled))
return EVMPTRLD_DISABLED;
- if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
+ if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
+ nested_release_evmcs(vcpu);
return EVMPTRLD_DISABLED;
+ }
- if (unlikely(!vmx->nested.hv_evmcs ||
- evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
- if (!vmx->nested.hv_evmcs)
- vmx->nested.current_vmptr = -1ull;
+ if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
+ vmx->nested.current_vmptr = -1ull;
nested_release_evmcs(vcpu);
@@ -2023,7 +1995,6 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
return EVMPTRLD_VMFAIL;
}
- vmx->nested.dirty_vmcs12 = true;
vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
evmcs_gpa_changed = true;
@@ -2056,14 +2027,10 @@ void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (vmx->nested.hv_evmcs) {
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
copy_vmcs12_to_enlightened(vmx);
- /* All fields are clean */
- vmx->nested.hv_evmcs->hv_clean_fields |=
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
- } else {
+ else
copy_vmcs12_to_shadow(vmx);
- }
vmx->nested.need_vmcs12_to_shadow_sync = false;
}
@@ -2208,7 +2175,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
u32 exec_control;
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
- if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
+ if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
prepare_vmcs02_early_rare(vmx, vmcs12);
/*
@@ -2277,7 +2244,8 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_ENABLE_VMFUNC);
+ SECONDARY_EXEC_ENABLE_VMFUNC |
+ SECONDARY_EXEC_TSC_SCALING);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
@@ -2488,18 +2456,18 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
* is assigned to entry_failure_code on failure.
*/
static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry,
enum vm_entry_failure_code *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
bool load_guest_pdptrs_vmcs12 = false;
- if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
+ if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
prepare_vmcs02_rare(vmx, vmcs12);
vmx->nested.dirty_vmcs12 = false;
- load_guest_pdptrs_vmcs12 = !hv_evmcs ||
- !(hv_evmcs->hv_clean_fields &
+ load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
+ !(vmx->nested.hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
}
@@ -2532,10 +2500,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
- vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+ vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
+ vcpu->arch.l1_tsc_offset,
+ vmx_get_l2_tsc_offset(vcpu),
+ vmx_get_l2_tsc_multiplier(vcpu));
+ vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
+ vcpu->arch.l1_tsc_scaling_ratio,
+ vmx_get_l2_tsc_multiplier(vcpu));
+
+ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (kvm_has_tsc_control)
- decache_tsc_multiplier(vmx);
+ vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
@@ -2572,7 +2548,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
- entry_failure_code))
+ from_vmentry, entry_failure_code))
return -EINVAL;
/*
@@ -2604,6 +2580,17 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_rsp_write(vcpu, vmcs12->guest_rsp);
kvm_rip_write(vcpu, vmcs12->guest_rip);
+
+ /*
+ * It was observed that genuine Hyper-V running in L1 doesn't reset
+ * 'hv_clean_fields' by itself, it only sets the corresponding dirty
+ * bits when it changes a field in eVMCS. Mark all fields as clean
+ * here.
+ */
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ vmx->nested.hv_evmcs->hv_clean_fields |=
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
return 0;
}
@@ -3093,13 +3080,20 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
* L2 was running), map it here to make sure vmcs12 changes are
* properly reflected.
*/
- if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) {
+ if (vmx->nested.enlightened_vmcs_enabled &&
+ vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
enum nested_evmptrld_status evmptrld_status =
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
if (evmptrld_status == EVMPTRLD_VMFAIL ||
evmptrld_status == EVMPTRLD_ERROR)
return false;
+
+ /*
+ * Post migration, VMCS12 always provides the most up-to-date
+ * information; copy it to eVMCS upon entry.
+ */
+ vmx->nested.need_vmcs12_to_shadow_sync = true;
}
return true;
@@ -3113,6 +3107,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
struct page *page;
u64 hpa;
+ if (!vcpu->arch.pdptrs_from_userspace &&
+ !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
+ /*
+ * Reload the guest's PDPTRs since after a migration
+ * the guest CR3 might be restored prior to setting the nested
+ * state which can lead to a load of wrong PDPTRs.
+ */
+ if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+ return false;
+ }
+
+
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* Translate L1 physical address to host physical
@@ -3175,6 +3181,15 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
offset_in_page(vmcs12->posted_intr_desc_addr));
vmcs_write64(POSTED_INTR_DESC_ADDR,
pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
+ } else {
+ /*
+ * Defer the KVM_INTERNAL_EXIT until KVM tries to
+ * access the contents of the VMCS12 posted interrupt
+ * descriptor. (Note that KVM may do this when it
+ * should not, per the architectural specification.)
+ */
+ vmx->nested.pi_desc = NULL;
+ pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
}
}
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
@@ -3354,10 +3369,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
}
enter_guest_mode(vcpu);
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
- vcpu->arch.tsc_offset += vmcs12->tsc_offset;
- if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
+ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
exit_reason.basic = EXIT_REASON_INVALID_STATE;
vmcs12->exit_qualification = entry_failure_code;
goto vmentry_fail_vmexit_guest_mode;
@@ -3437,7 +3450,7 @@ vmentry_fail_vmexit:
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = exit_reason.full;
- if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
+ if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
vmx->nested.need_vmcs12_to_shadow_sync = true;
return NVMX_VMENTRY_VMEXIT;
}
@@ -3454,8 +3467,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
enum nested_evmptrld_status evmptrld_status;
- ++vcpu->stat.nested_run;
-
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -3467,7 +3478,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return nested_vmx_failInvalid(vcpu);
}
- if (CC(!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull))
+ if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
+ vmx->nested.current_vmptr == -1ull))
return nested_vmx_failInvalid(vcpu);
vmcs12 = get_vmcs12(vcpu);
@@ -3481,8 +3493,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (CC(vmcs12->hdr.shadow_vmcs))
return nested_vmx_failInvalid(vcpu);
- if (vmx->nested.hv_evmcs) {
- copy_enlightened_to_vmcs12(vmx);
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
/* Enlightened VMCS doesn't have launch state */
vmcs12->launch_state = !launch;
} else if (enable_shadow_vmcs) {
@@ -3682,25 +3694,29 @@ void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
}
}
-static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
void *vapic_page;
u16 status;
- if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
- return;
+ if (!vmx->nested.pi_pending)
+ return 0;
+
+ if (!vmx->nested.pi_desc)
+ goto mmio_needed;
vmx->nested.pi_pending = false;
+
if (!pi_test_and_clear_on(vmx->nested.pi_desc))
- return;
+ return 0;
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
if (max_irr != 256) {
vapic_page = vmx->nested.virtual_apic_map.hva;
if (!vapic_page)
- return;
+ goto mmio_needed;
__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
vapic_page, &max_irr);
@@ -3713,6 +3729,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
}
nested_mark_vmcs12_pages_dirty(vcpu);
+ return 0;
+
+mmio_needed:
+ kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
+ return -ENXIO;
}
static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
@@ -3887,8 +3908,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
}
no_vmexit:
- vmx_complete_nested_posted_interrupt(vcpu);
- return 0;
+ return vmx_complete_nested_posted_interrupt(vcpu);
}
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
@@ -4032,10 +4052,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (vmx->nested.hv_evmcs)
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;
+ vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
+ !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
@@ -4206,7 +4227,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* Only PDPTE load can fail as the value of cr3 was checked on entry and
* couldn't have changed.
*/
- if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
@@ -4463,8 +4484,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (nested_cpu_has_preemption_timer(vmcs12))
hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
- vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
+ vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
+ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
+ vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
+ }
if (likely(!vmx->fail)) {
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
@@ -4501,12 +4525,12 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+ if (kvm_has_tsc_control)
+ vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
+
if (vmx->nested.l1_tpr_threshold != -1)
vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
- if (kvm_has_tsc_control)
- decache_tsc_multiplier(vmx);
-
if (vmx->nested.change_vmcs01_virtual_apic_mode) {
vmx->nested.change_vmcs01_virtual_apic_mode = false;
vmx_set_virtual_apic_mode(vcpu);
@@ -4532,7 +4556,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
}
if ((vm_exit_reason != -1) &&
- (enable_shadow_vmcs || vmx->nested.hv_evmcs))
+ (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
@@ -4987,6 +5011,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
vmptr + offsetof(struct vmcs12,
launch_state),
&zero, sizeof(zero));
+ } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
+ nested_release_evmcs(vcpu);
}
return nested_vmx_succeed(vcpu);
@@ -5228,7 +5254,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
/* Forbid normal VMPTRLD if Enlightened version was used */
- if (vmx->nested.hv_evmcs)
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
return 1;
if (vmx->nested.current_vmptr != vmptr) {
@@ -5284,7 +5310,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
+ if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
return 1;
if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
@@ -5461,8 +5487,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
/*
* Sync the shadow page tables if EPT is disabled, L1 is invalidating
- * linear mappings for L2 (tagged with L2's VPID). Free all roots as
- * VPIDs are not tracked in the MMU role.
+ * linear mappings for L2 (tagged with L2's VPID). Free all guest
+ * roots as VPIDs are not tracked in the MMU role.
*
* Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
* an MMU when EPT is disabled.
@@ -5470,8 +5496,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
* TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
*/
if (!enable_ept)
- kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu,
- KVM_MMU_ROOTS_ALL);
+ kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
return nested_vmx_succeed(vcpu);
}
@@ -5481,23 +5506,16 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
{
u32 index = kvm_rcx_read(vcpu);
u64 new_eptp;
- bool accessed_dirty;
- struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
- if (!nested_cpu_has_eptp_switching(vmcs12) ||
- !nested_cpu_has_ept(vmcs12))
+ if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
return 1;
-
if (index >= VMFUNC_EPTP_ENTRIES)
return 1;
-
if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
&new_eptp, index * 8, 8))
return 1;
- accessed_dirty = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
-
/*
* If the (L2) guest does a vmfunc to the currently
* active ept pointer, we don't have to do anything else
@@ -5506,11 +5524,11 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
if (!nested_vmx_check_eptp(vcpu, new_eptp))
return 1;
- mmu->ept_ad = accessed_dirty;
- mmu->mmu_role.base.ad_disabled = !accessed_dirty;
vmcs12->ept_pointer = new_eptp;
+ nested_ept_new_eptp(vcpu);
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ if (!nested_cpu_has_vpid(vmcs12))
+ kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
}
return 0;
@@ -5533,7 +5551,17 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
}
vmcs12 = get_vmcs12(vcpu);
- if ((vmcs12->vm_function_control & (1 << function)) == 0)
+
+ /*
+ * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
+ * is enabled in vmcs02 if and only if it's enabled in vmcs12.
+ */
+ if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ if (!(vmcs12->vm_function_control & BIT_ULL(function)))
goto fail;
switch (function) {
@@ -5806,6 +5834,9 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
else if (is_breakpoint(intr_info) &&
vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
return true;
+ else if (is_alignment_check(intr_info) &&
+ !vmx_guest_inject_ac(vcpu))
+ return true;
return false;
case EXIT_REASON_EXTERNAL_INTERRUPT:
return true;
@@ -6056,7 +6087,8 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx_has_valid_vmcs12(vcpu)) {
kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
- if (vmx->nested.hv_evmcs)
+ /* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
+ if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
if (is_guest_mode(vcpu) &&
@@ -6112,8 +6144,15 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
} else {
copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
if (!vmx->nested.need_vmcs12_to_shadow_sync) {
- if (vmx->nested.hv_evmcs)
- copy_enlightened_to_vmcs12(vmx);
+ if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ /*
+ * The L1 hypervisor is not obliged to keep the eVMCS
+ * clean fields data up-to-date while not in guest
+ * mode; 'hv_clean_fields' is only supposed to be
+ * accurate upon vmentry, so we need to ignore it
+ * here and do a full copy.
+ */
+ copy_enlightened_to_vmcs12(vmx, 0);
else if (enable_shadow_vmcs)
copy_shadow_to_vmcs12(vmx);
}
@@ -6255,6 +6294,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
* restored yet. EVMCS will be mapped from
* nested_get_vmcs12_pages().
*/
+ vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
} else {
return -EINVAL;
@@ -6339,6 +6379,40 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
}
/*
+ * Indexing into the vmcs12 uses the VMCS encoding rotated left by 6. Undo
+ * that madness to get the encoding for comparison.
+ */
+#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
+
+static u64 nested_vmx_calc_vmcs_enum_msr(void)
+{
+ /*
+ * Note, these are the so-called "index" bits of a VMCS field encoding,
+ * not indices into vmcs12.
+ */
+ unsigned int max_idx, idx;
+ int i;
+
+ /*
+ * For better or worse, KVM allows VMREAD/VMWRITE to all fields in
+ * vmcs12, regardless of whether or not the associated feature is
+ * exposed to L1. Simply find the field with the highest index.
+ */
+ max_idx = 0;
+ for (i = 0; i < nr_vmcs12_fields; i++) {
+ /* The vmcs12 table is very, very sparsely populated. */
+ if (!vmcs_field_to_offset_table[i])
+ continue;
+
+ idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
+ if (idx > max_idx)
+ max_idx = idx;
+ }
+
+ return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
+}
+
+/*
* nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
* returned for the various VMX controls MSRs when nested VMX is enabled.
* The same values should also be used to verify that vmcs12 control fields are
@@ -6474,7 +6548,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
SECONDARY_EXEC_RDRAND_EXITING |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_RDSEED_EXITING |
- SECONDARY_EXEC_XSAVES;
+ SECONDARY_EXEC_XSAVES |
+ SECONDARY_EXEC_TSC_SCALING;
/*
* We can emulate "VMCS shadowing," even if the hardware
@@ -6582,8 +6657,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
- /* highest index: VMX_PREEMPTION_TIMER_VALUE */
- msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
+ msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
}
void nested_vmx_hardware_unsetup(void)
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 184418baeb3c..b69a80f43b37 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -56,14 +56,9 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /*
- * In case we do two consecutive get/set_nested_state()s while L2 was
- * running hv_evmcs may end up not being mapped (we map it from
- * nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode() as we always
- * have vmcs12 if it is true.
- */
- return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
- vmx->nested.hv_evmcs;
+ /* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
+ return vmx->nested.current_vmptr != -1ull ||
+ vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID;
}
static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 1472c6c376f7..4b9957e2bf5b 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -117,6 +117,11 @@ static inline bool is_gp_fault(u32 intr_info)
return is_exception_n(intr_info, GP_VECTOR);
}
+static inline bool is_alignment_check(u32 intr_info)
+{
+ return is_exception_n(intr_info, AC_VECTOR);
+}
+
static inline bool is_machine_check(u32 intr_info)
{
return is_exception_n(intr_info, MC_VECTOR);
@@ -164,4 +169,12 @@ static inline int vmcs_field_readonly(unsigned long field)
return (((field >> 10) & 0x3) == 1);
}
+#define VMCS_FIELD_INDEX_SHIFT (1)
+#define VMCS_FIELD_INDEX_MASK GENMASK(9, 1)
+
+static inline unsigned int vmcs_field_index(unsigned long field)
+{
+ return (field & VMCS_FIELD_INDEX_MASK) >> VMCS_FIELD_INDEX_SHIFT;
+}
+
#endif /* __KVM_X86_VMX_VMCS_H */
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index 034adb6404dc..d9f5d7c56ae3 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -37,6 +37,7 @@ const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
FIELD64(PML_ADDRESS, pml_address),
FIELD64(TSC_OFFSET, tsc_offset),
+ FIELD64(TSC_MULTIPLIER, tsc_multiplier),
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 13494956d0e9..5e0e1b39f495 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -70,7 +70,8 @@ struct __packed vmcs12 {
u64 eptp_list_address;
u64 pml_address;
u64 encls_exiting_bitmap;
- u64 padding64[2]; /* room for future expansion */
+ u64 tsc_multiplier;
+ u64 padding64[1]; /* room for future expansion */
/*
* To allow migration of L1 (complete with its L2 guests) between
* machines of different natural widths (32 or 64 bit), we cannot have
@@ -205,12 +206,6 @@ struct __packed vmcs12 {
#define VMCS12_SIZE KVM_STATE_NESTED_VMX_VMCS_SIZE
/*
- * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
- * supported VMCS12 field encoding.
- */
-#define VMCS12_MAX_FIELD_INDEX 0x17
-
-/*
* For save/restore compatibility, the vmcs12 field offsets must not change.
*/
#define CHECK_OFFSET(field, loc) \
@@ -258,6 +253,7 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(eptp_list_address, 304);
CHECK_OFFSET(pml_address, 312);
CHECK_OFFSET(encls_exiting_bitmap, 320);
+ CHECK_OFFSET(tsc_multiplier, 328);
CHECK_OFFSET(cr0_guest_host_mask, 344);
CHECK_OFFSET(cr4_guest_host_mask, 352);
CHECK_OFFSET(cr0_read_shadow, 360);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c2a779b688e6..927a552393b9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -52,6 +52,7 @@
#include "cpuid.h"
#include "evmcs.h"
#include "hyperv.h"
+#include "kvm_onhyperv.h"
#include "irq.h"
#include "kvm_cache_regs.h"
#include "lapic.h"
@@ -101,7 +102,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -459,86 +459,6 @@ static unsigned long host_idt_base;
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
-static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
- void *data)
-{
- struct kvm_tlb_range *range = data;
-
- return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
- range->pages);
-}
-
-static inline int hv_remote_flush_root_ept(hpa_t root_ept,
- struct kvm_tlb_range *range)
-{
- if (range)
- return hyperv_flush_guest_mapping_range(root_ept,
- kvm_fill_hv_flush_list_func, (void *)range);
- else
- return hyperv_flush_guest_mapping(root_ept);
-}
-
-static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
- struct kvm_tlb_range *range)
-{
- struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
- struct kvm_vcpu *vcpu;
- int ret = 0, i, nr_unique_valid_roots;
- hpa_t root;
-
- spin_lock(&kvm_vmx->hv_root_ept_lock);
-
- if (!VALID_PAGE(kvm_vmx->hv_root_ept)) {
- nr_unique_valid_roots = 0;
-
- /*
- * Flush all valid roots, and see if all vCPUs have converged
- * on a common root, in which case future flushes can skip the
- * loop and flush the common root.
- */
- kvm_for_each_vcpu(i, vcpu, kvm) {
- root = to_vmx(vcpu)->hv_root_ept;
- if (!VALID_PAGE(root) || root == kvm_vmx->hv_root_ept)
- continue;
-
- /*
- * Set the tracked root to the first valid root. Keep
- * this root for the entirety of the loop even if more
- * roots are encountered as a low effort optimization
- * to avoid flushing the same (first) root again.
- */
- if (++nr_unique_valid_roots == 1)
- kvm_vmx->hv_root_ept = root;
-
- if (!ret)
- ret = hv_remote_flush_root_ept(root, range);
-
- /*
- * Stop processing roots if a failure occurred and
- * multiple valid roots have already been detected.
- */
- if (ret && nr_unique_valid_roots > 1)
- break;
- }
-
- /*
- * The optimized flush of a single root can't be used if there
- * are multiple valid roots (obviously).
- */
- if (nr_unique_valid_roots > 1)
- kvm_vmx->hv_root_ept = INVALID_PAGE;
- } else {
- ret = hv_remote_flush_root_ept(kvm_vmx->hv_root_ept, range);
- }
-
- spin_unlock(&kvm_vmx->hv_root_ept_lock);
- return ret;
-}
-static int hv_remote_flush_tlb(struct kvm *kvm)
-{
- return hv_remote_flush_tlb_with_range(kvm, NULL);
-}
-
static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
{
struct hv_enlightened_vmcs *evmcs;
@@ -566,21 +486,6 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
#endif /* IS_ENABLED(CONFIG_HYPERV) */
-static void hv_track_root_ept(struct kvm_vcpu *vcpu, hpa_t root_ept)
-{
-#if IS_ENABLED(CONFIG_HYPERV)
- struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
-
- if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb) {
- spin_lock(&kvm_vmx->hv_root_ept_lock);
- to_vmx(vcpu)->hv_root_ept = root_ept;
- if (root_ept != kvm_vmx->hv_root_ept)
- kvm_vmx->hv_root_ept = INVALID_PAGE;
- spin_unlock(&kvm_vmx->hv_root_ept_lock);
- }
-#endif
-}
-
/*
* Comment's format: document - errata name - stepping - processor name.
* Refer from
@@ -842,16 +747,21 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
else {
- /*
- * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
- * between guest and host. In that case we only care about present
- * faults. For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in
- * prepare_vmcs02_rare.
- */
- bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR));
- int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0;
+ int mask = 0, match = 0;
+
+ if (enable_ept && (eb & (1u << PF_VECTOR))) {
+ /*
+ * If EPT is enabled, #PF is currently only intercepted
+ * if MAXPHYADDR is smaller on the guest than on the
+ * host. In that case we only care about present,
+ * non-reserved faults. For vmcs02, however, PFEC_MASK
+ * and PFEC_MATCH are set in prepare_vmcs02_rare.
+ */
+ mask = PFERR_PRESENT_MASK | PFERR_RSVD_MASK;
+ match = PFERR_PRESENT_MASK;
+ }
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
}
vmcs_write32(EXCEPTION_BITMAP, eb);
@@ -1390,11 +1300,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
vmx->loaded_vmcs->cpu = cpu;
}
-
- /* Setup TSC multiplier */
- if (kvm_has_tsc_control &&
- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
- decache_tsc_multiplier(vmx);
}
/*
@@ -1787,26 +1692,35 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx->guest_uret_msrs_loaded = false;
}
-static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- u64 g_tsc_offset = 0;
- /*
- * We're here if L1 chose not to trap WRMSR to TSC. According
- * to the spec, this should set L1's TSC; The offset that L1
- * set for L2 remains unchanged, and still needs to be added
- * to the newly set TSC to get L2's TSC.
- */
- if (is_guest_mode(vcpu) &&
- (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
- g_tsc_offset = vmcs12->tsc_offset;
+ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING))
+ return vmcs12->tsc_offset;
+
+ return 0;
+}
+
+u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING) &&
+ nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
+ return vmcs12->tsc_multiplier;
- trace_kvm_write_tsc_offset(vcpu->vcpu_id,
- vcpu->arch.tsc_offset - g_tsc_offset,
- offset);
- vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
- return offset + g_tsc_offset;
+ return kvm_default_tsc_scaling_ratio;
+}
+
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+{
+ vmcs_write64(TSC_OFFSET, offset);
+}
+
+static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+{
+ vmcs_write64(TSC_MULTIPLIER, multiplier);
}
/*
@@ -3181,7 +3095,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
eptp = construct_eptp(vcpu, root_hpa, root_level);
vmcs_write64(EPT_POINTER, eptp);
- hv_track_root_ept(vcpu, root_hpa);
+ hv_track_root_tdp(vcpu, root_hpa);
if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
@@ -3707,7 +3621,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
int ret = 0;
mutex_lock(&kvm->slots_lock);
- if (kvm->arch.apic_access_page_done)
+ if (kvm->arch.apic_access_memslot_enabled)
goto out;
hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
@@ -3727,7 +3641,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
* is able to migrate it.
*/
put_page(page);
- kvm->arch.apic_access_page_done = true;
+ kvm->arch.apic_access_memslot_enabled = true;
out:
mutex_unlock(&kvm->slots_lock);
return ret;
@@ -4829,7 +4743,7 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
* - Guest has #AC detection enabled in CR0
* - Guest EFLAGS has AC bit set
*/
-static inline bool guest_inject_ac(struct kvm_vcpu *vcpu)
+bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
{
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
return true;
@@ -4937,7 +4851,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
kvm_run->debug.arch.exception = ex_no;
break;
case AC_VECTOR:
- if (guest_inject_ac(vcpu)) {
+ if (vmx_guest_inject_ac(vcpu)) {
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
return 1;
}
@@ -5810,6 +5724,8 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls())
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
+ vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
pr_err("*** Guest State ***\n");
pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
@@ -6806,7 +6722,18 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_load_host_xsave_state(vcpu);
- vmx->nested.nested_run_pending = 0;
+ if (is_guest_mode(vcpu)) {
+ /*
+ * Track VMLAUNCH/VMRESUME that have made it past guest state
+ * checking.
+ */
+ if (vmx->nested.nested_run_pending &&
+ !vmx->exit_reason.failed_vmentry)
+ ++vcpu->stat.nested_run;
+
+ vmx->nested.nested_run_pending = 0;
+ }
+
vmx->idt_vectoring_info = 0;
if (unlikely(vmx->fail)) {
@@ -6941,6 +6868,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
+ vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
@@ -6952,9 +6880,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
vmx->pi_desc.nv = POSTED_INTR_VECTOR;
vmx->pi_desc.sn = 1;
-#if IS_ENABLED(CONFIG_HYPERV)
- vmx->hv_root_ept = INVALID_PAGE;
-#endif
return 0;
free_vmcs:
@@ -6971,10 +6896,6 @@ free_vpid:
static int vmx_vm_init(struct kvm *kvm)
{
-#if IS_ENABLED(CONFIG_HYPERV)
- spin_lock_init(&to_kvm_vmx(kvm)->hv_root_ept_lock);
-#endif
-
if (!ple_gap)
kvm->arch.pause_in_guest = true;
@@ -7001,7 +6922,6 @@ static int vmx_vm_init(struct kvm *kvm)
break;
}
}
- kvm_apicv_init(kvm, enable_apicv);
return 0;
}
@@ -7453,10 +7373,10 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
delta_tsc = 0;
/* Convert to host delta tsc if tsc scaling is enabled */
- if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
+ if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
delta_tsc && u64_shl_div_u64(delta_tsc,
kvm_tsc_scaling_ratio_frac_bits,
- vcpu->arch.tsc_scaling_ratio, &delta_tsc))
+ vcpu->arch.l1_tsc_scaling_ratio, &delta_tsc))
return -ERANGE;
/*
@@ -7542,7 +7462,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !is_smm(vcpu);
}
-static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7556,7 +7476,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
@@ -7700,7 +7620,10 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
- .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
+ .get_l2_tsc_offset = vmx_get_l2_tsc_offset,
+ .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
+ .write_tsc_offset = vmx_write_tsc_offset,
+ .write_tsc_multiplier = vmx_write_tsc_multiplier,
.load_mmu_pgd = vmx_load_mmu_pgd,
@@ -7731,8 +7654,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.setup_mce = vmx_setup_mce,
.smi_allowed = vmx_smi_allowed,
- .pre_enter_smm = vmx_pre_enter_smm,
- .pre_leave_smm = vmx_pre_leave_smm,
+ .enter_smm = vmx_enter_smm,
+ .leave_smm = vmx_leave_smm,
.enable_smi_window = vmx_enable_smi_window,
.can_emulate_instruction = vmx_can_emulate_instruction,
@@ -7807,6 +7730,12 @@ static __init int hardware_setup(void)
!cpu_has_vmx_invept_global())
enable_ept = 0;
+ /* NX support is required for shadow paging. */
+ if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
+ pr_err_ratelimited("kvm: NX (Execute Disable) not supported\n");
+ return -EOPNOTSUPP;
+ }
+
if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
enable_ept_ad_bits = 0;
@@ -7996,6 +7925,8 @@ static void vmx_exit(void)
}
#endif
vmx_cleanup_l1d_flush();
+
+ allow_smaller_maxphyaddr = false;
}
module_exit(vmx_exit);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 16e4e457ba23..3979a947933a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -322,8 +322,6 @@ struct vcpu_vmx {
/* apic deadline value in host tsc */
u64 hv_deadline_tsc;
- u64 current_tsc_ratio;
-
unsigned long host_debugctlmsr;
/*
@@ -336,10 +334,6 @@ struct vcpu_vmx {
/* SGX Launch Control public key hash */
u64 msr_ia32_sgxlepubkeyhash[4];
-#if IS_ENABLED(CONFIG_HYPERV)
- u64 hv_root_ept;
-#endif
-
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
@@ -357,11 +351,6 @@ struct kvm_vmx {
unsigned int tss_addr;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
-
-#if IS_ENABLED(CONFIG_HYPERV)
- hpa_t hv_root_ept;
- spinlock_t hv_root_ept_lock;
-#endif
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
@@ -387,6 +376,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
+bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
@@ -404,6 +394,9 @@ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
+u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
+u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
+
static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
int type, bool value)
{
@@ -529,12 +522,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
GFP_KERNEL_ACCOUNT);
}
-static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
-{
- vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
- vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-}
-
static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
{
return vmx->secondary_exec_control &