summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx/vmx.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.h')
-rw-r--r--arch/x86/kvm/vmx/vmx.h298
1 files changed, 143 insertions, 155 deletions
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a3da84f4ea45..bc3ed3145d7e 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -7,20 +7,17 @@
#include <asm/kvm.h>
#include <asm/intel_pt.h>
#include <asm/perf_event.h>
+#include <asm/posted_intr.h>
#include "capabilities.h"
#include "../kvm_cache_regs.h"
-#include "posted_intr.h"
+#include "pmu_intel.h"
#include "vmcs.h"
#include "vmx_ops.h"
#include "../cpuid.h"
#include "run_flags.h"
-
-#define MSR_TYPE_R 1
-#define MSR_TYPE_W 2
-#define MSR_TYPE_RW 3
-
-#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+#include "../mmu.h"
+#include "common.h"
#ifdef CONFIG_X86_64
#define MAX_NR_USER_RETURN_MSRS 7
@@ -70,57 +67,6 @@ struct pt_desc {
struct pt_ctx guest;
};
-union vmx_exit_reason {
- struct {
- u32 basic : 16;
- u32 reserved16 : 1;
- u32 reserved17 : 1;
- u32 reserved18 : 1;
- u32 reserved19 : 1;
- u32 reserved20 : 1;
- u32 reserved21 : 1;
- u32 reserved22 : 1;
- u32 reserved23 : 1;
- u32 reserved24 : 1;
- u32 reserved25 : 1;
- u32 bus_lock_detected : 1;
- u32 enclave_mode : 1;
- u32 smi_pending_mtf : 1;
- u32 smi_from_vmx_root : 1;
- u32 reserved30 : 1;
- u32 failed_vmentry : 1;
- };
- u32 full;
-};
-
-static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
-{
- /*
- * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
- * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
- * greater than zero. However, KVM only exposes and emulates the MSR
- * to/for the guest if the guest PMU supports at least "Architectural
- * Performance Monitoring Version 2".
- */
- return pmu->version > 1;
-}
-
-struct lbr_desc {
- /* Basic info about guest LBR records. */
- struct x86_pmu_lbr records;
-
- /*
- * Emulate LBR feature via passthrough LBR registers when the
- * per-vcpu guest LBR event is scheduled on the current pcpu.
- *
- * The records may be inaccurate if the host reclaims the LBR.
- */
- struct perf_event *event;
-
- /* True if LBRs are marked as not intercepted in the MSR bitmap */
- bool msr_passthrough;
-};
-
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -189,6 +135,7 @@ struct nested_vmx {
bool reload_vmcs01_apic_access_page;
bool update_vmcs01_cpu_dirty_logging;
bool update_vmcs01_apicv_status;
+ bool update_vmcs01_hwapic_isr;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -213,8 +160,6 @@ struct nested_vmx {
struct kvm_host_map virtual_apic_map;
struct kvm_host_map pi_desc_map;
- struct kvm_host_map msr_bitmap_map;
-
struct pi_desc *pi_desc;
bool pi_pending;
u16 posted_intr_nv;
@@ -236,6 +181,9 @@ struct nested_vmx {
*/
u64 pre_vmenter_debugctl;
u64 pre_vmenter_bndcfgs;
+ u64 pre_vmenter_s_cet;
+ u64 pre_vmenter_ssp;
+ u64 pre_vmenter_ssp_tbl;
/* to migrate it to L1 if L2 writes to L1's CR8 directly */
int l1_tpr_threshold;
@@ -253,27 +201,19 @@ struct nested_vmx {
bool guest_mode;
} smm;
+#ifdef CONFIG_KVM_HYPERV
gpa_t hv_evmcs_vmptr;
struct kvm_host_map hv_evmcs_map;
struct hv_enlightened_vmcs *hv_evmcs;
+#endif
};
struct vcpu_vmx {
struct kvm_vcpu vcpu;
+ struct vcpu_vt vt;
u8 fail;
u8 x2apic_msr_bitmap_mode;
- /*
- * If true, host state has been stored in vmx->loaded_vmcs for
- * the CPU registers that only need to be switched when transitioning
- * to/from the kernel, and the registers have been loaded with guest
- * values. If false, host state is loaded in the CPU registers
- * and vmx->loaded_vmcs->host_state is invalid.
- */
- bool guest_state_loaded;
-
- unsigned long exit_qualification;
- u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
@@ -286,7 +226,6 @@ struct vcpu_vmx {
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
- u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
@@ -325,15 +264,6 @@ struct vcpu_vmx {
} seg[8];
} segment_cache;
int vpid;
- bool emulation_required;
-
- union vmx_exit_reason exit_reason;
-
- /* Posted interrupt descriptor */
- struct pi_desc pi_desc;
-
- /* Used if this vCPU is waiting for PI notification wakeup. */
- struct list_head pi_wakeup_list;
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
@@ -342,17 +272,16 @@ struct vcpu_vmx {
unsigned int ple_window;
bool ple_window_dirty;
- bool req_immediate_exit;
-
/* Support for PML */
-#define PML_ENTITY_NUM 512
+#define PML_LOG_NR_ENTRIES 512
+ /* PML is written backwards: this is the first entry written by the CPU */
+#define PML_HEAD_INDEX (PML_LOG_NR_ENTRIES-1)
+
struct page *pml_pg;
/* apic deadline value in host tsc */
u64 hv_deadline_tsc;
- unsigned long host_debugctlmsr;
-
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
@@ -368,12 +297,8 @@ struct vcpu_vmx {
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
- /* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
- struct {
- DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
- DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
- } shadow_msr_intercept;
+ /* ve_info must be page aligned. */
+ struct vmx_ve_information *ve_info;
};
struct kvm_vmx {
@@ -386,9 +311,44 @@ struct kvm_vmx {
u64 *pid_table;
};
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
-void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
- struct loaded_vmcs *buddy);
+static __always_inline struct vcpu_vt *to_vt(struct kvm_vcpu *vcpu)
+{
+ return &(container_of(vcpu, struct vcpu_vmx, vcpu)->vt);
+}
+
+static __always_inline struct kvm_vcpu *vt_to_vcpu(struct vcpu_vt *vt)
+{
+ return &(container_of(vt, struct vcpu_vmx, vt)->vcpu);
+}
+
+static __always_inline union vmx_exit_reason vmx_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ return to_vt(vcpu)->exit_reason;
+}
+
+static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vt *vt = to_vt(vcpu);
+
+ if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1) &&
+ !WARN_ON_ONCE(is_td_vcpu(vcpu)))
+ vt->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ return vt->exit_qualification;
+}
+
+static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vt *vt = to_vt(vcpu);
+
+ if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2) &&
+ !WARN_ON_ONCE(is_td_vcpu(vcpu)))
+ vt->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ return vt->exit_intr_info;
+}
+
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
@@ -396,6 +356,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
+int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
@@ -408,11 +369,11 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
+bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
@@ -427,22 +388,54 @@ bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
-void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
-void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
+void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set);
+
+static inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
+ u32 msr, int type)
+{
+ vmx_set_intercept_for_msr(vcpu, msr, type, false);
+}
+
+static inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
+ u32 msr, int type)
+{
+ vmx_set_intercept_for_msr(vcpu, msr, type, true);
+}
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
-static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
- int type, bool value)
+gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated);
+
+#define VMX_HOST_OWNED_DEBUGCTL_BITS (DEBUGCTLMSR_FREEZE_IN_SMM)
+
+static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
{
- if (value)
- vmx_enable_intercept_for_msr(vcpu, msr, type);
- else
- vmx_disable_intercept_for_msr(vcpu, msr, type);
+ WARN_ON_ONCE(val & VMX_HOST_OWNED_DEBUGCTL_BITS);
+
+ val |= vcpu->arch.host_debugctl & VMX_HOST_OWNED_DEBUGCTL_BITS;
+ vmcs_write64(GUEST_IA32_DEBUGCTL, val);
}
-void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+static inline u64 vmx_guest_debugctl_read(void)
+{
+ return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~VMX_HOST_OWNED_DEBUGCTL_BITS;
+}
+
+static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu)
+{
+ u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+ if (!((val ^ vcpu->arch.host_debugctl) & VMX_HOST_OWNED_DEBUGCTL_BITS))
+ return;
+
+ vmx_guest_debugctl_write(vcpu, val & ~VMX_HOST_OWNED_DEBUGCTL_BITS);
+}
/*
* Note, early Intel manuals have the write-low and read-high bitmap offsets
@@ -493,7 +486,8 @@ static inline u8 vmx_get_rvi(void)
VM_ENTRY_LOAD_IA32_EFER | \
VM_ENTRY_LOAD_BNDCFGS | \
VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
+ VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+ VM_ENTRY_LOAD_CET_STATE)
#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
(VM_EXIT_SAVE_DEBUG_CONTROLS | \
@@ -515,7 +509,8 @@ static inline u8 vmx_get_rvi(void)
VM_EXIT_LOAD_IA32_EFER | \
VM_EXIT_CLEAR_BNDCFGS | \
VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
+ VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+ VM_EXIT_LOAD_CET_STATE)
#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
(PIN_BASED_EXT_INTR_MASK | \
@@ -574,7 +569,7 @@ static inline u8 vmx_get_rvi(void)
SECONDARY_EXEC_APIC_REGISTER_VIRT | \
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
SECONDARY_EXEC_SHADOW_VMCS | \
- SECONDARY_EXEC_XSAVES | \
+ SECONDARY_EXEC_ENABLE_XSAVES | \
SECONDARY_EXEC_RDSEED_EXITING | \
SECONDARY_EXEC_RDRAND_EXITING | \
SECONDARY_EXEC_ENABLE_PML | \
@@ -585,7 +580,8 @@ static inline u8 vmx_get_rvi(void)
SECONDARY_EXEC_ENABLE_VMFUNC | \
SECONDARY_EXEC_BUS_LOCK_DETECTION | \
SECONDARY_EXEC_NOTIFY_VM_EXITING | \
- SECONDARY_EXEC_ENCLS_EXITING)
+ SECONDARY_EXEC_ENCLS_EXITING | \
+ SECONDARY_EXEC_EPT_VIOLATION_VE)
#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \
@@ -616,6 +612,14 @@ static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##b
{ \
BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
+} \
+static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \
+ bool set) \
+{ \
+ if (set) \
+ lname##_controls_setbit(vmx, val); \
+ else \
+ lname##_controls_clearbit(vmx, val); \
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
@@ -640,62 +644,42 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
(1 << VCPU_EXREG_EXIT_INFO_1) | \
(1 << VCPU_EXREG_EXIT_INFO_2))
-static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
{
- return container_of(kvm, struct kvm_vmx, kvm);
-}
+ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
-static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
-{
- return container_of(vcpu, struct vcpu_vmx, vcpu);
-}
-
-static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
-{
- return &to_vmx(vcpu)->lbr_desc;
+ /*
+ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+ * in order to construct shadow PTEs with the correct protections.
+ * Note! CR0.WP technically can be passed through to the guest if
+ * paging is disabled, but checking CR0.PG would generate a cyclical
+ * dependency of sorts due to forcing the caller to ensure CR0 holds
+ * the correct value prior to determining which CR0 bits can be owned
+ * by L1. Keep it simple and limit the optimization to EPT.
+ */
+ if (!enable_ept)
+ bits &= ~X86_CR0_WP;
+ return bits;
}
-static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
+static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
- return &vcpu_to_lbr_desc(vcpu)->records;
+ return container_of(kvm, struct kvm_vmx, kvm);
}
-static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
+static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
- return !!vcpu_to_lbr_records(vcpu)->nr;
+ return container_of(vcpu, struct vcpu_vmx, vcpu);
}
void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
-static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_1)) {
- kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
- vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- }
- return vmx->exit_qualification;
-}
-
-static inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_2)) {
- kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- }
- return vmx->exit_intr_info;
-}
-
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
-void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
static inline struct vmcs *alloc_vmcs(bool shadow)
{
@@ -714,7 +698,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
if (!enable_ept)
return true;
- return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ return allow_smaller_maxphyaddr &&
+ cpuid_maxphyaddr(vcpu) < kvm_host.maxphyaddr;
}
static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
@@ -732,6 +717,11 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
void dump_vmcs(struct kvm_vcpu *vcpu);
+static inline int vmx_get_instr_info_reg(u32 vmx_instr_info)
+{
+ return (vmx_instr_info >> 3) & 0xf;
+}
+
static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
return (vmx_instr_info >> 28) & 0xf;
@@ -742,14 +732,12 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && enable_ipiv;
}
-static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
+static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
- /*
- * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
- * eVMCS has been explicitly enabled by userspace.
- */
- return vcpu->arch.hyperv_enabled &&
- to_vmx(vcpu)->nested.enlightened_vmcs_enabled;
+ vmx->segment_cache.bitmask = 0;
}
+int vmx_init(void);
+void vmx_exit(void);
+
#endif /* __KVM_X86_VMX_H */