Diffstat (limited to 'arch/x86/kvm/vmx/vmx.h')
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 690
1 file changed, 457 insertions(+), 233 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 99328954c2fc..bc3ed3145d7e 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -6,29 +6,34 @@
 #include <asm/kvm.h>
 #include <asm/intel_pt.h>
+#include <asm/perf_event.h>
+#include <asm/posted_intr.h>
 
 #include "capabilities.h"
-#include "ops.h"
+#include "../kvm_cache_regs.h"
+#include "pmu_intel.h"
 #include "vmcs.h"
+#include "vmx_ops.h"
+#include "../cpuid.h"
+#include "run_flags.h"
+#include "../mmu.h"
+#include "common.h"
 
-extern const u32 vmx_msr_index[];
-extern u64 host_efer;
-
-#define MSR_TYPE_R	1
-#define MSR_TYPE_W	2
-#define MSR_TYPE_RW	3
-
-#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+#ifdef CONFIG_X86_64
+#define MAX_NR_USER_RETURN_MSRS	7
+#else
+#define MAX_NR_USER_RETURN_MSRS	4
+#endif
 
-#define NR_AUTOLOAD_MSRS 8
+#define MAX_NR_LOADSTORE_MSRS	8
 
 struct vmx_msrs {
 	unsigned int		nr;
-	struct vmx_msr_entry	val[NR_AUTOLOAD_MSRS];
+	struct vmx_msr_entry	val[MAX_NR_LOADSTORE_MSRS];
 };
 
-struct shared_msr_entry {
-	unsigned index;
+struct vmx_uret_msr {
+	bool load_into_hardware;
 	u64 data;
 	u64 mask;
 };
@@ -42,29 +47,6 @@ enum segment_cache_field {
 	SEG_FIELD_NR = 4
 };
 
-/* Posted-Interrupt Descriptor */
-struct pi_desc {
-	u32 pir[8];     /* Posted interrupt requested */
-	union {
-		struct {
-				/* bit 256 - Outstanding Notification */
-			u16	on	: 1,
-				/* bit 257 - Suppress Notification */
-				sn	: 1,
-				/* bit 271:258 - Reserved */
-				rsvd_1	: 14;
-				/* bit 279:272 - Notification Vector */
-			u8	nv;
-				/* bit 287:280 - Reserved */
-			u8	rsvd_2;
-				/* bit 319:288 - Notification Destination */
-			u32	ndst;
-		};
-		u64 control;
-	};
-	u32 rsvd[6];
-} __aligned(64);
-
 #define RTIT_ADDR_RANGE	4
 
 struct pt_ctx {
@@ -79,7 +61,7 @@ struct pt_ctx {
 
 struct pt_desc {
 	u64 ctl_bitmask;
-	u32 addr_range;
+	u32 num_address_ranges;
 	u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 	struct pt_ctx host;
 	struct pt_ctx guest;
@@ -109,14 +91,40 @@ struct nested_vmx {
 	 * to guest memory during VM exit.
 	 */
 	struct vmcs12 *cached_shadow_vmcs12;
+
+	/*
+	 * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
+	 */
+	struct gfn_to_hva_cache shadow_vmcs12_cache;
+
+	/*
+	 * GPA to HVA cache for VMCS12
+	 */
+	struct gfn_to_hva_cache vmcs12_cache;
+
 	/*
 	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
 	 * with the data held by struct vmcs12.
 	 */
-	bool need_vmcs12_sync;
+	bool need_vmcs12_to_shadow_sync;
 	bool dirty_vmcs12;
 
 	/*
+	 * Indicates whether MSR bitmap for L2 needs to be rebuilt due to
+	 * changes in MSR bitmap for L1 or switching to a different L2. Note,
+	 * this flag can only be used reliably in conjunction with a paravirt L1
+	 * which informs L0 whether any changes to MSR bitmap for L2 were done
+	 * on its side.
+	 */
+	bool force_msr_bitmap_recalc;
+
+	/*
+	 * Indicates lazily loaded guest state has not yet been decached from
+	 * vmcs02.
+	 */
+	bool need_sync_vmcs02_to_vmcs12_rare;
+
+	/*
 	 * vmcs02 has been initialized, i.e. state that is constant for
 	 * vmcs02 has been written to the backing VMCS.  Initialization
 	 * is delayed until L1 actually attempts to run a nested VM.
@@ -124,6 +132,10 @@ struct nested_vmx {
 	bool vmcs02_initialized;
 
 	bool change_vmcs01_virtual_apic_mode;
+	bool reload_vmcs01_apic_access_page;
+	bool update_vmcs01_cpu_dirty_logging;
+	bool update_vmcs01_apicv_status;
+	bool update_vmcs01_hwapic_isr;
 
 	/*
 	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -135,25 +147,46 @@ struct nested_vmx {
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
 
+	/* Pending MTF VM-exit into L1. */
+	bool mtf_pending;
+
 	struct loaded_vmcs vmcs02;
 
 	/*
	 * Guest pages referred to in the vmcs02 with host-physical
	 * pointers, so we must keep them pinned while L2 runs.
	 */
-	struct page *apic_access_page;
-	struct page *virtual_apic_page;
-	struct page *pi_desc_page;
+	struct kvm_host_map apic_access_page_map;
+	struct kvm_host_map virtual_apic_map;
+	struct kvm_host_map pi_desc_map;
+	struct pi_desc *pi_desc;
 	bool pi_pending;
 	u16 posted_intr_nv;
 
 	struct hrtimer preemption_timer;
+	u64 preemption_timer_deadline;
+	bool has_preemption_timer_deadline;
 	bool preemption_timer_expired;
 
-	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
-	u64 vmcs01_debugctl;
-	u64 vmcs01_guest_bndcfgs;
+	/*
+	 * Used to snapshot MSRs that are conditionally loaded on VM-Enter in
+	 * order to propagate the guest's pre-VM-Enter value into vmcs02.  For
+	 * emulation of VMLAUNCH/VMRESUME, the snapshot will be of L1's value.
+	 * For KVM_SET_NESTED_STATE, the snapshot is of L2's value, _if_
+	 * userspace restores MSRs before nested state.  If userspace restores
+	 * MSRs after nested state, the snapshot holds garbage, but KVM can't
+	 * detect that, and the garbage value in vmcs02 will be overwritten by
+	 * MSR restoration in any case.
+	 */
+	u64 pre_vmenter_debugctl;
+	u64 pre_vmenter_bndcfgs;
+	u64 pre_vmenter_s_cet;
+	u64 pre_vmenter_ssp;
+	u64 pre_vmenter_ssp_tbl;
+
+	/* to migrate it to L1 if L2 writes to L1's CR8 directly */
+	int l1_tpr_threshold;
 
 	u16 vpid02;
 	u16 last_vpid;
@@ -168,53 +201,54 @@ struct nested_vmx {
 		bool guest_mode;
 	} smm;
 
+#ifdef CONFIG_KVM_HYPERV
 	gpa_t hv_evmcs_vmptr;
-	struct page *hv_evmcs_page;
+	struct kvm_host_map hv_evmcs_map;
 	struct hv_enlightened_vmcs *hv_evmcs;
+#endif
 };
 
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
-	unsigned long         host_rsp;
+	struct vcpu_vt        vt;
 	u8                    fail;
-	u8                    msr_bitmap_mode;
-	u32                   exit_intr_info;
+	u8                    x2apic_msr_bitmap_mode;
+	u32                   idt_vectoring_info;
 	ulong                 rflags;
-	struct shared_msr_entry *guest_msrs;
-	int                   nmsrs;
-	int                   save_nmsrs;
-	bool                  guest_msrs_dirty;
-	unsigned long         host_idt_base;
+
+	/*
+	 * User return MSRs are always emulated when enabled in the guest, but
+	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+	 * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+	 * be loaded into hardware if those conditions aren't met.
+	 */
+	struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
+	bool                  guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64
-	u64                   msr_host_kernel_gs_base;
 	u64                   msr_guest_kernel_gs_base;
 #endif
 
-	u64                   arch_capabilities;
 	u64                   spec_ctrl;
-
-	u32 vm_entry_controls_shadow;
-	u32 vm_exit_controls_shadow;
-	u32 secondary_exec_control;
+	u32                   msr_ia32_umwait_control;
 
 	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
-	 * guest (L2), it points to a different VMCS.  loaded_cpu_state points
-	 * to the VMCS whose state is loaded into the CPU registers that only
-	 * need to be switched when transitioning to/from the kernel; a NULL
-	 * value indicates that host state is loaded.
+	 * guest (L2), it points to a different VMCS.
	 */
 	struct loaded_vmcs    vmcs01;
 	struct loaded_vmcs   *loaded_vmcs;
-	struct loaded_vmcs   *loaded_cpu_state;
-	bool                  __launched; /* temporary, used in vmx_vcpu_run */
+
 	struct msr_autoload {
 		struct vmx_msrs guest;
 		struct vmx_msrs host;
 	} msr_autoload;
 
+	struct msr_autostore {
+		struct vmx_msrs guest;
+	} msr_autostore;
+
 	struct {
 		int vm86_active;
 		ulong save_rflags;
@@ -230,51 +264,41 @@ struct vcpu_vmx {
 		} seg[8];
 	} segment_cache;
 	int vpid;
-	bool emulation_required;
-
-	u32 exit_reason;
-
-	/* Posted interrupt descriptor */
-	struct pi_desc pi_desc;
 
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
 
 	/* Dynamic PLE window. */
-	int ple_window;
+	unsigned int ple_window;
 	bool ple_window_dirty;
 
-	bool req_immediate_exit;
-
 	/* Support for PML */
-#define PML_ENTITY_NUM		512
+#define PML_LOG_NR_ENTRIES	512
+	/* PML is written backwards: this is the first entry written by the CPU */
+#define PML_HEAD_INDEX		(PML_LOG_NR_ENTRIES-1)
+
 	struct page *pml_pg;
 
 	/* apic deadline value in host tsc */
 	u64 hv_deadline_tsc;
 
-	u64 current_tsc_ratio;
-
-	u32 host_pkru;
-
-	unsigned long host_debugctlmsr;
-
 	/*
	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
-	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+	 * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
	 * in msr_ia32_feature_control_valid_bits.
	 */
 	u64 msr_ia32_feature_control;
 	u64 msr_ia32_feature_control_valid_bits;
-	u64 ept_pointer;
+
+	/* SGX Launch Control public key hash */
+	u64 msr_ia32_sgxlepubkeyhash[4];
+	u64 msr_ia32_mcu_opt_ctrl;
+	bool disable_fb_clear;
 
 	struct pt_desc pt_desc;
-};
+	struct lbr_desc lbr_desc;
 
-enum ept_pointers_status {
-	EPT_POINTERS_CHECK = 0,
-	EPT_POINTERS_MATCH = 1,
-	EPT_POINTERS_MISMATCH = 2
+	/* ve_info must be page aligned. */
+	struct vmx_ve_information *ve_info;
 };
 
 struct kvm_vmx {
@@ -283,237 +307,437 @@ struct kvm_vmx {
 	unsigned int tss_addr;
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
-
-	enum ept_pointers_status ept_pointers_match;
-	spinlock_t ept_pointer_lock;
+	/* Posted Interrupt Descriptor (PID) table for IPI virtualization */
+	u64 *pid_table;
 };
 
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
-void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void vmx_vcpu_put(struct kvm_vcpu *vcpu);
+static __always_inline struct vcpu_vt *to_vt(struct kvm_vcpu *vcpu)
+{
+	return &(container_of(vcpu, struct vcpu_vmx, vcpu)->vt);
+}
+
+static __always_inline struct kvm_vcpu *vt_to_vcpu(struct vcpu_vt *vt)
+{
+	return &(container_of(vt, struct vcpu_vmx, vt)->vcpu);
+}
+
+static __always_inline union vmx_exit_reason vmx_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+	return to_vt(vcpu)->exit_reason;
+}
+
+static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vt *vt = to_vt(vcpu);
+
+	if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1) &&
+	    !WARN_ON_ONCE(is_td_vcpu(vcpu)))
+		vt->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	return vt->exit_qualification;
+}
+
+static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vt *vt = to_vt(vcpu);
+
+	if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2) &&
+	    !WARN_ON_ONCE(is_td_vcpu(vcpu)))
+		vt->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	return vt->exit_intr_info;
+}
+
+void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
 int allocate_vpid(void);
 void free_vpid(int vpid);
 void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+			unsigned long fs_base, unsigned long gs_base);
 int vmx_get_cpl(struct kvm_vcpu *vcpu);
+int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu);
+bool vmx_emulation_required(struct kvm_vcpu *vcpu);
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
-void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
 void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
-void update_exception_bitmap(struct kvm_vcpu *vcpu);
-void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+
+bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
+void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
+bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
+bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
+bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
 void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
-struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
-void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
-
-#define POSTED_INTR_ON  0
-#define POSTED_INTR_SN  1
-
-static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
+void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+		    unsigned int flags);
+int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
+void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
+
+void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set);
+
+static inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
+						 u32 msr, int type)
 {
-	return test_and_set_bit(POSTED_INTR_ON,
-			(unsigned long *)&pi_desc->control);
+	vmx_set_intercept_for_msr(vcpu, msr, type, false);
 }
 
-static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+static inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
+						u32 msr, int type)
 {
-	return test_and_clear_bit(POSTED_INTR_ON,
-			(unsigned long *)&pi_desc->control);
+	vmx_set_intercept_for_msr(vcpu, msr, type, true);
 }
 
-static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
-{
-	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
-}
+u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
+u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
 
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
-{
-	return clear_bit(POSTED_INTR_SN,
-			(unsigned long *)&pi_desc->control);
-}
+gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
 
-static inline void pi_set_sn(struct pi_desc *pi_desc)
-{
-	return set_bit(POSTED_INTR_SN,
-			(unsigned long *)&pi_desc->control);
-}
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 
-static inline void pi_clear_on(struct pi_desc *pi_desc)
-{
-	clear_bit(POSTED_INTR_ON,
-		(unsigned long *)&pi_desc->control);
-}
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated);
 
-static inline int pi_test_on(struct pi_desc *pi_desc)
-{
-	return test_bit(POSTED_INTR_ON,
-			(unsigned long *)&pi_desc->control);
-}
+#define VMX_HOST_OWNED_DEBUGCTL_BITS	(DEBUGCTLMSR_FREEZE_IN_SMM)
 
-static inline int pi_test_sn(struct pi_desc *pi_desc)
+static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
 {
-	return test_bit(POSTED_INTR_SN,
-			(unsigned long *)&pi_desc->control);
-}
+	WARN_ON_ONCE(val & VMX_HOST_OWNED_DEBUGCTL_BITS);
 
-static inline u8 vmx_get_rvi(void)
-{
-	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+	val |= vcpu->arch.host_debugctl & VMX_HOST_OWNED_DEBUGCTL_BITS;
+	vmcs_write64(GUEST_IA32_DEBUGCTL, val);
 }
 
-static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
+static inline u64 vmx_guest_debugctl_read(void)
 {
-	vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
+	return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~VMX_HOST_OWNED_DEBUGCTL_BITS;
 }
 
-static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
+static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu)
 {
-	vmcs_write32(VM_ENTRY_CONTROLS, val);
-	vmx->vm_entry_controls_shadow = val;
-}
+	u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
-static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
-	if (vmx->vm_entry_controls_shadow != val)
-		vm_entry_controls_init(vmx, val);
-}
+	if (!((val ^ vcpu->arch.host_debugctl) & VMX_HOST_OWNED_DEBUGCTL_BITS))
+		return;
 
-static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
-{
-	return vmx->vm_entry_controls_shadow;
+	vmx_guest_debugctl_write(vcpu, val & ~VMX_HOST_OWNED_DEBUGCTL_BITS);
 }
 
-static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
-{
-	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
+/*
+ * Note, early Intel manuals have the write-low and read-high bitmap offsets
+ * the wrong way round.  The bitmaps control MSRs 0x00000000-0x00001fff and
+ * 0xc0000000-0xc0001fff.  The former (low) uses bytes 0-0x3ff for reads and
+ * 0x800-0xbff for writes.  The latter (high) uses 0x400-0x7ff for reads and
+ * 0xc00-0xfff for writes.  MSRs not covered by either of the ranges always
+ * VM-Exit.
+ */
+#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \
+static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \
+						       u32 msr) \
+{ \
+	int f = sizeof(unsigned long); \
+ \
+	if (msr <= 0x1fff) \
+		return bitop##_bit(msr, bitmap + base / f); \
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \
+		return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
+	return (rtype)true; \
 }
+#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \
+	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read,  0x0) \
+	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
 
-static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
-{
-	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
-}
+BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
 
-static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
+static inline u8 vmx_get_rvi(void)
 {
-	vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
+	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
 }
 
-static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
-{
-	vmcs_write32(VM_EXIT_CONTROLS, val);
-	vmx->vm_exit_controls_shadow = val;
-}
+#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
+	(VM_ENTRY_LOAD_DEBUG_CONTROLS)
+#ifdef CONFIG_X86_64
+	#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
+		(__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS | \
+		 VM_ENTRY_IA32E_MODE)
+#else
+	#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
+		__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS
+#endif
+#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS \
+	(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
+	 VM_ENTRY_LOAD_IA32_PAT | \
+	 VM_ENTRY_LOAD_IA32_EFER | \
+	 VM_ENTRY_LOAD_BNDCFGS | \
+	 VM_ENTRY_PT_CONCEAL_PIP | \
+	 VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+	 VM_ENTRY_LOAD_CET_STATE)
+
+#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
+	(VM_EXIT_SAVE_DEBUG_CONTROLS | \
+	 VM_EXIT_ACK_INTR_ON_EXIT)
+#ifdef CONFIG_X86_64
+	#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
+		(__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS | \
+		 VM_EXIT_HOST_ADDR_SPACE_SIZE)
+#else
+	#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
+		__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS
+#endif
+#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS \
+	(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+	 VM_EXIT_SAVE_IA32_PAT | \
+	 VM_EXIT_LOAD_IA32_PAT | \
+	 VM_EXIT_SAVE_IA32_EFER | \
+	 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | \
+	 VM_EXIT_LOAD_IA32_EFER | \
+	 VM_EXIT_CLEAR_BNDCFGS | \
+	 VM_EXIT_PT_CONCEAL_PIP | \
+	 VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+	 VM_EXIT_LOAD_CET_STATE)
+
+#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
+	(PIN_BASED_EXT_INTR_MASK | \
+	 PIN_BASED_NMI_EXITING)
+#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL \
+	(PIN_BASED_VIRTUAL_NMIS | \
+	 PIN_BASED_POSTED_INTR | \
+	 PIN_BASED_VMX_PREEMPTION_TIMER)
+
+#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
+	(CPU_BASED_HLT_EXITING | \
+	 CPU_BASED_CR3_LOAD_EXITING | \
+	 CPU_BASED_CR3_STORE_EXITING | \
+	 CPU_BASED_UNCOND_IO_EXITING | \
+	 CPU_BASED_MOV_DR_EXITING | \
+	 CPU_BASED_USE_TSC_OFFSETTING | \
+	 CPU_BASED_MWAIT_EXITING | \
+	 CPU_BASED_MONITOR_EXITING | \
+	 CPU_BASED_INVLPG_EXITING | \
+	 CPU_BASED_RDPMC_EXITING | \
+	 CPU_BASED_INTR_WINDOW_EXITING)
 
-static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
-{
-	if (vmx->vm_exit_controls_shadow != val)
-		vm_exit_controls_init(vmx, val);
-}
+#ifdef CONFIG_X86_64
+	#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
+		(__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL | \
+		 CPU_BASED_CR8_LOAD_EXITING | \
+		 CPU_BASED_CR8_STORE_EXITING)
+#else
+	#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
+		__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL
+#endif
 
-static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
-{
-	return vmx->vm_exit_controls_shadow;
+#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL \
+	(CPU_BASED_RDTSC_EXITING | \
+	 CPU_BASED_TPR_SHADOW | \
+	 CPU_BASED_USE_IO_BITMAPS | \
+	 CPU_BASED_MONITOR_TRAP_FLAG | \
+	 CPU_BASED_USE_MSR_BITMAPS | \
+	 CPU_BASED_NMI_WINDOW_EXITING | \
+	 CPU_BASED_PAUSE_EXITING | \
+	 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | \
+	 CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
+
+#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0
+#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL \
+	(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
+	 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
+	 SECONDARY_EXEC_WBINVD_EXITING | \
+	 SECONDARY_EXEC_ENABLE_VPID | \
+	 SECONDARY_EXEC_ENABLE_EPT | \
+	 SECONDARY_EXEC_UNRESTRICTED_GUEST | \
+	 SECONDARY_EXEC_PAUSE_LOOP_EXITING | \
+	 SECONDARY_EXEC_DESC | \
+	 SECONDARY_EXEC_ENABLE_RDTSCP | \
+	 SECONDARY_EXEC_ENABLE_INVPCID | \
+	 SECONDARY_EXEC_APIC_REGISTER_VIRT | \
+	 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
+	 SECONDARY_EXEC_SHADOW_VMCS | \
+	 SECONDARY_EXEC_ENABLE_XSAVES | \
+	 SECONDARY_EXEC_RDSEED_EXITING | \
+	 SECONDARY_EXEC_RDRAND_EXITING | \
+	 SECONDARY_EXEC_ENABLE_PML | \
+	 SECONDARY_EXEC_TSC_SCALING | \
+	 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
+	 SECONDARY_EXEC_PT_USE_GPA | \
+	 SECONDARY_EXEC_PT_CONCEAL_VMX | \
+	 SECONDARY_EXEC_ENABLE_VMFUNC | \
+	 SECONDARY_EXEC_BUS_LOCK_DETECTION | \
+	 SECONDARY_EXEC_NOTIFY_VM_EXITING | \
+	 SECONDARY_EXEC_ENCLS_EXITING | \
+	 SECONDARY_EXEC_EPT_VIOLATION_VE)
+
+#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
+#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \
+	(TERTIARY_EXEC_IPI_VIRT)
+
+#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \
+static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
+{ \
+	if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
+		vmcs_write##bits(uname, val); \
+		vmx->loaded_vmcs->controls_shadow.lname = val; \
+	} \
+} \
+static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \
+{ \
+	return vmcs->controls_shadow.lname; \
+} \
+static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \
+{ \
+	return __##lname##_controls_get(vmx->loaded_vmcs); \
+} \
+static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \
+{ \
+	BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
+	lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
+} \
+static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \
+{ \
+	BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
+	lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
+} \
+static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \
+						       bool set) \
+{ \
+	if (set) \
+		lname##_controls_setbit(vmx, val); \
+	else \
+		lname##_controls_clearbit(vmx, val); \
 }
+BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
+BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
+BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
+BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
+BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
+BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
 
-static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+/*
+ * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
+ * cache on demand.  Other registers not listed here are synced to
+ * the cache immediately after VM-Exit.
+ */
+#define VMX_REGS_LAZY_LOAD_SET	((1 << VCPU_REGS_RIP) | \
+				(1 << VCPU_REGS_RSP) | \
+				(1 << VCPU_EXREG_RFLAGS) | \
+				(1 << VCPU_EXREG_PDPTR) | \
+				(1 << VCPU_EXREG_SEGMENTS) | \
+				(1 << VCPU_EXREG_CR0) | \
+				(1 << VCPU_EXREG_CR3) | \
+				(1 << VCPU_EXREG_CR4) | \
+				(1 << VCPU_EXREG_EXIT_INFO_1) | \
+				(1 << VCPU_EXREG_EXIT_INFO_2))
+
+static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
 {
-	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
+	unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+
+	/*
+	 * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+	 * in order to construct shadow PTEs with the correct protections.
+	 * Note!  CR0.WP technically can be passed through to the guest if
+	 * paging is disabled, but checking CR0.PG would generate a cyclical
+	 * dependency of sorts due to forcing the caller to ensure CR0 holds
+	 * the correct value prior to determining which CR0 bits can be owned
+	 * by L1.  Keep it simple and limit the optimization to EPT.
	 */
+	if (!enable_ept)
+		bits &= ~X86_CR0_WP;
+	return bits;
 }
 
-static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
 {
-	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+	return container_of(kvm, struct kvm_vmx, kvm);
 }
 
-static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
-	vmx->segment_cache.bitmask = 0;
+	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static inline u32 vmx_vmentry_ctrl(void)
+void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
+int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
+void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
+
+struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
+void free_vmcs(struct vmcs *vmcs);
+int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
+void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
+
+static inline struct vmcs *alloc_vmcs(bool shadow)
 {
-	u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
-	if (pt_mode == PT_MODE_SYSTEM)
-		vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL);
-	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-	return vmentry_ctrl &
-		~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
+	return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
+			      GFP_KERNEL_ACCOUNT);
 }
 
-static inline u32 vmx_vmexit_ctrl(void)
+static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
 {
-	u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
-	if (pt_mode == PT_MODE_SYSTEM)
-		vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL);
-	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
-	return vmcs_config.vmexit_ctrl &
-		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
+	return secondary_exec_controls_get(vmx) &
+		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
 }
 
-u32 vmx_exec_control(struct vcpu_vmx *vmx);
-
-static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
 {
-	return container_of(kvm, struct kvm_vmx, kvm);
+	if (!enable_ept)
+		return true;
+
+	return allow_smaller_maxphyaddr &&
+	       cpuid_maxphyaddr(vcpu) < kvm_host.maxphyaddr;
 }
 
-static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
+static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
 {
-	return container_of(vcpu, struct vcpu_vmx, vcpu);
+	return enable_unrestricted_guest && (!is_guest_mode(vcpu) ||
+	    (secondary_exec_controls_get(to_vmx(vcpu)) &
+	    SECONDARY_EXEC_UNRESTRICTED_GUEST));
 }
 
-static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu);
+static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
 {
-	return &(to_vmx(vcpu)->pi_desc);
+	return is_unrestricted_guest(vcpu) || __vmx_guest_state_valid(vcpu);
 }
 
-struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu);
-void free_vmcs(struct vmcs *vmcs);
-int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
-void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
-void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs);
-void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
+void dump_vmcs(struct kvm_vcpu *vcpu);
 
-static inline struct vmcs *alloc_vmcs(bool shadow)
+static inline int vmx_get_instr_info_reg(u32 vmx_instr_info)
 {
-	return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
+	return (vmx_instr_info >> 3) & 0xf;
 }
 
-u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
-
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
-				bool invalidate_gpa)
+static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
 {
-	if (enable_ept && (invalidate_gpa || !enable_vpid)) {
-		if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
-			return;
-		ept_sync_context(construct_eptp(vcpu,
-						vcpu->arch.mmu->root_hpa));
-	} else {
-		vpid_sync_context(vpid);
-	}
+	return (vmx_instr_info >> 28) & 0xf;
 }
 
-static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
 {
-	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
+	return lapic_in_kernel(vcpu) && enable_ipiv;
 }
 
-static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 {
-	vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
-	vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+	vmx->segment_cache.bitmask = 0;
 }
 
+int vmx_init(void);
+void vmx_exit(void);
+
 #endif /* __KVM_X86_VMX_H */
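
The layout comment above __BUILD_VMX_MSR_BITMAP_HELPER is easier to check with concrete numbers. Below is a stand-alone user-space C sketch of the same addressing scheme; bitmap[], msr_bit() and set_intercept() are illustrative stand-ins, not the kernel helpers, which operate on unsigned long words via test/set/clear bitops rather than bytes.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t bitmap[4096];    /* one 4 KiB MSR bitmap page */

/*
 * Byte/bit position of @msr in the read half (base 0x0) or the write
 * half (base 0x800) of the bitmap; MSRs outside both ranges always exit.
 */
static bool msr_bit(uint32_t msr, unsigned int base,
                    unsigned int *byte, unsigned int *bit)
{
        if (msr <= 0x1fff)
                *byte = base + msr / 8;                         /* low range */
        else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
                *byte = base + 0x400 + (msr & 0x1fff) / 8;      /* high range */
        else
                return false;
        *bit = msr % 8;
        return true;
}

static void set_intercept(uint32_t msr, unsigned int base, bool intercept)
{
        unsigned int byte, bit;

        if (!msr_bit(msr, base, &byte, &bit))
                return;
        if (intercept)
                bitmap[byte] |= 1u << bit;
        else
                bitmap[byte] &= ~(1u << bit);
}

int main(void)
{
        unsigned int byte, bit;

        /* Intercept everything, then pass through reads of IA32_TSC (0x10). */
        memset(bitmap, 0xff, sizeof(bitmap));
        set_intercept(0x10, 0x0, false);

        msr_bit(0xc0000080 /* IA32_EFER */, 0x800, &byte, &bit);
        printf("EFER write intercept: byte 0x%x, bit %u\n", byte, bit);
        return 0;
}

Running this prints byte 0xc10, bit 0, consistent with the write-high region starting at 0xc00.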
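BUILD_CONTROLS_SHADOW (added above) generates set/get/setbit/clearbit accessors that mirror each VMCS control field in loaded_vmcs->controls_shadow, so a VMWRITE is issued only when the value actually changes and reads never touch the VMCS. A minimal user-space model of that caching pattern, assuming a hypothetical fake_vmwrite() stand-in for the real vmcs_write32():

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_vmcs_field;        /* stands in for the hardware VMCS */
static unsigned int vmwrites;

static void fake_vmwrite(uint32_t val)  /* stand-in for vmcs_write32() */
{
        fake_vmcs_field = val;
        vmwrites++;
}

static struct {
        uint32_t vm_entry;              /* models controls_shadow.vm_entry */
} shadow;

static void vm_entry_controls_set(uint32_t val)
{
        if (shadow.vm_entry != val) {   /* skip the VMWRITE if unchanged */
                fake_vmwrite(val);
                shadow.vm_entry = val;
        }
}

static uint32_t vm_entry_controls_get(void)
{
        return shadow.vm_entry;         /* no VMREAD needed */
}

static void vm_entry_controls_setbit(uint32_t bit)
{
        vm_entry_controls_set(vm_entry_controls_get() | bit);
}

int main(void)
{
        vm_entry_controls_setbit(1u << 9);      /* e.g. IA-32e mode guest */
        vm_entry_controls_setbit(1u << 9);      /* no-op: value unchanged */
        printf("VMWRITEs issued: %u\n", vmwrites);      /* prints 1 */
        return 0;
}

The BUILD_BUG_ON in the real setbit/clearbit additionally rejects, at compile time, any bit that is neither in the REQUIRED nor the OPTIONAL mask for that control field.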
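The vmx_guest_debugctl_write()/vmx_guest_debugctl_read() pair splits IA32_DEBUGCTL into guest-owned bits and host-owned bits (currently only DEBUGCTLMSR_FREEZE_IN_SMM): the VMCS always carries the host's setting of the host-owned bits, while the guest never sees them. A self-contained sketch of that masking, with the VMCS field modeled as a plain variable rather than vmcs_write64()/vmcs_read64():

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEBUGCTLMSR_BTF                 (1ULL << 1)
#define DEBUGCTLMSR_FREEZE_IN_SMM       (1ULL << 14)
#define HOST_OWNED_BITS                 DEBUGCTLMSR_FREEZE_IN_SMM

static uint64_t vmcs_guest_debugctl;    /* models GUEST_IA32_DEBUGCTL */
static uint64_t host_debugctl = DEBUGCTLMSR_FREEZE_IN_SMM;

static void guest_debugctl_write(uint64_t val)
{
        assert(!(val & HOST_OWNED_BITS));       /* guest value must be clean */

        val |= host_debugctl & HOST_OWNED_BITS; /* re-apply host-owned bits */
        vmcs_guest_debugctl = val;
}

static uint64_t guest_debugctl_read(void)
{
        return vmcs_guest_debugctl & ~HOST_OWNED_BITS;  /* hide host bits */
}

int main(void)
{
        guest_debugctl_write(DEBUGCTLMSR_BTF);
        printf("guest sees %#llx\n", (unsigned long long)guest_debugctl_read());
        printf("vmcs holds %#llx\n", (unsigned long long)vmcs_guest_debugctl);
        return 0;
}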
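PML_HEAD_INDEX encodes that the CPU fills the 512-entry page-modification log backwards, from the last slot toward slot 0, decrementing the PML index as it goes. A rough user-space model of how such a buffer is drained — pml[] and pml_index are stand-ins for the PML page and the VMCS PML index field, and the real draining logic lives in vmx.c, not in this header:

#include <stdint.h>
#include <stdio.h>

#define PML_LOG_NR_ENTRIES      512
#define PML_HEAD_INDEX          (PML_LOG_NR_ENTRIES - 1)

static uint64_t pml[PML_LOG_NR_ENTRIES];        /* GPAs of dirtied pages */
static uint16_t pml_index = PML_HEAD_INDEX;     /* next slot to be written */

static void hw_log_write(uint64_t gpa)          /* what the CPU does */
{
        pml[pml_index--] = gpa;                 /* wraps to 0xffff when full */
}

static void flush_pml_buffer(void)              /* what the hypervisor does */
{
        uint16_t i = pml_index;

        if (i != PML_HEAD_INDEX) {              /* anything logged at all? */
                /* 0xffff (wrapped) means the buffer is completely full. */
                i = (i >= PML_LOG_NR_ENTRIES) ? 0 : i + 1;
                for (; i < PML_LOG_NR_ENTRIES; i++)     /* newest first */
                        printf("dirty gpa %#llx\n", (unsigned long long)pml[i]);
        }
        pml_index = PML_HEAD_INDEX;             /* rearm the log */
}

int main(void)
{
        hw_log_write(0x1000);
        hw_log_write(0x2000);
        flush_pml_buffer();     /* prints 0x2000, then 0x1000 */
        return 0;
}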
