diff options
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.h')
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 585 |
1 files changed, 387 insertions, 198 deletions
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 3979a947933a..bc3ed3145d7e 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -6,21 +6,18 @@ #include <asm/kvm.h> #include <asm/intel_pt.h> +#include <asm/perf_event.h> +#include <asm/posted_intr.h> #include "capabilities.h" -#include "kvm_cache_regs.h" -#include "posted_intr.h" +#include "../kvm_cache_regs.h" +#include "pmu_intel.h" #include "vmcs.h" #include "vmx_ops.h" -#include "cpuid.h" - -extern const u32 vmx_msr_index[]; - -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -#define MSR_TYPE_RW 3 - -#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) +#include "../cpuid.h" +#include "run_flags.h" +#include "../mmu.h" +#include "common.h" #ifdef CONFIG_X86_64 #define MAX_NR_USER_RETURN_MSRS 7 @@ -64,60 +61,12 @@ struct pt_ctx { struct pt_desc { u64 ctl_bitmask; - u32 addr_range; + u32 num_address_ranges; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; struct pt_ctx host; struct pt_ctx guest; }; -union vmx_exit_reason { - struct { - u32 basic : 16; - u32 reserved16 : 1; - u32 reserved17 : 1; - u32 reserved18 : 1; - u32 reserved19 : 1; - u32 reserved20 : 1; - u32 reserved21 : 1; - u32 reserved22 : 1; - u32 reserved23 : 1; - u32 reserved24 : 1; - u32 reserved25 : 1; - u32 bus_lock_detected : 1; - u32 enclave_mode : 1; - u32 smi_pending_mtf : 1; - u32 smi_from_vmx_root : 1; - u32 reserved30 : 1; - u32 failed_vmentry : 1; - }; - u32 full; -}; - -#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc) -#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records) - -bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu); -bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu); - -int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); -void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); - -struct lbr_desc { - /* Basic info about guest LBR records. */ - struct x86_pmu_lbr records; - - /* - * Emulate LBR feature via passthrough LBR registers when the - * per-vcpu guest LBR event is scheduled on the current pcpu. - * - * The records may be inaccurate if the host reclaims the LBR. - */ - struct perf_event *event; - - /* True if LBRs are marked as not intercepted in the MSR bitmap */ - bool msr_passthrough; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -144,6 +93,16 @@ struct nested_vmx { struct vmcs12 *cached_shadow_vmcs12; /* + * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer + */ + struct gfn_to_hva_cache shadow_vmcs12_cache; + + /* + * GPA to HVA cache for VMCS12 + */ + struct gfn_to_hva_cache vmcs12_cache; + + /* * Indicates if the shadow vmcs or enlightened vmcs must be updated * with the data held by struct vmcs12. */ @@ -151,6 +110,15 @@ struct nested_vmx { bool dirty_vmcs12; /* + * Indicates whether MSR bitmap for L2 needs to be rebuilt due to + * changes in MSR bitmap for L1 or switching to a different L2. Note, + * this flag can only be used reliably in conjunction with a paravirt L1 + * which informs L0 whether any changes to MSR bitmap for L2 were done + * on its side. + */ + bool force_msr_bitmap_recalc; + + /* * Indicates lazily loaded guest state has not yet been decached from * vmcs02. */ @@ -166,6 +134,8 @@ struct nested_vmx { bool change_vmcs01_virtual_apic_mode; bool reload_vmcs01_apic_access_page; bool update_vmcs01_cpu_dirty_logging; + bool update_vmcs01_apicv_status; + bool update_vmcs01_hwapic_isr; /* * Enlightened VMCS has been enabled. It does not mean that L1 has to @@ -186,12 +156,10 @@ struct nested_vmx { * Guest pages referred to in the vmcs02 with host-physical * pointers, so we must keep them pinned while L2 runs. */ - struct page *apic_access_page; + struct kvm_host_map apic_access_page_map; struct kvm_host_map virtual_apic_map; struct kvm_host_map pi_desc_map; - struct kvm_host_map msr_bitmap_map; - struct pi_desc *pi_desc; bool pi_pending; u16 posted_intr_nv; @@ -201,9 +169,21 @@ struct nested_vmx { bool has_preemption_timer_deadline; bool preemption_timer_expired; - /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ - u64 vmcs01_debugctl; - u64 vmcs01_guest_bndcfgs; + /* + * Used to snapshot MSRs that are conditionally loaded on VM-Enter in + * order to propagate the guest's pre-VM-Enter value into vmcs02. For + * emulation of VMLAUNCH/VMRESUME, the snapshot will be of L1's value. + * For KVM_SET_NESTED_STATE, the snapshot is of L2's value, _if_ + * userspace restores MSRs before nested state. If userspace restores + * MSRs after nested state, the snapshot holds garbage, but KVM can't + * detect that, and the garbage value in vmcs02 will be overwritten by + * MSR restoration in any case. + */ + u64 pre_vmenter_debugctl; + u64 pre_vmenter_bndcfgs; + u64 pre_vmenter_s_cet; + u64 pre_vmenter_ssp; + u64 pre_vmenter_ssp_tbl; /* to migrate it to L1 if L2 writes to L1's CR8 directly */ int l1_tpr_threshold; @@ -221,27 +201,19 @@ struct nested_vmx { bool guest_mode; } smm; +#ifdef CONFIG_KVM_HYPERV gpa_t hv_evmcs_vmptr; struct kvm_host_map hv_evmcs_map; struct hv_enlightened_vmcs *hv_evmcs; +#endif }; struct vcpu_vmx { struct kvm_vcpu vcpu; + struct vcpu_vt vt; u8 fail; - u8 msr_bitmap_mode; + u8 x2apic_msr_bitmap_mode; - /* - * If true, host state has been stored in vmx->loaded_vmcs for - * the CPU registers that only need to be switched when transitioning - * to/from the kernel, and the registers have been loaded with guest - * values. If false, host state is loaded in the CPU registers - * and vmx->loaded_vmcs->host_state is invalid. - */ - bool guest_state_loaded; - - unsigned long exit_qualification; - u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -250,23 +222,16 @@ struct vcpu_vmx { * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to * be loaded into hardware if those conditions aren't met. - * nr_active_uret_msrs tracks the number of MSRs that need to be loaded - * into hardware when running the guest. guest_uret_msrs[] is resorted - * whenever the number of "active" uret MSRs is modified. */ struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS]; - int nr_active_uret_msrs; bool guest_uret_msrs_loaded; #ifdef CONFIG_X86_64 - u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif u64 spec_ctrl; u32 msr_ia32_umwait_control; - u32 secondary_exec_control; - /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested @@ -299,12 +264,6 @@ struct vcpu_vmx { } seg[8]; } segment_cache; int vpid; - bool emulation_required; - - union vmx_exit_reason exit_reason; - - /* Posted interrupt descriptor */ - struct pi_desc pi_desc; /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; @@ -313,17 +272,16 @@ struct vcpu_vmx { unsigned int ple_window; bool ple_window_dirty; - bool req_immediate_exit; - /* Support for PML */ -#define PML_ENTITY_NUM 512 +#define PML_LOG_NR_ENTRIES 512 + /* PML is written backwards: this is the first entry written by the CPU */ +#define PML_HEAD_INDEX (PML_LOG_NR_ENTRIES-1) + struct page *pml_pg; /* apic deadline value in host tsc */ u64 hv_deadline_tsc; - unsigned long host_debugctlmsr; - /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included @@ -333,16 +291,14 @@ struct vcpu_vmx { u64 msr_ia32_feature_control_valid_bits; /* SGX Launch Control public key hash */ u64 msr_ia32_sgxlepubkeyhash[4]; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; struct lbr_desc lbr_desc; - /* Save desired MSR intercept (read: pass-through) state */ -#define MAX_POSSIBLE_PASSTHROUGH_MSRS 13 - struct { - DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS); - DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS); - } shadow_msr_intercept; + /* ve_info must be page aligned. */ + struct vmx_ve_information *ve_info; }; struct kvm_vmx { @@ -351,11 +307,48 @@ struct kvm_vmx { unsigned int tss_addr; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; + /* Posted Interrupt Descriptor (PID) table for IPI virtualization */ + u64 *pid_table; }; -bool nested_vmx_allowed(struct kvm_vcpu *vcpu); -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, - struct loaded_vmcs *buddy); +static __always_inline struct vcpu_vt *to_vt(struct kvm_vcpu *vcpu) +{ + return &(container_of(vcpu, struct vcpu_vmx, vcpu)->vt); +} + +static __always_inline struct kvm_vcpu *vt_to_vcpu(struct vcpu_vt *vt) +{ + return &(container_of(vt, struct vcpu_vmx, vt)->vcpu); +} + +static __always_inline union vmx_exit_reason vmx_get_exit_reason(struct kvm_vcpu *vcpu) +{ + return to_vt(vcpu)->exit_reason; +} + +static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) +{ + struct vcpu_vt *vt = to_vt(vcpu); + + if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1) && + !WARN_ON_ONCE(is_td_vcpu(vcpu))) + vt->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + return vt->exit_qualification; +} + +static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_vt *vt = to_vt(vcpu); + + if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2) && + !WARN_ON_ONCE(is_td_vcpu(vcpu))) + vt->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + return vt->exit_intr_info; +} + +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu); int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); @@ -363,6 +356,8 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); +int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu); +bool vmx_emulation_required(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu); @@ -373,13 +368,12 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void set_cr4_guest_host_mask(struct vcpu_vmx *vmx); void ept_save_pdptrs(struct kvm_vcpu *vcpu); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); -u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); +void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); bool vmx_nmi_blocked(struct kvm_vcpu *vcpu); +bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu); bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu); bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); @@ -387,134 +381,305 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); -void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type); -void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type); +void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set); -u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu); -u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); +static inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, + u32 msr, int type) +{ + vmx_set_intercept_for_msr(vcpu, msr, type, false); +} -static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, - int type, bool value) +static inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, + u32 msr, int type) { - if (value) - vmx_enable_intercept_for_msr(vcpu, msr, type); - else - vmx_disable_intercept_for_msr(vcpu, msr, type); + vmx_set_intercept_for_msr(vcpu, msr, type, true); } +u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu); +u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); + +gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); + void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); -static inline u8 vmx_get_rvi(void) +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); + +#define VMX_HOST_OWNED_DEBUGCTL_BITS (DEBUGCTLMSR_FREEZE_IN_SMM) + +static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) { - return vmcs_read16(GUEST_INTR_STATUS) & 0xff; -} + WARN_ON_ONCE(val & VMX_HOST_OWNED_DEBUGCTL_BITS); -#define BUILD_CONTROLS_SHADOW(lname, uname) \ -static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ -{ \ - if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ - vmcs_write32(uname, val); \ - vmx->loaded_vmcs->controls_shadow.lname = val; \ - } \ -} \ -static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ -{ \ - return vmx->loaded_vmcs->controls_shadow.lname; \ -} \ -static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ -{ \ - lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ -} \ -static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \ -{ \ - lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ + val |= vcpu->arch.host_debugctl & VMX_HOST_OWNED_DEBUGCTL_BITS; + vmcs_write64(GUEST_IA32_DEBUGCTL, val); } -BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS) -BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS) -BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL) -BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL) -BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL) -static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu) +static inline u64 vmx_guest_debugctl_read(void) { - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_RFLAGS) - | (1 << VCPU_EXREG_PDPTR) - | (1 << VCPU_EXREG_SEGMENTS) - | (1 << VCPU_EXREG_CR0) - | (1 << VCPU_EXREG_CR3) - | (1 << VCPU_EXREG_CR4) - | (1 << VCPU_EXREG_EXIT_INFO_1) - | (1 << VCPU_EXREG_EXIT_INFO_2)); - vcpu->arch.regs_dirty = 0; + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~VMX_HOST_OWNED_DEBUGCTL_BITS; } -static inline u32 vmx_vmentry_ctrl(void) +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) { - u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; - if (vmx_pt_mode_is_system()) - vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | - VM_ENTRY_LOAD_IA32_RTIT_CTL); - /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmentry_ctrl & - ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & VMX_HOST_OWNED_DEBUGCTL_BITS)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~VMX_HOST_OWNED_DEBUGCTL_BITS); } -static inline u32 vmx_vmexit_ctrl(void) -{ - u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; - if (vmx_pt_mode_is_system()) - vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | - VM_EXIT_CLEAR_IA32_RTIT_CTL); - /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmexit_ctrl & - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); +/* + * Note, early Intel manuals have the write-low and read-high bitmap offsets + * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and + * 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and + * 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and + * 0xc00-0xfff for writes. MSRs not covered by either of the ranges always + * VM-Exit. + */ +#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \ +static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \ + u32 msr) \ +{ \ + int f = sizeof(unsigned long); \ + \ + if (msr <= 0x1fff) \ + return bitop##_bit(msr, bitmap + base / f); \ + else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \ + return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \ + return (rtype)true; \ } +#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \ + __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \ + __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800) -u32 vmx_exec_control(struct vcpu_vmx *vmx); -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); +BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test) +BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear) +BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set) -static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) +static inline u8 vmx_get_rvi(void) { - return container_of(kvm, struct kvm_vmx, kvm); + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; } -static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct vcpu_vmx, vcpu); +#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \ + (VM_ENTRY_LOAD_DEBUG_CONTROLS) +#ifdef CONFIG_X86_64 + #define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \ + (__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS | \ + VM_ENTRY_IA32E_MODE) +#else + #define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \ + __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS +#endif +#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS \ + (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_ENTRY_LOAD_IA32_PAT | \ + VM_ENTRY_LOAD_IA32_EFER | \ + VM_ENTRY_LOAD_BNDCFGS | \ + VM_ENTRY_PT_CONCEAL_PIP | \ + VM_ENTRY_LOAD_IA32_RTIT_CTL | \ + VM_ENTRY_LOAD_CET_STATE) + +#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ + (VM_EXIT_SAVE_DEBUG_CONTROLS | \ + VM_EXIT_ACK_INTR_ON_EXIT) +#ifdef CONFIG_X86_64 + #define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ + (__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS | \ + VM_EXIT_HOST_ADDR_SPACE_SIZE) +#else + #define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ + __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS +#endif +#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_IA32_PAT | \ + VM_EXIT_LOAD_IA32_PAT | \ + VM_EXIT_SAVE_IA32_EFER | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | \ + VM_EXIT_LOAD_IA32_EFER | \ + VM_EXIT_CLEAR_BNDCFGS | \ + VM_EXIT_PT_CONCEAL_PIP | \ + VM_EXIT_CLEAR_IA32_RTIT_CTL | \ + VM_EXIT_LOAD_CET_STATE) + +#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ + (PIN_BASED_EXT_INTR_MASK | \ + PIN_BASED_NMI_EXITING) +#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL \ + (PIN_BASED_VIRTUAL_NMIS | \ + PIN_BASED_POSTED_INTR | \ + PIN_BASED_VMX_PREEMPTION_TIMER) + +#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \ + (CPU_BASED_HLT_EXITING | \ + CPU_BASED_CR3_LOAD_EXITING | \ + CPU_BASED_CR3_STORE_EXITING | \ + CPU_BASED_UNCOND_IO_EXITING | \ + CPU_BASED_MOV_DR_EXITING | \ + CPU_BASED_USE_TSC_OFFSETTING | \ + CPU_BASED_MWAIT_EXITING | \ + CPU_BASED_MONITOR_EXITING | \ + CPU_BASED_INVLPG_EXITING | \ + CPU_BASED_RDPMC_EXITING | \ + CPU_BASED_INTR_WINDOW_EXITING) + +#ifdef CONFIG_X86_64 + #define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \ + (__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL | \ + CPU_BASED_CR8_LOAD_EXITING | \ + CPU_BASED_CR8_STORE_EXITING) +#else + #define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \ + __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL +#endif + +#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL \ + (CPU_BASED_RDTSC_EXITING | \ + CPU_BASED_TPR_SHADOW | \ + CPU_BASED_USE_IO_BITMAPS | \ + CPU_BASED_MONITOR_TRAP_FLAG | \ + CPU_BASED_USE_MSR_BITMAPS | \ + CPU_BASED_NMI_WINDOW_EXITING | \ + CPU_BASED_PAUSE_EXITING | \ + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | \ + CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) + +#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0 +#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL \ + (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \ + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \ + SECONDARY_EXEC_WBINVD_EXITING | \ + SECONDARY_EXEC_ENABLE_VPID | \ + SECONDARY_EXEC_ENABLE_EPT | \ + SECONDARY_EXEC_UNRESTRICTED_GUEST | \ + SECONDARY_EXEC_PAUSE_LOOP_EXITING | \ + SECONDARY_EXEC_DESC | \ + SECONDARY_EXEC_ENABLE_RDTSCP | \ + SECONDARY_EXEC_ENABLE_INVPCID | \ + SECONDARY_EXEC_APIC_REGISTER_VIRT | \ + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \ + SECONDARY_EXEC_SHADOW_VMCS | \ + SECONDARY_EXEC_ENABLE_XSAVES | \ + SECONDARY_EXEC_RDSEED_EXITING | \ + SECONDARY_EXEC_RDRAND_EXITING | \ + SECONDARY_EXEC_ENABLE_PML | \ + SECONDARY_EXEC_TSC_SCALING | \ + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \ + SECONDARY_EXEC_PT_USE_GPA | \ + SECONDARY_EXEC_PT_CONCEAL_VMX | \ + SECONDARY_EXEC_ENABLE_VMFUNC | \ + SECONDARY_EXEC_BUS_LOCK_DETECTION | \ + SECONDARY_EXEC_NOTIFY_VM_EXITING | \ + SECONDARY_EXEC_ENCLS_EXITING | \ + SECONDARY_EXEC_EPT_VIOLATION_VE) + +#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0 +#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \ + (TERTIARY_EXEC_IPI_VIRT) + +#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \ +static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ + vmcs_write##bits(uname, val); \ + vmx->loaded_vmcs->controls_shadow.lname = val; \ + } \ +} \ +static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \ +{ \ + return vmcs->controls_shadow.lname; \ +} \ +static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \ +{ \ + return __##lname##_controls_get(vmx->loaded_vmcs); \ +} \ +static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \ + lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ +} \ +static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \ +{ \ + BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \ + lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ +} \ +static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \ + bool set) \ +{ \ + if (set) \ + lname##_controls_setbit(vmx, val); \ + else \ + lname##_controls_clearbit(vmx, val); \ } +BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32) +BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32) +BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32) +BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32) +BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32) +BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) -static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) +/* + * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the + * cache on demand. Other registers not listed here are synced to + * the cache immediately after VM-Exit. + */ +#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REGS_RIP) | \ + (1 << VCPU_REGS_RSP) | \ + (1 << VCPU_EXREG_RFLAGS) | \ + (1 << VCPU_EXREG_PDPTR) | \ + (1 << VCPU_EXREG_SEGMENTS) | \ + (1 << VCPU_EXREG_CR0) | \ + (1 << VCPU_EXREG_CR3) | \ + (1 << VCPU_EXREG_CR4) | \ + (1 << VCPU_EXREG_EXIT_INFO_1) | \ + (1 << VCPU_EXREG_EXIT_INFO_2)) + +static inline unsigned long vmx_l1_guest_owned_cr0_bits(void) { - struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS; - if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_1)) { - kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1); - vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - } - return vmx->exit_qualification; + /* + * CR0.WP needs to be intercepted when KVM is shadowing legacy paging + * in order to construct shadow PTEs with the correct protections. + * Note! CR0.WP technically can be passed through to the guest if + * paging is disabled, but checking CR0.PG would generate a cyclical + * dependency of sorts due to forcing the caller to ensure CR0 holds + * the correct value prior to determining which CR0 bits can be owned + * by L1. Keep it simple and limit the optimization to EPT. + */ + if (!enable_ept) + bits &= ~X86_CR0_WP; + return bits; } -static inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu) +static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) { - struct vcpu_vmx *vmx = to_vmx(vcpu); + return container_of(kvm, struct kvm_vmx, kvm); +} - if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_2)) { - kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2); - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - } - return vmx->exit_intr_info; +static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct vcpu_vmx, vcpu); } +void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); +int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); +void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); + struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); void free_vmcs(struct vmcs *vmcs); int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); -void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs); static inline struct vmcs *alloc_vmcs(bool shadow) { @@ -524,7 +689,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow) static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) { - return vmx->secondary_exec_control & + return secondary_exec_controls_get(vmx) & SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } @@ -533,7 +698,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) if (!enable_ept) return true; - return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; + return allow_smaller_maxphyaddr && + cpuid_maxphyaddr(vcpu) < kvm_host.maxphyaddr; } static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu) @@ -551,4 +717,27 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu) void dump_vmcs(struct kvm_vcpu *vcpu); +static inline int vmx_get_instr_info_reg(u32 vmx_instr_info) +{ + return (vmx_instr_info >> 3) & 0xf; +} + +static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info) +{ + return (vmx_instr_info >> 28) & 0xf; +} + +static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu) +{ + return lapic_in_kernel(vcpu) && enable_ipiv; +} + +static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + +int vmx_init(void); +void vmx_exit(void); + #endif /* __KVM_X86_VMX_H */ |
