From 511a8556e3342af6a46eb9477936b29aa983f154 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 21 May 2019 14:06:54 +0800 Subject: KVM: X86: Emulate MSR_IA32_MISC_ENABLE MWAIT bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSR IA32_MISC_ENABLE bit 18, according to SDM: | When this bit is set to 0, the MONITOR feature flag is not set (CPUID.01H:ECX[bit 3] = 0). | This indicates that MONITOR/MWAIT are not supported. | | Software attempts to execute MONITOR/MWAIT will cause #UD when this bit is 0. | | When this bit is set to 1 (default), MONITOR/MWAIT are supported (CPUID.01H:ECX[bit 3] = 1). The CPUID.01H:ECX[bit 3] ought to mirror the value of the MSR bit, CPUID.01H:ECX[bit 3] is a better guard than kvm_mwait_in_guest(). kvm_mwait_in_guest() affects the behavior of MONITOR/MWAIT, not its guest visibility. This patch implements toggling of the CPUID bit based on guest writes to the MSR. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Cc: Konrad Rzeszutek Wilk Signed-off-by: Wanpeng Li [Fixes for backwards compatibility - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/uapi') diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 7a0e64ccd6ff..f9b021e16ebc 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -378,10 +378,11 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; -#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) -#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 -- cgit From 2d5ba19bdfef4dd06add144eb04287ee98409f75 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 3 Jun 2019 19:52:44 -0300 Subject: kvm: x86: add host poll control msrs Add an MSRs which allows the guest to disable host polling (specifically the cpuidle-haltpoll, when performing polling in the guest, disables host side polling). Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm_para.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/uapi') diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316..21d5f0240595 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,6 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12 #define KVM_HINTS_REALTIME 0 @@ -47,6 +48,7 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05 struct kvm_steal_time { __u64 steal; -- cgit From f85f6e7bc9682a6d8b342c010cd6aa58521fdeec Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 11 Jun 2019 20:23:48 +0800 Subject: KVM: X86: Yield to IPI target if necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When sending a call-function IPI-many to vCPUs, yield if any of the IPI target vCPUs was preempted, we just select the first preempted target vCPU which we found since the state of target vCPUs can change underneath and to avoid race conditions. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/uapi') diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 21d5f0240595..2a8e0b6b9805 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -30,6 +30,7 @@ #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_FEATURE_POLL_CONTROL 12 +#define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_HINTS_REALTIME 0 -- cgit From f087a02941feacf7d6f097522bc67c602fda18e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 Jun 2019 11:55:34 -0700 Subject: KVM: nVMX: Stash L1's CR3 in vmcs01.GUEST_CR3 on nested entry w/o EPT KVM does not have 100% coverage of VMX consistency checks, i.e. some checks that cause VM-Fail may only be detected by hardware during a nested VM-Entry. In such a case, KVM must restore L1's state to the pre-VM-Enter state as L2's state has already been loaded into KVM's software model. L1's CR3 and PDPTRs in particular are loaded from vmcs01.GUEST_*. But when EPT is disabled, the associated fields hold KVM's shadow values, not L1's "real" values. Fortunately, when EPT is disabled the PDPTRs come from memory, i.e. are not cached in the VMCS. Which leaves CR3 as the sole anomaly. A previously applied workaround to handle CR3 was to force nested early checks if EPT is disabled: commit 2b27924bb1d48 ("KVM: nVMX: always use early vmcs check when EPT is disabled") Forcing nested early checks is undesirable as doing so adds hundreds of cycles to every nested VM-Entry. Rather than take this performance hit, handle CR3 by overwriting vmcs01.GUEST_CR3 with L1's CR3 during nested VM-Entry when EPT is disabled *and* nested early checks are disabled. By stuffing vmcs01.GUEST_CR3, nested_vmx_restore_host_state() will naturally restore the correct vcpu->arch.cr3 from vmcs01.GUEST_CR3. These shenanigans work because nested_vmx_restore_host_state() does a full kvm_mmu_reset_context(), i.e. unloads the current MMU, which guarantees vmcs01.GUEST_CR3 will be rewritten with a new shadow CR3 prior to re-entering L1. vcpu->arch.root_mmu.root_hpa is set to INVALID_PAGE via: nested_vmx_restore_host_state() -> kvm_mmu_reset_context() -> kvm_mmu_unload() -> kvm_mmu_free_roots() kvm_mmu_unload() has WARN_ON(root_hpa != INVALID_PAGE), i.e. we can bank on 'root_hpa == INVALID_PAGE' unless the implementation of kvm_mmu_reset_context() is changed. On the way into L1, VMCS.GUEST_CR3 is guaranteed to be written (on a successful entry) via: vcpu_enter_guest() -> kvm_mmu_reload() -> kvm_mmu_load() -> kvm_mmu_load_cr3() -> vmx_set_cr3() Stuff vmcs01.GUEST_CR3 if and only if nested early checks are disabled as a "late" VM-Fail should never happen win that case (KVM WARNs), and the conditional write avoids the need to restore the correct GUEST_CR3 when nested_vmx_check_vmentry_hw() fails. Signed-off-by: Sean Christopherson Message-Id: <20190607185534.24368-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/vmx.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/uapi') diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d213ec5c3766..f0b0c90dd398 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,7 +146,6 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 -#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ -- cgit From 66bb8a065f5aedd4551d8d3fbce582972f65c2e1 Mon Sep 17 00:00:00 2001 From: Eric Hankland Date: Wed, 10 Jul 2019 18:25:15 -0700 Subject: KVM: x86: PMU Event Filter Some events can provide a guest with information about other guests or the host (e.g. L3 cache stats); providing the capability to restrict access to a "safe" set of events would limit the potential for the PMU to be used in any side channel attacks. This change introduces a new VM ioctl that sets an event filter. If the guest attempts to program a counter for any blacklisted or non-whitelisted event, the kernel counter won't be created, so any RDPMC/RDMSR will show 0 instances of that event. Signed-off-by: Eric Hankland [Lots of changes. All remaining bugs are probably mine. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/include/uapi') diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f9b021e16ebc..46588f5d6283 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -422,4 +422,14 @@ struct kvm_nested_state { __u8 data[0]; }; +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u64 events[0]; +}; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + #endif /* _ASM_X86_KVM_H */ -- cgit