summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-26 11:46:28 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-26 11:46:28 -0800
commit42b00f122cfbfed79fc29b0b3610f3abbb1e3864 (patch)
treec110a18c03e9ada45b6f3593843f0a06b36773dc /arch/x86/kvm/x86.c
parent460023a5d1d2aa0f733b6708b2fae5ea9f9dfec0 (diff)
parenta0aea130afebcd091d5396d13f25b9da24c9144a (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "ARM: - selftests improvements - large PUD support for HugeTLB - single-stepping fixes - improved tracing - various timer and vGIC fixes x86: - Processor Tracing virtualization - STIBP support - some correctness fixes - refactorings and splitting of vmx.c - use the Hyper-V range TLB flush hypercall - reduce order of vcpu struct - WBNOINVD support - do not use -ftrace for __noclone functions - nested guest support for PAUSE filtering on AMD - more Hyper-V enlightenments (direct mode for synthetic timers) PPC: - nested VFIO s390: - bugfixes only this time" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (171 commits) KVM: x86: Add CPUID support for new instruction WBNOINVD kvm: selftests: ucall: fix exit mmio address guessing Revert "compiler-gcc: disable -ftracer for __noclone functions" KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines KVM: VMX: Explicitly reference RCX as the vmx_vcpu pointer in asm blobs KVM: x86: Use jmp to invoke kvm_spurious_fault() from .fixup MAINTAINERS: Add arch/x86/kvm sub-directories to existing KVM/x86 entry KVM/x86: Use SVM assembly instruction mnemonics instead of .byte streams KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range() KVM/MMU: Flush tlb directly in kvm_set_pte_rmapp() KVM/MMU: Move tlb flush in kvm_set_pte_rmapp() to kvm_mmu_notifier_change_pte() KVM: Make kvm_set_spte_hva() return int KVM: Replace old tlb flush function with new one to flush a specified range. KVM/MMU: Add tlb flush with range helper function KVM/VMX: Add hv tlb range flush support x86/hyper-v: Add HvFlushGuestAddressList hypercall support KVM: Add tlb_remote_flush_with_range callback in kvm_x86_ops KVM: x86: Disable Intel PT when VMXON in L1 guest KVM: x86: Set intercept for Intel PT MSRs read/write KVM: x86: Implement Intel PT MSRs read/write emulation ...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c161
1 files changed, 128 insertions, 33 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f049ecfac7bb..02c8e095a239 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -69,6 +69,7 @@
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
+#include <asm/intel_pt.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -213,6 +214,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
u64 __read_mostly host_xcr0;
+struct kmem_cache *x86_fpu_cache;
+EXPORT_SYMBOL_GPL(x86_fpu_cache);
+
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
@@ -1121,7 +1125,13 @@ static u32 msrs_to_save[] = {
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
- MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
+ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
+ MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
+ MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
+ MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
+ MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
+ MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
};
static unsigned num_msrs_to_save;
@@ -2999,6 +3009,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_TLBFLUSH:
case KVM_CAP_HYPERV_SEND_IPI:
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+ case KVM_CAP_HYPERV_CPUID:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3010,7 +3021,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_TIME:
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
case KVM_CAP_TSC_DEADLINE_TIMER:
- case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_DISABLE_QUIRKS:
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
@@ -3632,7 +3642,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+ struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
u64 xstate_bv = xsave->header.xfeatures;
u64 valid;
@@ -3674,7 +3684,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+ struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
u64 valid;
@@ -3722,7 +3732,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
fill_xsave((u8 *) guest_xsave->region, vcpu);
} else {
memcpy(guest_xsave->region,
- &vcpu->arch.guest_fpu.state.fxsave,
+ &vcpu->arch.guest_fpu->state.fxsave,
sizeof(struct fxregs_state));
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
XFEATURE_MASK_FPSSE;
@@ -3752,7 +3762,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
- memcpy(&vcpu->arch.guest_fpu.state.fxsave,
+ memcpy(&vcpu->arch.guest_fpu->state.fxsave,
guest_xsave->region, sizeof(struct fxregs_state));
}
return 0;
@@ -3830,6 +3840,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+ if (!kvm_x86_ops->nested_enable_evmcs)
+ return -ENOTTY;
r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
if (!r) {
user_ptr = (void __user *)(uintptr_t)cap->args[0];
@@ -4192,6 +4204,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break;
}
+ case KVM_GET_SUPPORTED_HV_CPUID: {
+ struct kvm_cpuid2 __user *cpuid_arg = argp;
+ struct kvm_cpuid2 cpuid;
+
+ r = -EFAULT;
+ if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
+ goto out;
+
+ r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
+ cpuid_arg->entries);
+ if (r)
+ goto out;
+
+ r = -EFAULT;
+ if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
+ goto out;
+ r = 0;
+ break;
+ }
default:
r = -EINVAL;
}
@@ -4396,7 +4427,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
*/
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
- bool is_dirty = false;
+ bool flush = false;
int r;
mutex_lock(&kvm->slots_lock);
@@ -4407,14 +4438,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
if (kvm_x86_ops->flush_log_dirty)
kvm_x86_ops->flush_log_dirty(kvm);
- r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log_protect(kvm, log, &flush);
/*
* All the TLBs can be flushed out of mmu lock, see the comments in
* kvm_mmu_slot_remove_write_access().
*/
lockdep_assert_held(&kvm->slots_lock);
- if (is_dirty)
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+ bool flush = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ /*
+ * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+ */
+ if (kvm_x86_ops->flush_log_dirty)
+ kvm_x86_ops->flush_log_dirty(kvm);
+
+ r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+ /*
+ * All the TLBs can be flushed out of mmu lock, see the comments in
+ * kvm_mmu_slot_remove_write_access().
+ */
+ lockdep_assert_held(&kvm->slots_lock);
+ if (flush)
kvm_flush_remote_tlbs(kvm);
mutex_unlock(&kvm->slots_lock);
@@ -4433,8 +4491,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
return 0;
}
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
- struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
{
int r;
@@ -4767,15 +4825,6 @@ set_identity_unlock:
r = 0;
break;
}
- case KVM_ENABLE_CAP: {
- struct kvm_enable_cap cap;
-
- r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
- goto out;
- r = kvm_vm_ioctl_enable_cap(kvm, &cap);
- break;
- }
case KVM_MEMORY_ENCRYPT_OP: {
r = -ENOTTY;
if (kvm_x86_ops->mem_enc_op)
@@ -4844,6 +4893,30 @@ static void kvm_init_msr_list(void)
if (!kvm_x86_ops->rdtscp_supported())
continue;
break;
+ case MSR_IA32_RTIT_CTL:
+ case MSR_IA32_RTIT_STATUS:
+ if (!kvm_x86_ops->pt_supported())
+ continue;
+ break;
+ case MSR_IA32_RTIT_CR3_MATCH:
+ if (!kvm_x86_ops->pt_supported() ||
+ !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
+ continue;
+ break;
+ case MSR_IA32_RTIT_OUTPUT_BASE:
+ case MSR_IA32_RTIT_OUTPUT_MASK:
+ if (!kvm_x86_ops->pt_supported() ||
+ (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
+ !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
+ continue;
+ break;
+ case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
+ if (!kvm_x86_ops->pt_supported() ||
+ msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+ intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
+ continue;
+ break;
+ }
default:
break;
}
@@ -6815,11 +6888,30 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ /*
+ * KVM explicitly assumes that the guest has an FPU and
+ * FXSAVE/FXRSTOR. For example, the KVM_GET_FPU explicitly casts the
+ * vCPU's FPU state as a fxregs_state struct.
+ */
+ if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
+ printk(KERN_ERR "kvm: inadequate fpu\n");
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
r = -ENOMEM;
+ x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
+ __alignof__(struct fpu), SLAB_ACCOUNT,
+ NULL);
+ if (!x86_fpu_cache) {
+ printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
+ goto out;
+ }
+
shared_msrs = alloc_percpu(struct kvm_shared_msrs);
if (!shared_msrs) {
printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
- goto out;
+ goto out_free_x86_fpu_cache;
}
r = kvm_mmu_module_init();
@@ -6852,6 +6944,8 @@ int kvm_arch_init(void *opaque)
out_free_percpu:
free_percpu(shared_msrs);
+out_free_x86_fpu_cache:
+ kmem_cache_destroy(x86_fpu_cache);
out:
return r;
}
@@ -6875,6 +6969,7 @@ void kvm_arch_exit(void)
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
free_percpu(shared_msrs);
+ kmem_cache_destroy(x86_fpu_cache);
}
int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
@@ -7998,9 +8093,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
- copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
+ copy_fpregs_to_fpstate(&current->thread.fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
- __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
preempt_enable();
trace_kvm_fpu(1);
@@ -8010,8 +8105,8 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
- copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
+ copy_kernel_to_fpregs(&current->thread.fpu.state);
preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
@@ -8505,7 +8600,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
vcpu_load(vcpu);
- fxsave = &vcpu->arch.guest_fpu.state.fxsave;
+ fxsave = &vcpu->arch.guest_fpu->state.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
@@ -8525,7 +8620,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
vcpu_load(vcpu);
- fxsave = &vcpu->arch.guest_fpu.state.fxsave;
+ fxsave = &vcpu->arch.guest_fpu->state.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -8581,9 +8676,9 @@ static int sync_regs(struct kvm_vcpu *vcpu)
static void fx_init(struct kvm_vcpu *vcpu)
{
- fpstate_init(&vcpu->arch.guest_fpu.state);
+ fpstate_init(&vcpu->arch.guest_fpu->state);
if (boot_cpu_has(X86_FEATURE_XSAVES))
- vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
+ vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
@@ -8621,6 +8716,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
kvm_vcpu_mtrr_init(vcpu);
vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
@@ -8707,11 +8803,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
*/
if (init_event)
kvm_put_guest_fpu(vcpu);
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
+ mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
XFEATURE_MASK_BNDREGS);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
+ mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
XFEATURE_MASK_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
@@ -8723,7 +8819,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_pmu_reset(vcpu);
vcpu->arch.smbase = 0x30000;
- vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
vcpu->arch.msr_misc_features_enables = 0;
vcpu->arch.xcr0 = XFEATURE_MASK_FP;
@@ -9282,7 +9377,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* with dirty logging disabled in order to eliminate unnecessary GPA
* logging in PML buffer (and potential PML buffer full VMEXT). This
* guarantees leaving PML enabled during guest's lifetime won't have
- * any additonal overhead from PML when guest is running with dirty
+ * any additional overhead from PML when guest is running with dirty
* logging disabled for memory slots.
*
* kvm_x86_ops->slot_enable_log_dirty is called when switching new slot