From 801e459a6f3a63af9d447e6249088c76ae16efc4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 21 Feb 2018 13:39:51 -0600 Subject: KVM: x86: Add a framework for supporting MSR-based features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a new KVM capability that allows bits within MSRs to be recognized as features. Two new ioctls are added to the /dev/kvm ioctl routine to retrieve the list of these MSRs and then retrieve their values. A kvm_x86_ops callback is used to determine support for the listed MSR-based features. Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini [Tweaked documentation. - Radim] Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 6 ++++ arch/x86/kvm/vmx.c | 6 ++++ arch/x86/kvm/x86.c | 75 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0a9e330b34f0..bab0694b35c3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1095,6 +1095,8 @@ struct kvm_x86_ops { int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); + + int (*get_msr_feature)(struct kvm_msr_entry *entry); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3d8377f75eda..d8db947acf70 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3869,6 +3869,11 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -6832,6 +6837,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, + .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ec14f2319a87..fafc1f6d8987 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3226,6 +3226,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, return !(val & ~valid_bits); } +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -12296,6 +12301,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .vcpu_put = vmx_vcpu_put, .update_bp_intercept = update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 96edda878dbf..239fc1fd7845 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1049,6 +1049,28 @@ static u32 emulated_msrs[] = { static unsigned num_emulated_msrs; +/* + * List of msr numbers which are used to expose MSR-based features that + * can be used by a hypervisor to validate requested CPU features. + */ +static u32 msr_based_features[] = { +}; + +static unsigned int num_msr_based_features; + +static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + struct kvm_msr_entry msr; + + msr.index = index; + if (kvm_x86_ops->get_msr_feature(&msr)) + return 1; + + *data = msr.data; + + return 0; +} + bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) @@ -2680,13 +2702,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - int i, idx; + int i; - idx = srcu_read_lock(&vcpu->kvm->srcu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; - srcu_read_unlock(&vcpu->kvm->srcu, idx); return i; } @@ -2785,6 +2805,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_GET_MSR_FEATURES: r = 1; break; case KVM_CAP_ADJUST_CLOCK: @@ -2899,6 +2920,31 @@ long kvm_arch_dev_ioctl(struct file *filp, goto out; r = 0; break; + case KVM_GET_MSR_FEATURE_INDEX_LIST: { + struct kvm_msr_list __user *user_msr_list = argp; + struct kvm_msr_list msr_list; + unsigned int n; + + r = -EFAULT; + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) + goto out; + n = msr_list.nmsrs; + msr_list.nmsrs = num_msr_based_features; + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) + goto out; + r = -E2BIG; + if (n < msr_list.nmsrs) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msr_based_features, + num_msr_based_features * sizeof(u32))) + goto out; + r = 0; + break; + } + case KVM_GET_MSRS: + r = msr_io(NULL, argp, do_get_msr_feature, 1); + break; } default: r = -EINVAL; @@ -3636,12 +3682,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_MSRS: + case KVM_GET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_get_msr, 1); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; - case KVM_SET_MSRS: + } + case KVM_SET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_set_msr, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; + } case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -4464,6 +4516,19 @@ static void kvm_init_msr_list(void) j++; } num_emulated_msrs = j; + + for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + struct kvm_msr_entry msr; + + msr.index = msr_based_features[i]; + if (kvm_x86_ops->get_msr_feature(&msr)) + continue; + + if (j < i) + msr_based_features[j] = msr_based_features[i]; + j++; + } + num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, -- cgit From d1d93fa90f1afa926cb060b7f78ab01a65705b4d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 24 Feb 2018 00:18:20 +0100 Subject: KVM: SVM: Add MSR-based feature support for serializing LFENCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to determine if LFENCE is a serializing instruction on AMD processors, MSR 0xc0011029 (MSR_F10H_DECFG) must be read and the state of bit 1 checked. This patch will add support to allow a guest to properly make this determination. Add the MSR feature callback operation to svm.c and add MSR 0xc0011029 to the list of MSR-based features. If LFENCE is serializing, then the feature is supported, allowing the hypervisor to set the value of the MSR that guest will see. Support is also added to write (hypervisor only) and read the MSR value for the guest. A write by the guest will result in a #GP. A read by the guest will return the value as set by the host. In this way, the support to expose the feature to the guest is controlled by the hypervisor. Reviewed-by: Paolo Bonzini Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 36 +++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d8db947acf70..f874798f8209 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -178,6 +178,8 @@ struct vcpu_svm { uint64_t sysenter_eip; uint64_t tsc_aux; + u64 msr_decfg; + u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -3871,7 +3873,18 @@ static int cr8_write_interception(struct vcpu_svm *svm) static int svm_get_msr_feature(struct kvm_msr_entry *msr) { - return 1; + msr->data = 0; + + switch (msr->index) { + case MSR_F10H_DECFG: + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + break; + default: + return 1; + } + + return 0; } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3969,6 +3982,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; + case MSR_F10H_DECFG: + msr_info->data = svm->msr_decfg; + break; default: return kvm_get_msr_common(vcpu, msr_info); } @@ -4147,6 +4163,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_F10H_DECFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; + if (svm_get_msr_feature(&msr_entry)) + return 1; + + /* Check the supported bits */ + if (data & ~msr_entry.data) + return 1; + + /* Don't allow the guest to change a bit, #GP */ + if (!msr->host_initiated && (data ^ msr_entry.data)) + return 1; + + svm->msr_decfg = data; + break; + } case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 239fc1fd7845..54b4ed55945b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1054,6 +1054,7 @@ static unsigned num_emulated_msrs; * can be used by a hypervisor to validate requested CPU features. */ static u32 msr_based_features[] = { + MSR_F10H_DECFG, }; static unsigned int num_msr_based_features; -- cgit From 66421c1ec340096b291af763ed5721314cdd9c5c Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Feb 2018 14:03:30 +0800 Subject: KVM: X86: Introduce kvm_get_msr_feature() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce kvm_get_msr_feature() to handle the msrs which are supported by different vendors and sharing the same emulation logic. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Cc: Nadav Amit Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54b4ed55945b..d97620eeb394 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1059,13 +1059,25 @@ static u32 msr_based_features[] = { static unsigned int num_msr_based_features; +static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + default: + if (kvm_x86_ops->get_msr_feature(msr)) + return 1; + } + return 0; +} + static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { struct kvm_msr_entry msr; + int r; msr.index = index; - if (kvm_x86_ops->get_msr_feature(&msr)) - return 1; + r = kvm_get_msr_feature(&msr); + if (r) + return r; *data = msr.data; @@ -4522,7 +4534,7 @@ static void kvm_init_msr_list(void) struct kvm_msr_entry msr; msr.index = msr_based_features[i]; - if (kvm_x86_ops->get_msr_feature(&msr)) + if (kvm_get_msr_feature(&msr)) continue; if (j < i) -- cgit From 518e7b94817abed94becfe6a44f1ece0d4745afe Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Feb 2018 14:03:31 +0800 Subject: KVM: X86: Allow userspace to define the microcode version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux (among the others) has checks to make sure that certain features aren't enabled on a certain family/model/stepping if the microcode version isn't greater than or equal to a known good version. By exposing the real microcode version, we're preventing buggy guests that don't check that they are running virtualized (i.e., they should trust the hypervisor) from disabling features that are effectively not buggy. Suggested-by: Filippo Sironi Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Cc: Nadav Amit Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 4 +--- arch/x86/kvm/vmx.c | 1 + arch/x86/kvm/x86.c | 11 +++++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bab0694b35c3..b605a5b6a30c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -507,6 +507,7 @@ struct kvm_vcpu_arch { u64 smi_count; bool tpr_access_reporting; u64 ia32_xss; + u64 microcode_version; /* * Paging state of the vcpu diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f874798f8209..312f33f4ed36 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1907,6 +1907,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; if (!init_event) { @@ -3962,9 +3963,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; case MSR_F15H_IC_CFG: { int family, model; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fafc1f6d8987..591214843046 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5771,6 +5771,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d97620eeb394..11649d290b93 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1055,6 +1055,7 @@ static unsigned num_emulated_msrs; */ static u32 msr_based_features[] = { MSR_F10H_DECFG, + MSR_IA32_UCODE_REV, }; static unsigned int num_msr_based_features; @@ -1062,6 +1063,9 @@ static unsigned int num_msr_based_features; static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { + case MSR_IA32_UCODE_REV: + rdmsrl(msr->index, msr->data); + break; default: if (kvm_x86_ops->get_msr_feature(msr)) return 1; @@ -2257,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_AMD64_NB_CFG: - case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case MSR_VM_HSAVE_PA: case MSR_AMD64_PATCH_LOADER: @@ -2265,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: break; + case MSR_IA32_UCODE_REV: + if (msr_info->host_initiated) + vcpu->arch.microcode_version = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2560,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0; break; case MSR_IA32_UCODE_REV: - msr_info->data = 0x100000000ULL; + msr_info->data = vcpu->arch.microcode_version; break; case MSR_MTRRcap: case 0x200 ... 0x2ff: -- cgit From b7e31be385584afe7f073130e8e570d53c95f7fe Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 1 Mar 2018 15:24:25 +0100 Subject: KVM: x86: fix vcpu initialization with userspace lapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving the code around broke this rare configuration. Use this opportunity to finally call lapic reset from vcpu reset. Reported-by: syzbot+fb7a33a4b6c35007a72b@syzkaller.appspotmail.com Suggested-by: Paolo Bonzini Fixes: 0b2e9904c159 ("KVM: x86: move LAPIC initialization after VMCS creation") Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 10 ++++------ arch/x86/kvm/x86.c | 3 ++- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cc5fe7a50dde..391dda8d43b7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { - struct kvm_lapic *apic; + struct kvm_lapic *apic = vcpu->arch.apic; int i; - apic_debug("%s\n", __func__); + if (!apic) + return; - ASSERT(vcpu); - apic = vcpu->arch.apic; - ASSERT(apic != NULL); + apic_debug("%s\n", __func__); /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2568,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { - kvm_lapic_reset(vcpu, true); kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 11649d290b93..18b5ca7a3197 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8060,7 +8060,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_vcpu_mtrr_init(vcpu); vcpu_load(vcpu); kvm_vcpu_reset(vcpu, false); - kvm_lapic_reset(vcpu, false); kvm_mmu_setup(vcpu); vcpu_put(vcpu); return 0; @@ -8103,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { + kvm_lapic_reset(vcpu, init_event); + vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; -- cgit