summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c92
1 files changed, 83 insertions, 9 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 205ebdc2b11b..43a6a7efc6ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1557,12 +1557,32 @@ static const u32 msr_based_features_all[] = {
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
static unsigned int num_msr_based_features;
+/*
+ * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
+ * does not yet virtualize. These include:
+ * 10 - MISC_PACKAGE_CTRLS
+ * 11 - ENERGY_FILTERING_CTL
+ * 12 - DOITM
+ * 18 - FB_CLEAR_CTRL
+ * 21 - XAPIC_DISABLE_STATUS
+ * 23 - OVERCLOCKING_STATUS
+ */
+
+#define KVM_SUPPORTED_ARCH_CAP \
+ (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
+ ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
+ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
+ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+
static u64 kvm_get_arch_capabilities(void)
{
u64 data = 0;
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
+ data &= KVM_SUPPORTED_ARCH_CAP;
+ }
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -1610,9 +1630,6 @@ static u64 kvm_get_arch_capabilities(void)
*/
}
- /* Guests don't need to know "Fill buffer clear control" exists */
- data &= ~ARCH_CAP_FB_CLEAR_CTRL;
-
return data;
}
@@ -10652,7 +10669,8 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
case KVM_MP_STATE_INIT_RECEIVED:
break;
default:
- return -EINTR;
+ WARN_ON_ONCE(1);
+ break;
}
return 1;
}
@@ -11093,9 +11111,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
- if (!lapic_in_kernel(vcpu) &&
- mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_UNINITIALIZED:
+ case KVM_MP_STATE_HALTED:
+ case KVM_MP_STATE_AP_RESET_HOLD:
+ case KVM_MP_STATE_INIT_RECEIVED:
+ case KVM_MP_STATE_SIPI_RECEIVED:
+ if (!lapic_in_kernel(vcpu))
+ goto out;
+ break;
+
+ case KVM_MP_STATE_RUNNABLE:
+ break;
+
+ default:
goto out;
+ }
/*
* KVM_MP_STATE_INIT_RECEIVED means the processor is in
@@ -11563,7 +11594,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
- goto fail_free_pio_data;
+ goto fail_free_mce_banks;
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
@@ -11617,7 +11648,6 @@ free_wbinvd_dirty_mask:
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
kfree(vcpu->arch.mci_ctl2_banks);
-fail_free_pio_data:
free_page((unsigned long)vcpu->arch.pio_data);
fail_free_lapic:
kvm_free_lapic(vcpu);
@@ -12473,6 +12503,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
} else {
kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
}
+
+ /*
+ * Unconditionally flush the TLBs after enabling dirty logging.
+ * A flush is almost always going to be necessary (see below),
+ * and unconditionally flushing allows the helpers to omit
+ * the subtly complex checks when removing write access.
+ *
+ * Do the flush outside of mmu_lock to reduce the amount of
+ * time mmu_lock is held. Flushing after dropping mmu_lock is
+ * safe as KVM only needs to guarantee the slot is fully
+ * write-protected before returning to userspace, i.e. before
+ * userspace can consume the dirty status.
+ *
+ * Flushing outside of mmu_lock requires KVM to be careful when
+ * making decisions based on writable status of an SPTE, e.g. a
+ * !writable SPTE doesn't guarantee a CPU can't perform writes.
+ *
+ * Specifically, KVM also write-protects guest page tables to
+ * monitor changes when using shadow paging, and must guarantee
+ * no CPUs can write to those page before mmu_lock is dropped.
+ * Because CPUs may have stale TLB entries at this point, a
+ * !writable SPTE doesn't guarantee CPUs can't perform writes.
+ *
+ * KVM also allows making SPTES writable outside of mmu_lock,
+ * e.g. to allow dirty logging without taking mmu_lock.
+ *
+ * To handle these scenarios, KVM uses a separate software-only
+ * bit (MMU-writable) to track if a SPTE is !writable due to
+ * a guest page table being write-protected (KVM clears the
+ * MMU-writable flag when write-protecting for shadow paging).
+ *
+ * The use of MMU-writable is also the primary motivation for
+ * the unconditional flush. Because KVM must guarantee that a
+ * CPU doesn't contain stale, writable TLB entries for a
+ * !MMU-writable SPTE, KVM must flush if it encounters any
+ * MMU-writable SPTE regardless of whether the actual hardware
+ * writable bit was set. I.e. KVM is almost guaranteed to need
+ * to flush, while unconditionally flushing allows the "remove
+ * write access" helpers to ignore MMU-writable entirely.
+ *
+ * See is_writable_pte() for more details (the case involving
+ * access-tracked SPTEs is particularly relevant).
+ */
+ kvm_arch_flush_remote_tlbs_memslot(kvm, new);
}
}