summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--arch/x86/kvm/svm.c183
1 files changed, 100 insertions, 83 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1107626938cc..2c1cfe68a9af 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO);
static int vls = true;
module_param(vls, int, 0444);
-/* AVIC VM ID bit masks and lock */
-static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
-static DEFINE_SPINLOCK(avic_vm_id_lock);
+/* enable/disable Virtual GIF */
+static int vgif = true;
+module_param(vgif, int, 0444);
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
+static inline bool vgif_enabled(struct vcpu_svm *svm)
+{
+ return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
+}
+
static inline void enable_gif(struct vcpu_svm *svm)
{
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
+ if (vgif_enabled(svm))
+ svm->vmcb->control.int_ctl |= V_GIF_MASK;
+ else
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
}
static inline void disable_gif(struct vcpu_svm *svm)
{
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+ if (vgif_enabled(svm))
+ svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}
static inline bool gif_set(struct vcpu_svm *svm)
{
- return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
+ if (vgif_enabled(svm))
+ return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
+ else
+ return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static unsigned long iopm_base;
@@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid)
asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
}
-static int get_npt_level(void)
+static int get_npt_level(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
- return PT64_ROOT_LEVEL;
+ return PT64_ROOT_4LEVEL;
#else
return PT32E_ROOT_LEVEL;
#endif
@@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.reinject;
+ bool reinject = vcpu->arch.exception.injected;
u32 error_code = vcpu->arch.exception.error_code;
/*
@@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
static void disable_nmi_singlestep(struct vcpu_svm *svm)
{
svm->nmi_singlestep = false;
+
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
/* Clear our flags if they were not set by the guest */
if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
@@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
*/
#define SVM_VM_DATA_HASH_BITS 8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
+static u32 next_vm_id = 0;
+static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
/* Note:
@@ -1100,7 +1117,7 @@ static __init int svm_hardware_setup(void)
if (vls) {
if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
!IS_ENABLED(CONFIG_X86_64)) {
vls = false;
} else {
@@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void)
}
}
+ if (vgif) {
+ if (!boot_cpu_has(X86_FEATURE_VGIF))
+ vgif = false;
+ else
+ pr_info("Virtual GIF supported\n");
+ }
+
return 0;
err:
@@ -1167,9 +1191,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb;
struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
- phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
- phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
- phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
+ phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
+ phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
+ phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -1232,8 +1256,8 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_MWAIT);
}
- control->iopm_base_pa = iopm_base;
- control->msrpm_base_pa = __pa(svm->msrpm);
+ control->iopm_base_pa = __sme_set(iopm_base);
+ control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
control->int_ctl = V_INTR_MASKING_MASK;
init_seg(&save->es);
@@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
}
+ if (vgif) {
+ clr_intercept(svm, INTERCEPT_STGI);
+ clr_intercept(svm, INTERCEPT_CLGI);
+ svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
+ }
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -1377,9 +1407,9 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return -EINVAL;
new_entry = READ_ONCE(*entry);
- new_entry = (page_to_phys(svm->avic_backing_page) &
- AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
- AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
+ new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
+ AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
+ AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
WRITE_ONCE(*entry, new_entry);
svm->avic_physical_id_cache = entry;
@@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
-static inline int avic_get_next_vm_id(void)
-{
- int id;
-
- spin_lock(&avic_vm_id_lock);
-
- /* AVIC VM ID is one-based. */
- id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
- if (id <= AVIC_VM_ID_MASK)
- __set_bit(id, avic_vm_id_bitmap);
- else
- id = -EAGAIN;
-
- spin_unlock(&avic_vm_id_lock);
- return id;
-}
-
-static inline int avic_free_vm_id(int id)
-{
- if (id <= 0 || id > AVIC_VM_ID_MASK)
- return -EINVAL;
-
- spin_lock(&avic_vm_id_lock);
- __clear_bit(id, avic_vm_id_bitmap);
- spin_unlock(&avic_vm_id_lock);
- return 0;
-}
-
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm)
if (!avic)
return;
- avic_free_vm_id(vm_data->avic_vm_id);
-
if (vm_data->avic_logical_id_table_page)
__free_page(vm_data->avic_logical_id_table_page);
if (vm_data->avic_physical_id_table_page)
@@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm)
static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
- int vm_id, err = -ENOMEM;
+ int err = -ENOMEM;
struct kvm_arch *vm_data = &kvm->arch;
struct page *p_page;
struct page *l_page;
+ struct kvm_arch *ka;
+ u32 vm_id;
if (!avic)
return 0;
- vm_id = avic_get_next_vm_id();
- if (vm_id < 0)
- return vm_id;
- vm_data->avic_vm_id = (u32)vm_id;
-
/* Allocating physical APIC ID table (4KB) */
p_page = alloc_page(GFP_KERNEL);
if (!p_page)
@@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm)
clear_page(page_address(l_page));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+ again:
+ vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
+ if (vm_id == 0) { /* id is 1-based, zero is not okay */
+ next_vm_id_wrapped = 1;
+ goto again;
+ }
+ /* Is it still in use? Only possible if wrapped at least once */
+ if (next_vm_id_wrapped) {
+ hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
+ struct kvm *k2 = container_of(ka, struct kvm, arch);
+ struct kvm_arch *vd2 = &k2->arch;
+ if (vd2->avic_vm_id == vm_id)
+ goto again;
+ }
+ }
+ vm_data->avic_vm_id = vm_id;
hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
@@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
}
init_vmcb(svm);
- kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+ kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
@@ -1647,7 +1660,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
- svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
+ svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
svm->asid_generation = 0;
init_vmcb(svm);
@@ -1675,7 +1688,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
+ __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
@@ -1777,11 +1790,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
switch (reg) {
@@ -2335,7 +2343,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
u64 pdpte;
int ret;
- ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
offset_in_page(cr3) + index * 8, 8);
if (ret)
return 0;
@@ -2347,7 +2355,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->control.nested_cr3 = root;
+ svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
svm_flush_tlb(vcpu);
}
@@ -2389,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
- vcpu->arch.mmu.shadow_root_level = get_npt_level();
+ vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -2878,7 +2886,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
svm->nested.msrpm[p] = svm->msrpm[p] | value;
}
- svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+ svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
return true;
}
@@ -3152,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
+ /*
+ * If VGIF is enabled, the STGI intercept is only added to
+ * detect the opening of the NMI window; remove it now.
+ */
+ if (vgif_enabled(svm))
+ clr_intercept(svm, INTERCEPT_STGI);
+
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
@@ -3749,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
static int pause_interception(struct vcpu_svm *svm)
{
- kvm_vcpu_on_spin(&(svm->vcpu));
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ bool in_kernel = (svm_get_cpl(vcpu) == 0);
+
+ kvm_vcpu_on_spin(vcpu, in_kernel);
return 1;
}
@@ -4233,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
- vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
-
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
@@ -4511,7 +4527,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
irq.vector);
*svm = to_svm(vcpu);
- vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
+ vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
vcpu_info->vector = irq.vector;
return 0;
@@ -4562,7 +4578,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
struct amd_iommu_pi_data pi;
/* Try to enable guest_mode in IRTE */
- pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
+ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
+ AVIC_HPA_MASK);
pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
svm->vcpu.vcpu_id);
pi.is_guest_mode = true;
@@ -4686,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
* In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
* 1, because that's a separate STGI/VMRUN intercept. The next time we
* get that intercept, this function will be called again though and
- * we'll get the vintr intercept.
+ * we'll get the vintr intercept. However, if the vGIF feature is
+ * enabled, the STGI interception will not occur. Enable the irq
+ * window under the assumption that the hardware will set the GIF.
*/
- if (gif_set(svm) && nested_svm_intr(svm)) {
+ if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
@@ -4702,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
- if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+ if (!gif_set(svm)) {
+ if (vgif_enabled(svm))
+ set_intercept(svm, INTERCEPT_STGI);
return; /* STGI will cause a vm exit */
+ }
if (svm->nested.exit_required)
return; /* we're not going to run the guest yet */
@@ -5011,7 +5033,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->save.cr3 = root;
+ svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
svm_flush_tlb(vcpu);
}
@@ -5020,7 +5042,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->control.nested_cr3 = root;
+ svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
/* Also sync guest cr3 here in case we live migrate */
@@ -5075,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_cpuid_entry2 *entry;
/* Update nrips enabled cache */
- svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
+ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
if (!kvm_vcpu_apicv_active(vcpu))
return;
- entry = kvm_find_cpuid_entry(vcpu, 1, 0);
- if (entry)
- entry->ecx &= ~bit(X86_FEATURE_X2APIC);
+ guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
}
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -5413,8 +5432,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .get_pkru = svm_get_pkru,
-
.tlb_flush = svm_flush_tlb,
.run = svm_vcpu_run,