summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r--arch/x86/kvm/mmu/mmu.c57
1 files changed, 50 insertions, 7 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f9080ee50ffa..64a2a7e2be90 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2804,8 +2804,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
const struct kvm_memory_slot *slot)
{
unsigned long hva;
- pte_t *pte;
- int level;
+ unsigned long flags;
+ int level = PG_LEVEL_4K;
+ pgd_t pgd;
+ p4d_t p4d;
+ pud_t pud;
+ pmd_t pmd;
if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
return PG_LEVEL_4K;
@@ -2820,10 +2824,43 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
*/
hva = __gfn_to_hva_memslot(slot, gfn);
- pte = lookup_address_in_mm(kvm->mm, hva, &level);
- if (unlikely(!pte))
- return PG_LEVEL_4K;
+ /*
+ * Lookup the mapping level in the current mm. The information
+ * may become stale soon, but it is safe to use as long as
+ * 1) mmu_notifier_retry was checked after taking mmu_lock, and
+ * 2) mmu_lock is taken now.
+ *
+ * We still need to disable IRQs to prevent concurrent tear down
+ * of page tables.
+ */
+ local_irq_save(flags);
+
+ pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
+ if (pgd_none(pgd))
+ goto out;
+
+ p4d = READ_ONCE(*p4d_offset(&pgd, hva));
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ goto out;
+ pud = READ_ONCE(*pud_offset(&p4d, hva));
+ if (pud_none(pud) || !pud_present(pud))
+ goto out;
+
+ if (pud_large(pud)) {
+ level = PG_LEVEL_1G;
+ goto out;
+ }
+
+ pmd = READ_ONCE(*pmd_offset(&pud, hva));
+ if (pmd_none(pmd) || !pmd_present(pmd))
+ goto out;
+
+ if (pmd_large(pmd))
+ level = PG_LEVEL_2M;
+
+out:
+ local_irq_restore(flags);
return level;
}
@@ -2992,9 +3029,15 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
/*
* If MMIO caching is disabled, emulate immediately without
* touching the shadow page tables as attempting to install an
- * MMIO SPTE will just be an expensive nop.
+ * MMIO SPTE will just be an expensive nop. Do not cache MMIO
+ * whose gfn is greater than host.MAXPHYADDR, any guest that
+ * generates such gfns is running nested and is being tricked
+ * by L0 userspace (you can observe gfn > L1.MAXPHYADDR if
+ * and only if L1's MAXPHYADDR is inaccurate with respect to
+ * the hardware's).
*/
- if (unlikely(!shadow_mmio_value)) {
+ if (unlikely(!shadow_mmio_value) ||
+ unlikely(fault->gfn > kvm_mmu_max_gfn())) {
*ret_val = RET_PF_EMULATE;
return true;
}