diff options
Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 84 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu_internal.h | 3 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 8 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 43 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 10 |
5 files changed, 136 insertions, 12 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cbc84c6abc2e..6e838cb6c9e1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1983,14 +1983,35 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp) return true; } +static __ro_after_init HLIST_HEAD(empty_page_hash); + +static struct hlist_head *kvm_get_mmu_page_hash(struct kvm *kvm, gfn_t gfn) +{ + /* + * Ensure the load of the hash table pointer itself is ordered before + * loads to walk the table. The pointer is set at runtime outside of + * mmu_lock when the TDP MMU is enabled, i.e. when the hash table of + * shadow pages becomes necessary only when KVM needs to shadow L1's + * TDP for an L2 guest. Pairs with the smp_store_release() in + * kvm_mmu_alloc_page_hash(). + */ + struct hlist_head *page_hash = smp_load_acquire(&kvm->arch.mmu_page_hash); + + lockdep_assert_held(&kvm->mmu_lock); + + if (!page_hash) + return &empty_page_hash; + + return &page_hash[kvm_page_table_hashfn(gfn)]; +} + #define for_each_valid_sp(_kvm, _sp, _list) \ hlist_for_each_entry(_sp, _list, hash_link) \ if (is_obsolete_sp((_kvm), (_sp))) { \ } else #define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \ - for_each_valid_sp(_kvm, _sp, \ - &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ + for_each_valid_sp(_kvm, _sp, kvm_get_mmu_page_hash(_kvm, _gfn)) \ if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) @@ -2358,6 +2379,12 @@ static struct kvm_mmu_page *__kvm_mmu_get_shadow_page(struct kvm *kvm, struct kvm_mmu_page *sp; bool created = false; + /* + * No need for memory barriers, unlike in kvm_get_mmu_page_hash(), as + * mmu_page_hash must be set prior to creating the first shadow root, + * i.e. reaching this point is fully serialized by slots_arch_lock. + */ + BUG_ON(!kvm->arch.mmu_page_hash); sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]; sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role); @@ -3882,6 +3909,28 @@ out_unlock: return r; } +static int kvm_mmu_alloc_page_hash(struct kvm *kvm) +{ + struct hlist_head *h; + + if (kvm->arch.mmu_page_hash) + return 0; + + h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT); + if (!h) + return -ENOMEM; + + /* + * Ensure the hash table pointer is set only after all stores to zero + * the memory are retired. Pairs with the smp_load_acquire() in + * kvm_get_mmu_page_hash(). Note, mmu_lock must be held for write to + * add (or remove) shadow pages, and so readers are guaranteed to see + * an empty list for their current mmu_lock critical section. + */ + smp_store_release(&kvm->arch.mmu_page_hash, h); + return 0; +} + static int mmu_first_shadow_root_alloc(struct kvm *kvm) { struct kvm_memslots *slots; @@ -3901,9 +3950,13 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm) if (kvm_shadow_root_allocated(kvm)) goto out_unlock; + r = kvm_mmu_alloc_page_hash(kvm); + if (r) + goto out_unlock; + /* - * Check if anything actually needs to be allocated, e.g. all metadata - * will be allocated upfront if TDP is disabled. + * Check if memslot metadata actually needs to be allocated, e.g. all + * metadata will be allocated upfront if TDP is disabled. */ if (kvm_memslots_have_rmaps(kvm) && kvm_page_track_write_tracking_enabled(kvm)) @@ -4896,12 +4949,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; + u64 direct_bits; u64 end; int r; if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; + if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa))) + return -EINVAL; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. @@ -4910,15 +4967,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (r) return r; + direct_bits = 0; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; + else + direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ - r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level); + r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level); if (r < 0) return r; @@ -6675,15 +6735,22 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) kvm_tdp_mmu_zap_invalidated_roots(kvm, true); } -void kvm_mmu_init_vm(struct kvm *kvm) +int kvm_mmu_init_vm(struct kvm *kvm) { + int r; + kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - if (tdp_mmu_enabled) + if (tdp_mmu_enabled) { kvm_mmu_init_tdp_mmu(kvm); + } else { + r = kvm_mmu_alloc_page_hash(kvm); + if (r) + return r; + } kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; @@ -6692,6 +6759,7 @@ void kvm_mmu_init_vm(struct kvm *kvm) kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache; kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO; + return 0; } static void mmu_free_vm_memory_caches(struct kvm *kvm) @@ -6703,6 +6771,8 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm) void kvm_mmu_uninit_vm(struct kvm *kvm) { + kvfree(kvm->arch.mmu_page_hash); + if (tdp_mmu_enabled) kvm_mmu_uninit_tdp_mmu(kvm); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index db8f33e4de62..65f3c89d7c5d 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -103,6 +103,9 @@ struct kvm_mmu_page { int root_count; refcount_t tdp_mmu_root_count; }; + + bool has_mapped_host_mmio; + union { /* These two members aren't used for TDP MMU */ struct { diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 68e323568e95..ed762bb4b007 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -804,9 +804,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r != RET_PF_CONTINUE) return r; +#if PTTYPE != PTTYPE_EPT /* - * Do not change pte_access if the pfn is a mmio page, otherwise - * we will cache the incorrect access into mmio spte. + * Treat the guest PTE protections as writable, supervisor-only if this + * is a supervisor write fault and CR0.WP=0 (supervisor accesses ignore + * PTE.W if CR0.WP=0). Don't change the access type for emulated MMIO, + * otherwise KVM will cache incorrect access information in the SPTE. */ if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) && !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) { @@ -822,6 +825,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_cr4_smep(vcpu->arch.mmu)) walker.pte_access &= ~ACC_EXEC_MASK; } +#endif r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index cfce03d8f123..df31039b5d63 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -104,7 +104,7 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) return spte; } -static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) +static bool __kvm_is_mmio_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) && @@ -125,6 +125,35 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) E820_TYPE_RAM); } +static bool kvm_is_mmio_pfn(kvm_pfn_t pfn, int *is_host_mmio) +{ + /* + * Determining if a PFN is host MMIO is relative expensive. Cache the + * result locally (in the sole caller) to avoid doing the full query + * multiple times when creating a single SPTE. + */ + if (*is_host_mmio < 0) + *is_host_mmio = __kvm_is_mmio_pfn(pfn); + + return *is_host_mmio; +} + +static void kvm_track_host_mmio_mapping(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa); + + if (root) + WRITE_ONCE(root->has_mapped_host_mmio, true); + else + WRITE_ONCE(vcpu->kvm->arch.has_mapped_host_mmio, true); + + /* + * Force vCPUs to exit and flush CPU buffers if the vCPU is using the + * affected root(s). + */ + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_OUTSIDE_GUEST_MODE); +} + /* * Returns true if the SPTE needs to be updated atomically due to having bits * that may be changed without holding mmu_lock, and for which KVM must not @@ -162,6 +191,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, { int level = sp->role.level; u64 spte = SPTE_MMU_PRESENT_MASK; + int is_host_mmio = -1; bool wrprot = false; /* @@ -209,13 +239,15 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (level > PG_LEVEL_4K) spte |= PT_PAGE_SIZE_MASK; - spte |= kvm_x86_call(get_mt_mask)(vcpu, gfn, kvm_is_mmio_pfn(pfn)); + if (kvm_x86_ops.get_mt_mask) + spte |= kvm_x86_call(get_mt_mask)(vcpu, gfn, + kvm_is_mmio_pfn(pfn, &is_host_mmio)); if (host_writable) spte |= shadow_host_writable_mask; else pte_access &= ~ACC_WRITE_MASK; - if (shadow_me_value && !kvm_is_mmio_pfn(pfn)) + if (shadow_me_value && !kvm_is_mmio_pfn(pfn, &is_host_mmio)) spte |= shadow_me_value; spte |= (u64)pfn << PAGE_SHIFT; @@ -260,6 +292,11 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, mark_page_dirty_in_slot(vcpu->kvm, slot, gfn); } + if (static_branch_unlikely(&cpu_buf_vm_clear) && + !kvm_vcpu_can_access_host_mmio(vcpu) && + kvm_is_mmio_pfn(pfn, &is_host_mmio)) + kvm_track_host_mmio_mapping(vcpu); + *new_spte = spte; return wrprot; } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 1e94f081bdaf..3133f066927e 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -280,6 +280,16 @@ static inline bool is_mirror_sptep(tdp_ptep_t sptep) return is_mirror_sp(sptep_to_sp(rcu_dereference(sptep))); } +static inline bool kvm_vcpu_can_access_host_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa); + + if (root) + return READ_ONCE(root->has_mapped_host_mmio); + + return READ_ONCE(vcpu->kvm->arch.has_mapped_host_mmio); +} + static inline bool is_mmio_spte(struct kvm *kvm, u64 spte) { return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value && |