Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 84
1 file changed, 77 insertions, 7 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cbc84c6abc2e..6e838cb6c9e1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1983,14 +1983,35 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
         return true;
 }
 
+static __ro_after_init HLIST_HEAD(empty_page_hash);
+
+static struct hlist_head *kvm_get_mmu_page_hash(struct kvm *kvm, gfn_t gfn)
+{
+        /*
+         * Ensure the load of the hash table pointer itself is ordered before
+         * loads to walk the table.  The pointer is set at runtime outside of
+         * mmu_lock when the TDP MMU is enabled, i.e. when the hash table of
+         * shadow pages becomes necessary only when KVM needs to shadow L1's
+         * TDP for an L2 guest.  Pairs with the smp_store_release() in
+         * kvm_mmu_alloc_page_hash().
+         */
+        struct hlist_head *page_hash = smp_load_acquire(&kvm->arch.mmu_page_hash);
+
+        lockdep_assert_held(&kvm->mmu_lock);
+
+        if (!page_hash)
+                return &empty_page_hash;
+
+        return &page_hash[kvm_page_table_hashfn(gfn)];
+}
+
 #define for_each_valid_sp(_kvm, _sp, _list)                            \
         hlist_for_each_entry(_sp, _list, hash_link)                    \
                 if (is_obsolete_sp((_kvm), (_sp))) {                   \
                 } else
 
 #define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)              \
-        for_each_valid_sp(_kvm, _sp,                                   \
-          &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])    \
+        for_each_valid_sp(_kvm, _sp, kvm_get_mmu_page_hash(_kvm, _gfn))        \
                 if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
 
 static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
@@ -2358,6 +2379,12 @@ static struct kvm_mmu_page *__kvm_mmu_get_shadow_page(struct kvm *kvm,
         struct kvm_mmu_page *sp;
         bool created = false;
 
+        /*
+         * No need for memory barriers, unlike in kvm_get_mmu_page_hash(), as
+         * mmu_page_hash must be set prior to creating the first shadow root,
+         * i.e. reaching this point is fully serialized by slots_arch_lock.
+         */
+        BUG_ON(!kvm->arch.mmu_page_hash);
         sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
 
         sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role);
@@ -3882,6 +3909,28 @@ out_unlock:
         return r;
 }
 
+static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
+{
+        struct hlist_head *h;
+
+        if (kvm->arch.mmu_page_hash)
+                return 0;
+
+        h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT);
+        if (!h)
+                return -ENOMEM;
+
+        /*
+         * Ensure the hash table pointer is set only after all stores to zero
+         * the memory are retired.  Pairs with the smp_load_acquire() in
+         * kvm_get_mmu_page_hash().  Note, mmu_lock must be held for write to
+         * add (or remove) shadow pages, and so readers are guaranteed to see
+         * an empty list for their current mmu_lock critical section.
+         */
+        smp_store_release(&kvm->arch.mmu_page_hash, h);
+        return 0;
+}
+
 static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 {
         struct kvm_memslots *slots;
@@ -3901,9 +3950,13 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
         if (kvm_shadow_root_allocated(kvm))
                 goto out_unlock;
 
+        r = kvm_mmu_alloc_page_hash(kvm);
+        if (r)
+                goto out_unlock;
+
         /*
-         * Check if anything actually needs to be allocated, e.g. all metadata
-         * will be allocated upfront if TDP is disabled.
+         * Check if memslot metadata actually needs to be allocated, e.g. all
+         * metadata will be allocated upfront if TDP is disabled.
          */
         if (kvm_memslots_have_rmaps(kvm) &&
             kvm_page_track_write_tracking_enabled(kvm))
@@ -4896,12 +4949,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 {
         u64 error_code = PFERR_GUEST_FINAL_MASK;
         u8 level = PG_LEVEL_4K;
+        u64 direct_bits;
         u64 end;
         int r;
 
         if (!vcpu->kvm->arch.pre_fault_allowed)
                 return -EOPNOTSUPP;
 
+        if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa)))
+                return -EINVAL;
+
         /*
          * reload is efficient when called repeatedly, so we can do it on
          * every iteration.
@@ -4910,15 +4967,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
         if (r)
                 return r;
 
+        direct_bits = 0;
         if (kvm_arch_has_private_mem(vcpu->kvm) &&
             kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
                 error_code |= PFERR_PRIVATE_ACCESS;
+        else
+                direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm));
 
         /*
          * Shadow paging uses GVA for kvm page fault, so restrict to
          * two-dimensional paging.
          */
-        r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level);
+        r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level);
         if (r < 0)
                 return r;
@@ -6675,15 +6735,22 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
         kvm_tdp_mmu_zap_invalidated_roots(kvm, true);
 }
 
-void kvm_mmu_init_vm(struct kvm *kvm)
+int kvm_mmu_init_vm(struct kvm *kvm)
 {
+        int r;
+
         kvm->arch.shadow_mmio_value = shadow_mmio_value;
         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
         INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
         spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
-        if (tdp_mmu_enabled)
+        if (tdp_mmu_enabled) {
                 kvm_mmu_init_tdp_mmu(kvm);
+        } else {
+                r = kvm_mmu_alloc_page_hash(kvm);
+                if (r)
+                        return r;
+        }
 
         kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
         kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
@@ -6692,6 +6759,7 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 
         kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
         kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
+        return 0;
 }
 
 static void mmu_free_vm_memory_caches(struct kvm *kvm)
@@ -6703,6 +6771,8 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
 {
+        kvfree(kvm->arch.mmu_page_hash);
+
         if (tdp_mmu_enabled)
                 kvm_mmu_uninit_tdp_mmu(kvm);
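
The kvm_mmu_alloc_page_hash()/kvm_get_mmu_page_hash() pair added above uses a standard publish pattern: the writer zeroes the hash table and only then publishes the pointer with smp_store_release(), while readers pick it up with smp_load_acquire() and fall back to a static empty list until the pointer is set. The following is a minimal standalone userspace sketch of that pattern, not kernel code: C11 atomics stand in for the kernel's smp_store_release()/smp_load_acquire(), and the names (page_hash, alloc_page_hash, get_bucket) are illustrative only.

/*
 * Sketch of the zero-then-publish pattern from the patch, using C11 atomics
 * in place of the kernel's smp_store_release()/smp_load_acquire().
 * All names here are hypothetical; this is not the KVM implementation.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_BUCKETS 16

struct bucket { int nr_entries; };

static struct bucket empty_bucket;          /* plays the role of empty_page_hash */
static _Atomic(struct bucket *) page_hash;  /* plays the role of arch.mmu_page_hash */

/* Writer side: allocate and zero first, publish the pointer last (release). */
static int alloc_page_hash(void)
{
        struct bucket *h;

        if (atomic_load_explicit(&page_hash, memory_order_relaxed))
                return 0;

        h = calloc(NUM_BUCKETS, sizeof(*h));
        if (!h)
                return -1;

        /* Publish only after the zeroing stores are complete. */
        atomic_store_explicit(&page_hash, h, memory_order_release);
        return 0;
}

/* Reader side: acquire the pointer and tolerate "not allocated yet". */
static struct bucket *get_bucket(unsigned int gfn)
{
        struct bucket *h = atomic_load_explicit(&page_hash, memory_order_acquire);

        if (!h)
                return &empty_bucket;       /* walk an empty list instead */

        return &h[gfn % NUM_BUCKETS];
}

int main(void)
{
        printf("before alloc: %d entries\n", get_bucket(42)->nr_entries);
        if (alloc_page_hash())
                return 1;
        printf("after alloc:  %d entries\n", get_bucket(42)->nr_entries);
        free(atomic_load_explicit(&page_hash, memory_order_relaxed));
        return 0;
}

A reader that observes the published pointer is guaranteed by the acquire/release pairing to also observe the zeroed buckets; a reader that still sees NULL simply walks an empty list, which matches the patch's reasoning that shadow pages are only inserted while mmu_lock is held for write, so a racing reader's critical section cannot miss entries it was entitled to see.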