-rw-r--r--  arch/x86/include/asm/kvm_host.h  | 39
-rw-r--r--  arch/x86/kvm/mmu/mmu.c           | 59
-rw-r--r--  arch/x86/kvm/mmu/mmu_internal.h  |  6
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c       |  4
4 files changed, 72 insertions, 36 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f19a76d3ca0e..c038d7cd187d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1348,6 +1348,30 @@ enum kvm_apicv_inhibit {
 	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED),		\
 	__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
 
+struct kvm_possible_nx_huge_pages {
+	/*
+	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
+	 * replaced by an NX huge page.  A shadow page is on this list if its
+	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+	 * and there are no other conditions that prevent a huge page, e.g.
+	 * the backing host page is huge, dirtly logging is not enabled for its
+	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
+	 * guarantee an NX huge page will be created in its stead, e.g. if the
+	 * guest attempts to execute from the region then KVM obviously can't
+	 * create an NX huge page (without hanging the guest).
+	 */
+	struct list_head pages;
+	u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+	KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+	KVM_TDP_MMU,
+#endif
+	KVM_NR_MMU_TYPES,
+};
+
 struct kvm_arch {
 	unsigned long n_used_mmu_pages;
 	unsigned long n_requested_mmu_pages;
@@ -1360,18 +1384,7 @@ struct kvm_arch {
 	bool pre_fault_allowed;
 	struct hlist_head *mmu_page_hash;
 	struct list_head active_mmu_pages;
-	/*
-	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
-	 * replaced by an NX huge page.  A shadow page is on this list if its
-	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
-	 * and there are no other conditions that prevent a huge page, e.g.
-	 * the backing host page is huge, dirtly logging is not enabled for its
-	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
-	 * guarantee an NX huge page will be created in its stead, e.g. if the
-	 * guest attempts to execute from the region then KVM obviously can't
-	 * create an NX huge page (without hanging the guest).
-	 */
-	struct list_head possible_nx_huge_pages;
+	struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 	struct kvm_page_track_notifier_head track_notifier_head;
 #endif
@@ -1526,7 +1539,7 @@ struct kvm_arch {
 	 * is held in read mode:
 	 *  - tdp_mmu_roots (above)
 	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
-	 *  - possible_nx_huge_pages;
+	 *  - possible_nx_huge_pages[KVM_TDP_MMU];
 	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
 	 *    by the TDP MMU
 	 * Because the lock is only taken within the MMU lock, strictly
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6e838cb6c9e1..e0d6579db531 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -776,7 +776,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
 }
 
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 enum kvm_mmu_type mmu_type)
 {
 	/*
 	 * If it's possible to replace the shadow page with an NX huge page,
@@ -790,8 +791,9 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 		return;
 
 	++kvm->stat.nx_lpage_splits;
+	++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
 	list_add_tail(&sp->possible_nx_huge_page_link,
-		      &kvm->arch.possible_nx_huge_pages);
+		      &kvm->arch.possible_nx_huge_pages[mmu_type].pages);
 }
 
 static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -800,7 +802,7 @@ static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	sp->nx_huge_page_disallowed = true;
 
 	if (nx_huge_page_possible)
-		track_possible_nx_huge_page(kvm, sp);
+		track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -819,12 +821,14 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
 
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				   enum kvm_mmu_type mmu_type)
 {
 	if (list_empty(&sp->possible_nx_huge_page_link))
 		return;
 
 	--kvm->stat.nx_lpage_splits;
+	--kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
 	list_del_init(&sp->possible_nx_huge_page_link);
 }
 
@@ -832,7 +836,7 @@ static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	sp->nx_huge_page_disallowed = false;
 
-	untrack_possible_nx_huge_page(kvm, sp);
+	untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }
 
 static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu,
@@ -6737,11 +6741,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-	int r;
+	int r, i;
 
 	kvm->arch.shadow_mmio_value = shadow_mmio_value;
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
+	for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+		INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages);
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
 	if (tdp_mmu_enabled) {
@@ -7582,16 +7587,32 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 	return err;
 }
 
-static void kvm_recover_nx_huge_pages(struct kvm *kvm)
+static unsigned long nx_huge_pages_to_zap(struct kvm *kvm,
+					  enum kvm_mmu_type mmu_type)
 {
-	unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
+	unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages);
+	unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+	return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
+}
+
+static void kvm_recover_nx_huge_pages(struct kvm *kvm,
+				      enum kvm_mmu_type mmu_type)
+{
+#ifdef CONFIG_X86_64
+	const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU;
+#else
+	const bool is_tdp_mmu = false;
+#endif
+	unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type);
+	struct list_head *nx_huge_pages;
 	struct kvm_memory_slot *slot;
-	int rcu_idx;
 	struct kvm_mmu_page *sp;
-	unsigned int ratio;
 	LIST_HEAD(invalid_list);
 	bool flush = false;
-	ulong to_zap;
+	int rcu_idx;
+
+	nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
 	write_lock(&kvm->mmu_lock);
@@ -7603,10 +7624,8 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 	 */
 	rcu_read_lock();
 
-	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-	to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
 	for ( ; to_zap; --to_zap) {
-		if (list_empty(&kvm->arch.possible_nx_huge_pages))
+		if (list_empty(nx_huge_pages))
 			break;
 
 		/*
@@ -7616,7 +7635,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 		 * the total number of shadow pages.  And because the TDP MMU
 		 * doesn't use active_mmu_pages.
 		 */
-		sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
+		sp = list_first_entry(nx_huge_pages,
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
@@ -7653,7 +7672,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 
 		if (slot && kvm_slot_dirty_track_enabled(slot))
 			unaccount_nx_huge_page(kvm, sp);
-		else if (is_tdp_mmu_page(sp))
+		else if (is_tdp_mmu)
 			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
 		else
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
@@ -7684,9 +7703,10 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data)
 static bool kvm_nx_huge_page_recovery_worker(void *data)
 {
 	struct kvm *kvm = data;
+	long remaining_time;
 	bool enabled;
 	uint period;
-	long remaining_time;
+	int i;
 
 	enabled = calc_nx_huge_pages_recovery_period(&period);
 	if (!enabled)
@@ -7701,7 +7721,8 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	}
 
 	__set_current_state(TASK_RUNNING);
-	kvm_recover_nx_huge_pages(kvm);
+	for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+		kvm_recover_nx_huge_pages(kvm, i);
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	return true;
 }
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 65f3c89d7c5d..0b772c758f37 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -416,7 +416,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
 
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 enum kvm_mmu_type mmu_type);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				   enum kvm_mmu_type mmu_type);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7f3d7229b2c1..48b070f9f4e1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -355,7 +355,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	sp->nx_huge_page_disallowed = false;
-	untrack_possible_nx_huge_page(kvm, sp);
+	untrack_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }
 
@@ -1303,7 +1303,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		    fault->req_level >= iter.level) {
 			spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 			if (sp->nx_huge_page_disallowed)
-				track_possible_nx_huge_page(kvm, sp);
+				track_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
 			spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 		}
 	}
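For readers who want to see the accounting change in isolation: the patch keeps one "possible NX huge page" list and count per MMU type and sizes each recovery pass from the per-type count via DIV_ROUND_UP(nr_pages, ratio). Below is a minimal userspace sketch of that arithmetic, not kernel code; the names (possible[], track(), untrack(), to_zap()) are hypothetical stand-ins for the KVM symbols, and the ratio of 60 assumes the module parameter's usual default.

/*
 * Minimal userspace sketch (not kernel code) of the per-MMU-type NX huge
 * page accounting introduced by the patch.  All identifiers here are
 * illustrative stand-ins, not KVM symbols.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

enum mmu_type {
	SHADOW_MMU,
	TDP_MMU,
	NR_MMU_TYPES,
};

struct possible_nx_huge_pages {
	unsigned long nr_pages;	/* mirrors kvm_possible_nx_huge_pages.nr_pages */
};

/* Stand-in for the per-VM arch state: one counter per MMU type. */
static struct possible_nx_huge_pages possible[NR_MMU_TYPES];

/* Mirrors track_possible_nx_huge_page(): bump the per-type count. */
static void track(enum mmu_type type)
{
	possible[type].nr_pages++;
}

/* Mirrors untrack_possible_nx_huge_page(): drop the per-type count. */
static void untrack(enum mmu_type type)
{
	if (possible[type].nr_pages)
		possible[type].nr_pages--;
}

/* Mirrors nx_huge_pages_to_zap(): a ratio of 0 disables recovery. */
static unsigned long to_zap(enum mmu_type type, unsigned int ratio)
{
	unsigned long pages = possible[type].nr_pages;

	return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
}

int main(void)
{
	unsigned int ratio = 60;	/* assumed recovery ratio, for illustration */
	int i;

	/* Pretend the shadow MMU and TDP MMU each tracked some pages. */
	for (i = 0; i < 100; i++)
		track(SHADOW_MMU);
	for (i = 0; i < 1000; i++)
		track(TDP_MMU);
	untrack(TDP_MMU);

	/* Each recovery pass now sizes its work per MMU type. */
	for (i = 0; i < NR_MMU_TYPES; i++)
		printf("mmu type %d: %lu tracked, zap %lu this pass\n",
		       i, possible[i].nr_pages, to_zap(i, ratio));

	return 0;
}

The point of the split is visible in the output: the budget for each pass is derived from that MMU type's own count rather than from the VM-wide nx_lpage_splits stat, so a large TDP MMU backlog no longer inflates the amount of shadow-MMU work (and vice versa), matching the loop over KVM_NR_MMU_TYPES in kvm_nx_huge_page_recovery_worker().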
