diff options
Diffstat (limited to 'arch/x86/kvm/mmu/page_track.c')
| -rw-r--r-- | arch/x86/kvm/mmu/page_track.c | 313 |
1 files changed, 212 insertions, 101 deletions
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 91a9f7e0fd91..1b17b12393a8 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -10,88 +10,95 @@ * Author: * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/lockdep.h> #include <linux/kvm_host.h> #include <linux/rculist.h> -#include <asm/kvm_page_track.h> - +#include "mmu.h" #include "mmu_internal.h" +#include "page_track.h" -void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) +static bool kvm_external_write_tracking_enabled(struct kvm *kvm) { - int i; +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING + /* + * Read external_write_tracking_enabled before related pointers. Pairs + * with the smp_store_release in kvm_page_track_write_tracking_enable(). + */ + return smp_load_acquire(&kvm->arch.external_write_tracking_enabled); +#else + return false; +#endif +} - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - kvfree(slot->arch.gfn_track[i]); - slot->arch.gfn_track[i] = NULL; - } +bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) +{ + return kvm_external_write_tracking_enabled(kvm) || + kvm_shadow_root_allocated(kvm) || !tdp_enabled; } -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, - unsigned long npages) +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) { - int i; - - for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { - slot->arch.gfn_track[i] = - kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), - GFP_KERNEL_ACCOUNT); - if (!slot->arch.gfn_track[i]) - goto track_free; - } + vfree(slot->arch.gfn_write_track); + slot->arch.gfn_write_track = NULL; +} - return 0; +static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, + unsigned long npages) +{ + const size_t size = sizeof(*slot->arch.gfn_write_track); -track_free: - kvm_page_track_free_memslot(slot); - return -ENOMEM; + if (!slot->arch.gfn_write_track) + slot->arch.gfn_write_track = __vcalloc(npages, size, + GFP_KERNEL_ACCOUNT); + + return slot->arch.gfn_write_track ? 0 : -ENOMEM; } -static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode) +int kvm_page_track_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, + unsigned long npages) { - if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX) - return false; + if (!kvm_page_track_write_tracking_enabled(kvm)) + return 0; - return true; + return __kvm_page_track_write_tracking_alloc(slot, npages); } -static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode, short count) +int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) +{ + return __kvm_page_track_write_tracking_alloc(slot, slot->npages); +} + +static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, + short count) { int index, val; index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); - val = slot->arch.gfn_track[mode][index]; + val = slot->arch.gfn_write_track[index]; - if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) + if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) return; - slot->arch.gfn_track[mode][index] += count; + slot->arch.gfn_write_track[index] += count; } -/* - * add guest page to the tracking pool so that corresponding access on that - * page will be intercepted. - * - * It should be called under the protection both of mmu-lock and kvm->srcu - * or kvm->slots_lock. - * - * @kvm: the guest instance we are interested in. - * @slot: the @gfn belongs to. - * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. - */ -void kvm_slot_page_track_add_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode) +void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn) { + lockdep_assert_held_write(&kvm->mmu_lock); + + lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || + srcu_read_lock_held(&kvm->srcu)); - if (WARN_ON(!page_track_mode_is_valid(mode))) + if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) return; - update_gfn_track(slot, gfn, mode, 1); + update_gfn_write_track(slot, gfn, 1); /* * new track stops large page mapping for the @@ -99,33 +106,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, */ kvm_mmu_gfn_disallow_lpage(slot, gfn); - if (mode == KVM_PAGE_TRACK_WRITE) - if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) - kvm_flush_remote_tlbs(kvm); + if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) + kvm_flush_remote_tlbs(kvm); } -EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); -/* - * remove the guest page from the tracking pool which stops the interception - * of corresponding access on that page. It is the opposed operation of - * kvm_slot_page_track_add_page(). - * - * It should be called under the protection both of mmu-lock and kvm->srcu - * or kvm->slots_lock. - * - * @kvm: the guest instance we are interested in. - * @slot: the @gfn belongs to. - * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. - */ -void kvm_slot_page_track_remove_page(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode) +void __kvm_write_track_remove_gfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn) { - if (WARN_ON(!page_track_mode_is_valid(mode))) + lockdep_assert_held_write(&kvm->mmu_lock); + + lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || + srcu_read_lock_held(&kvm->srcu)); + + if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) return; - update_gfn_track(slot, gfn, mode, -1); + update_gfn_write_track(slot, gfn, -1); /* * allow large page mapping for the tracked page @@ -133,28 +129,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm, */ kvm_mmu_gfn_allow_lpage(slot, gfn); } -EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page); /* * check if the corresponding access on the specified guest page is tracked. */ -bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode) +bool kvm_gfn_is_write_tracked(struct kvm *kvm, + const struct kvm_memory_slot *slot, gfn_t gfn) { - struct kvm_memory_slot *slot; int index; - if (WARN_ON(!page_track_mode_is_valid(mode))) + if (!slot) return false; - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (!slot) + if (!kvm_page_track_write_tracking_enabled(kvm)) return false; index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); - return !!READ_ONCE(slot->arch.gfn_track[mode][index]); + return !!READ_ONCE(slot->arch.gfn_write_track[index]); } +#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING void kvm_page_track_cleanup(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; @@ -163,30 +157,89 @@ void kvm_page_track_cleanup(struct kvm *kvm) cleanup_srcu_struct(&head->track_srcu); } -void kvm_page_track_init(struct kvm *kvm) +int kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; head = &kvm->arch.track_notifier_head; - init_srcu_struct(&head->track_srcu); INIT_HLIST_HEAD(&head->track_notifier_list); + return init_srcu_struct(&head->track_srcu); +} + +static int kvm_enable_external_write_tracking(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *slot; + int r = 0, i, bkt; + + if (kvm->arch.vm_type == KVM_X86_TDX_VM) + return -EOPNOTSUPP; + + mutex_lock(&kvm->slots_arch_lock); + + /* + * Check for *any* write tracking user (not just external users) under + * lock. This avoids unnecessary work, e.g. if KVM itself is using + * write tracking, or if two external users raced when registering. + */ + if (kvm_page_track_write_tracking_enabled(kvm)) + goto out_success; + + for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { + slots = __kvm_memslots(kvm, i); + kvm_for_each_memslot(slot, bkt, slots) { + /* + * Intentionally do NOT free allocations on failure to + * avoid having to track which allocations were made + * now versus when the memslot was created. The + * metadata is guaranteed to be freed when the slot is + * freed, and will be kept/used if userspace retries + * the failed ioctl() instead of killing the VM. + */ + r = kvm_page_track_write_tracking_alloc(slot); + if (r) + goto out_unlock; + } + } + +out_success: + /* + * Ensure that external_write_tracking_enabled becomes true strictly + * after all the related pointers are set. + */ + smp_store_release(&kvm->arch.external_write_tracking_enabled, true); +out_unlock: + mutex_unlock(&kvm->slots_arch_lock); + return r; } /* * register the notifier so that event interception for the tracked guest * pages can be received. */ -void -kvm_page_track_register_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n) +int kvm_page_track_register_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) { struct kvm_page_track_notifier_head *head; + int r; + + if (!kvm || kvm->mm != current->mm) + return -ESRCH; + + if (!kvm_external_write_tracking_enabled(kvm)) { + r = kvm_enable_external_write_tracking(kvm); + if (r) + return r; + } + + kvm_get_kvm(kvm); head = &kvm->arch.track_notifier_head; write_lock(&kvm->mmu_lock); hlist_add_head_rcu(&n->node, &head->track_notifier_list); write_unlock(&kvm->mmu_lock); + return 0; } EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); @@ -194,9 +247,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); * stop receiving the event interception. It is the opposed operation of * kvm_page_track_register_notifier(). */ -void -kvm_page_track_unregister_notifier(struct kvm *kvm, - struct kvm_page_track_notifier_node *n) +void kvm_page_track_unregister_notifier(struct kvm *kvm, + struct kvm_page_track_notifier_node *n) { struct kvm_page_track_notifier_head *head; @@ -206,6 +258,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm, hlist_del_rcu(&n->node); write_unlock(&kvm->mmu_lock); synchronize_srcu(&head->track_srcu); + + kvm_put_kvm(kvm); } EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); @@ -216,34 +270,30 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); * The node should figure out if the written page is the one that node is * interested in by itself. */ -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes) +void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; int idx; - head = &vcpu->kvm->arch.track_notifier_head; + head = &kvm->arch.track_notifier_head; if (hlist_empty(&head->track_notifier_list)) return; idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, - srcu_read_lock_held(&head->track_srcu)) + srcu_read_lock_held(&head->track_srcu)) if (n->track_write) - n->track_write(vcpu, gpa, new, bytes, n); + n->track_write(gpa, new, bytes, n); srcu_read_unlock(&head->track_srcu, idx); } /* - * Notify the node that memory slot is being removed or moved so that it can - * drop write-protection for the pages in the memory slot. - * - * The node should figure out it has any write-protected pages in this slot - * by itself. + * Notify external page track nodes that a memory region is being removed from + * the VM, e.g. so that users can free any associated metadata. */ -void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) +void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; @@ -256,8 +306,69 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, - srcu_read_lock_held(&head->track_srcu)) - if (n->track_flush_slot) - n->track_flush_slot(kvm, slot, n); + srcu_read_lock_held(&head->track_srcu)) + if (n->track_remove_region) + n->track_remove_region(slot->base_gfn, slot->npages, n); srcu_read_unlock(&head->track_srcu, idx); } + +/* + * add guest page to the tracking pool so that corresponding access on that + * page will be intercepted. + * + * @kvm: the guest instance we are interested in. + * @gfn: the guest page. + */ +int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + + slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } + + write_lock(&kvm->mmu_lock); + __kvm_write_track_add_gfn(kvm, slot, gfn); + write_unlock(&kvm->mmu_lock); + + srcu_read_unlock(&kvm->srcu, idx); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); + +/* + * remove the guest page from the tracking pool which stops the interception + * of corresponding access on that page. + * + * @kvm: the guest instance we are interested in. + * @gfn: the guest page. + */ +int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + + slot = gfn_to_memslot(kvm, gfn); + if (!slot) { + srcu_read_unlock(&kvm->srcu, idx); + return -EINVAL; + } + + write_lock(&kvm->mmu_lock); + __kvm_write_track_remove_gfn(kvm, slot, gfn); + write_unlock(&kvm->mmu_lock); + + srcu_read_unlock(&kvm->srcu, idx); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); +#endif |
