diff options
Diffstat (limited to 'include/linux/kvm_host.h')
| -rw-r--r-- | include/linux/kvm_host.h | 705 |
1 files changed, 511 insertions, 194 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..d93f75b05ae2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include <linux/entry-virt.h> #include <linux/types.h> #include <linux/hardirq.h> #include <linux/list.h> @@ -52,9 +52,10 @@ /* * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally * used in kvm, other bits are visible for userspace which are defined in - * include/linux/kvm_h. + * include/uapi/linux/kvm.h. */ -#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17) /* * Bit 63 of the memslot generation number is an "update in-progress flag", @@ -80,8 +81,8 @@ /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 -#ifndef KVM_ADDRESS_SPACE_NUM -#define KVM_ADDRESS_SPACE_NUM 1 +#ifndef KVM_MAX_NR_ADDRESS_SPACES +#define KVM_MAX_NR_ADDRESS_SPACES 1 #endif /* @@ -97,6 +98,7 @@ #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) +#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4) /* * error pfns indicate that the gfn is in slot but faild to @@ -148,11 +150,9 @@ static inline bool kvm_is_error_hva(unsigned long addr) #endif -#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) - -static inline bool is_error_page(struct page *page) +static inline bool kvm_is_error_gpa(gpa_t gpa) { - return IS_ERR(page); + return gpa == INVALID_GPA; } #define KVM_REQUEST_MASK GENMASK(7,0) @@ -188,13 +188,10 @@ static inline bool is_error_page(struct page *page) bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); -bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, - struct kvm_vcpu *except); -bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, - unsigned long *vcpu_bitmap); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 +#define KVM_PIT_IRQ_SOURCE_ID 2 extern struct mutex kvm_lock; extern struct list_head vm_list; @@ -210,6 +207,7 @@ struct kvm_io_range { struct kvm_io_bus { int dev_count; int ioeventfd_count; + struct rcu_head rcu; struct kvm_io_range range[]; }; @@ -218,6 +216,7 @@ enum kvm_bus { KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, + KVM_IOCSR_BUS, KVM_NR_BUSES }; @@ -240,7 +239,6 @@ struct kvm_async_pf { struct list_head link; struct list_head queue; struct kvm_vcpu *vcpu; - struct mm_struct *mm; gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; @@ -255,18 +253,28 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER +union kvm_mmu_notifier_arg { + unsigned long attributes; +}; + +enum kvm_gfn_range_filter { + KVM_FILTER_SHARED = BIT(0), + KVM_FILTER_PRIVATE = BIT(1), +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; - pte_t pte; + union kvm_mmu_notifier_arg arg; + enum kvm_gfn_range_filter attr_filter; bool may_block; + bool lockless; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); #endif enum { @@ -276,21 +284,19 @@ enum { READING_SHADOW_PAGE_TABLES, }; -#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA) - struct kvm_host_map { /* * Only valid if the 'pfn' is managed by the host kernel (i.e. There is * a 'struct page' for it. When using mem= kernel parameter some memory * can be used as guest memory but they are not managed by host * kernel). - * If 'pfn' is not managed by the host kernel, this field is - * initialized to KVM_UNMAPPED_PAGE. */ + struct page *pinned_page; struct page *page; void *hva; kvm_pfn_t pfn; kvm_pfn_t gfn; + bool writable; }; /* @@ -339,7 +345,8 @@ struct kvm_vcpu { #ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; #endif - struct pid __rcu *pid; + struct pid *pid; + rwlock_t pid_lock; int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; @@ -375,8 +382,10 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool wants_to_run; bool preempted; bool ready; + bool scheduled_out; struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; @@ -480,7 +489,15 @@ static __always_inline void guest_state_enter_irqoff(void) */ static __always_inline void guest_context_exit_irqoff(void) { - context_tracking_guest_exit(); + /* + * Guest mode is treated as a quiescent state, see + * guest_context_enter_irqoff() for more details. + */ + if (!context_tracking_guest_exit()) { + instrumentation_begin(); + rcu_virt_note_context_switch(); + instrumentation_end(); + } } /* @@ -586,8 +603,25 @@ struct kvm_memory_slot { u32 flags; short id; u16 as_id; + +#ifdef CONFIG_KVM_GUEST_MEMFD + struct { + /* + * Writes protected by kvm->slots_lock. Acquiring a + * reference via kvm_gmem_get_file() is protected by + * either kvm->slots_lock or kvm->srcu. + */ + struct file *file; + pgoff_t pgoff; + } gmem; +#endif }; +static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) +{ + return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); +} + static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -662,7 +696,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[]; + struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif @@ -675,13 +709,46 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #define KVM_MEM_SLOTS_NUM SHRT_MAX #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) -#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +#if KVM_MAX_NR_ADDRESS_SPACES == 1 +static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm) +{ + return KVM_MAX_NR_ADDRESS_SPACES; +} + static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) { return 0; } #endif +#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static inline bool kvm_arch_has_private_mem(struct kvm *kvm) +{ + return false; +} +#endif + +#ifdef CONFIG_KVM_GUEST_MEMFD +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); + +static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) +{ + u64 flags = GUEST_MEMFD_FLAG_MMAP; + + if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) + flags |= GUEST_MEMFD_FLAG_INIT_SHARED; + + return flags; +} +#endif + +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -719,9 +786,9 @@ struct kvm { struct mm_struct *mm; /* userspace tied to this vm */ unsigned long nr_memslot_pages; /* The two memslot sets - active and inactive (per address space) */ - struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2]; + struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2]; /* The current active memslot set for each address space */ - struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; + struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES]; struct xarray vcpu_array; /* * Protected by slots_lock, but can be read outside if an @@ -751,7 +818,7 @@ struct kvm { struct list_head vm_list; struct mutex lock; struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; -#ifdef CONFIG_HAVE_KVM_EVENTFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP struct { spinlock_t lock; struct list_head items; @@ -759,8 +826,8 @@ struct kvm { struct list_head resampler_list; struct mutex resampler_lock; } irqfds; - struct list_head ioeventfds; #endif + struct list_head ioeventfds; struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; @@ -776,17 +843,16 @@ struct kvm { * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; -#endif -#ifdef CONFIG_HAVE_KVM_IRQFD + struct hlist_head irq_ack_notifier_list; #endif -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; - unsigned long mmu_invalidate_range_start; - unsigned long mmu_invalidate_range_end; + gfn_t mmu_invalidate_range_start; + gfn_t mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -805,6 +871,10 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; #endif +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + /* Protected by slots_lock (for writes) and RCU (for reads) */ + struct xarray mem_attr_array; +#endif char stats_id[KVM_STATS_NAME_SIZE]; }; @@ -865,6 +935,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm) unlikely(__ret); \ }) +/* + * Note, "data corruption" refers to corruption of host kernel data structures, + * not guest data. Guest data corruption, suspected or confirmed, that is tied + * and contained to a single VM should *never* BUG() and potentially panic the + * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure + * is corrupted and that corruption can have a cascading effect to other parts + * of the hosts and/or to other VMs. + */ +#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ +({ \ + bool __ret = !!(cond); \ + \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ + BUG_ON(__ret); \ + else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ + kvm_vm_bugged(kvm); \ + unlikely(__ret); \ +}) + static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU @@ -889,16 +978,29 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } +/* + * Get a bus reference under the update-side lock. No long-term SRCU reader + * references are permitted, to avoid stale reads vs concurrent IO + * registrations. + */ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { - return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock) || - !refcount_read(&kvm->users_count)); + return rcu_dereference_protected(kvm->buses[idx], + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { int num_vcpus = atomic_read(&kvm->online_vcpus); + + /* + * Explicitly verify the target vCPU is online, as the anti-speculation + * logic only limits the CPU's ability to speculate, e.g. given a "bad" + * index, clamping the index to 0 would return vCPU0, not NULL. + */ + if (i >= num_vcpus) + return NULL; + i = array_index_nospec(i, num_vcpus); /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ @@ -906,9 +1008,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ - (atomic_read(&kvm->online_vcpus) - 1)) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + if (atomic_read(&kvm->online_vcpus)) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { @@ -929,22 +1032,22 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) void kvm_destroy_vcpus(struct kvm *kvm); +int kvm_trylock_all_vcpus(struct kvm *kvm); +int kvm_lock_all_vcpus(struct kvm *kvm); +void kvm_unlock_all_vcpus(struct kvm *kvm); + void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); -void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } -static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) -{ -} #endif -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -968,7 +1071,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); + as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); @@ -1106,6 +1209,10 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_ kvm_memslot_iter_is_valid(iter, end); \ kvm_memslot_iter_next(iter)) +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); +struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); + /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot @@ -1115,7 +1222,7 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_ * -- just change its flags * * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): + * differentiation is the best we can do for kvm_set_memory_region(): */ enum kvm_mr_change { KVM_MR_CREATE, @@ -1124,10 +1231,8 @@ enum kvm_mr_change { KVM_MR_FLAGS_ONLY, }; -int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); -int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); +int kvm_set_internal_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -1144,33 +1249,70 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm); void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); -int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, - struct page **pages, int nr_pages); +int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, + struct page **pages, int nr_pages); + +struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write); +static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +{ + return __gfn_to_page(kvm, gfn, true); +} -struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); + +static inline void kvm_release_page_unused(struct page *page) +{ + if (!page) + return; + + put_page(page); +} + void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, - bool *writable); -kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool atomic, bool interruptible, bool *async, - bool write_fault, bool *writable, hva_t *hva); - -void kvm_release_pfn_clean(kvm_pfn_t pfn); -void kvm_release_pfn_dirty(kvm_pfn_t pfn); -void kvm_set_pfn_dirty(kvm_pfn_t pfn); -void kvm_set_pfn_accessed(kvm_pfn_t pfn); - -void kvm_release_pfn(kvm_pfn_t pfn, bool dirty); +static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page, + bool unused, bool dirty) +{ + lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused); + + if (!page) + return; + + /* + * If the page that KVM got from the *primary MMU* is writable, and KVM + * installed or reused a SPTE, mark the page/folio dirty. Note, this + * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if + * the GFN is write-protected. Folios can't be safely marked dirty + * outside of mmu_lock as doing so could race with writeback on the + * folio. As a result, KVM can't mark folios dirty in the fast page + * fault handler, and so KVM must (somewhat) speculatively mark the + * folio dirty if KVM could locklessly make the SPTE writable. + */ + if (unused) + kvm_release_page_unused(page); + else if (dirty) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); +} + +kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn, + unsigned int foll, bool *writable, + struct page **refcounted_page); + +static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, + bool write, bool *writable, + struct page **refcounted_page) +{ + return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, + write ? FOLL_WRITE : 0, writable, refcounted_page); +} + int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); @@ -1234,19 +1376,28 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, }) int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); -struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); -struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); -kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); -kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); -void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); +int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, + bool writable); +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map); + +static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, + struct kvm_host_map *map) +{ + return __kvm_vcpu_map(vcpu, gpa, map, true); +} + +static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, + struct kvm_host_map *map) +{ + return __kvm_vcpu_map(vcpu, gpa, map, false); +} + unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, @@ -1266,21 +1417,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * * @gpc: struct gfn_to_pfn_cache object. * @kvm: pointer to kvm instance. - * @vcpu: vCPU to be used for marking pages dirty and to be woken on - * invalidation. - * @usage: indicates if the resulting host physical PFN is used while - * the @vcpu is IN_GUEST_MODE (in which case invalidation of - * the cache from MMU notifiers---but not for KVM memslot - * changes!---will also force @vcpu to exit the guest and - * refresh the cache); and/or if the PFN used directly - * by KVM (and thus needs a kernel virtual mapping). * * This sets up a gfn_to_pfn_cache by initializing locks and assigning the * immutable attributes. Note, the cache must be zero-allocated (or zeroed by * the caller before init). */ -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage); +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm); /** * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest @@ -1301,6 +1443,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** + * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA. + * + * @gpc: struct gfn_to_pfn_cache object. + * @hva: userspace virtual address to map. + * @len: sanity check; the range being access must fit a single page. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * The semantics of this function are the same as those of kvm_gpc_activate(). It + * merely bypasses a layer of address translation. + */ +int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len); + +/** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. @@ -1346,6 +1504,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); */ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); +static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && !kvm_is_error_gpa(gpc->gpa); +} + +static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && kvm_is_error_gpa(gpc->gpa); +} + void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); @@ -1354,11 +1522,23 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); -void kvm_vcpu_kick(struct kvm_vcpu *vcpu); + +#ifndef CONFIG_S390 +void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait); + +static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + __kvm_vcpu_kick(vcpu, false); +} +#endif + int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1368,15 +1548,17 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm); +void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end); +void kvm_mmu_invalidate_end(struct kvm *kvm); +bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); @@ -1387,10 +1569,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot); -#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); @@ -1424,8 +1603,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id); @@ -1444,17 +1621,32 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_arch_hardware_enable(void); -void kvm_arch_hardware_disable(void); +/* + * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under + * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of + * kvm_usage_count, i.e. at the beginning of the generic hardware enabling + * sequence, and at the end of the generic hardware disabling sequence. + */ +void kvm_arch_enable_virtualization(void); +void kvm_arch_disable_virtualization(void); +/* + * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to + * do the actual twiddling of hardware bits. The hooks are called on all + * online CPUs when KVM enables/disabled virtualization, and on a single CPU + * when that CPU is onlined/offlined (including for Resume/Suspend). + */ +int kvm_arch_enable_virtualization_cpu(void); +void kvm_arch_disable_virtualization_cpu(void); #endif +bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); -int kvm_arch_post_init_vm(struct kvm *kvm); +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); void kvm_arch_pre_destroy_vm(struct kvm *kvm); -int kvm_arch_create_vm_debugfs(struct kvm *kvm); +void kvm_arch_create_vm_debugfs(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* @@ -1479,11 +1671,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } +#else +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); +#endif + +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + return -EOPNOTSUPP; +} +#else +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA @@ -1504,24 +1708,6 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif -#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE -void kvm_arch_start_assignment(struct kvm *kvm); -void kvm_arch_end_assignment(struct kvm *kvm); -bool kvm_arch_has_assigned_device(struct kvm *kvm); -#else -static inline void kvm_arch_start_assignment(struct kvm *kvm) -{ -} - -static inline void kvm_arch_end_assignment(struct kvm *kvm) -{ -} - -static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) -{ - return false; -} -#endif static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) { @@ -1572,13 +1758,9 @@ static inline void kvm_unregister_perf_callbacks(void) {} int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); -void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); -struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn); -bool kvm_is_zone_device_page(struct page *page); - struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; @@ -1603,8 +1785,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -int kvm_request_irq_source_id(struct kvm *kvm); -void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* @@ -1723,11 +1903,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } -static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) +static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - return kvm_is_error_hva(hva); + return !kvm_is_error_hva(hva); +} + +static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc) +{ + lockdep_assert_held(&gpc->lock); + + if (!gpc->memslot) + return; + + mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa)); } enum kvm_stat_kind { @@ -1862,8 +2052,6 @@ struct _kvm_stats_desc { HALT_POLL_HIST_COUNT), \ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) -extern struct dentry *kvm_debugfs_dir; - ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct _kvm_stats_desc *desc, void *stats, size_t size_stats, @@ -1914,7 +2102,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) @@ -1937,9 +2125,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) return 0; } -static inline int mmu_invalidate_retry_hva(struct kvm *kvm, +static inline int mmu_invalidate_retry_gfn(struct kvm *kvm, unsigned long mmu_seq, - unsigned long hva) + gfn_t gfn) { lockdep_assert_held(&kvm->mmu_lock); /* @@ -1948,14 +2136,50 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm, * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_invalidate_in_progress) && - hva >= kvm->mmu_invalidate_range_start && - hva < kvm->mmu_invalidate_range_end) - return 1; + if (unlikely(kvm->mmu_invalidate_in_progress)) { + /* + * Dropping mmu_lock after bumping mmu_invalidate_in_progress + * but before updating the range is a KVM bug. + */ + if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA || + kvm->mmu_invalidate_range_end == INVALID_GPA)) + return 1; + + if (gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return 1; + } + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } + +/* + * This lockless version of the range-based retry check *must* be paired with a + * call to the locked version after acquiring mmu_lock, i.e. this is safe to + * use only as a pre-check to avoid contending mmu_lock. This version *will* + * get false negatives and false positives. + */ +static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, + unsigned long mmu_seq, + gfn_t gfn) +{ + /* + * Use READ_ONCE() to ensure the in-progress flag and sequence counter + * are always read from memory, e.g. so that checking for retry in a + * loop won't result in an infinite retry loop. Don't force loads for + * start+end, as the key to avoiding infinite retry loops is observing + * the 1=>0 transition of in-progress, i.e. getting false negatives + * due to stale start+end values is acceptable. + */ + if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) && + gfn >= kvm->mmu_invalidate_range_start && + gfn < kvm->mmu_invalidate_range_end) + return true; + + return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; +} #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -1967,6 +2191,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); @@ -1976,16 +2201,19 @@ void kvm_free_irq_routing(struct kvm *kvm); static inline void kvm_free_irq_routing(struct kvm *kvm) {} +static inline int kvm_init_irq_routing(struct kvm *kvm) +{ + return 0; +} + #endif int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); -#ifdef CONFIG_HAVE_KVM_EVENTFD - void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); bool kvm_notify_irqfd_resampler(struct kvm *kvm, @@ -2006,31 +2234,7 @@ static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, { return false; } -#endif - -#else - -static inline void kvm_eventfd_init(struct kvm *kvm) {} - -static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) -{ - return -EINVAL; -} - -static inline void kvm_irqfd_release(struct kvm *kvm) {} - -#ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm) -{ -} -#endif - -static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) -{ - return -ENOSYS; -} - -#endif /* CONFIG_HAVE_KVM_EVENTFD */ +#endif /* CONFIG_HAVE_KVM_IRQCHIP */ void kvm_arch_irq_routing_update(struct kvm *kvm); @@ -2057,6 +2261,14 @@ static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) __kvm_make_request(req, vcpu); } +#ifndef CONFIG_S390 +static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu) +{ + kvm_make_request(req, vcpu); + __kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT); +} +#endif + static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); @@ -2089,6 +2301,7 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +extern bool enable_virt_at_load; extern bool kvm_rebooting; #endif @@ -2148,8 +2361,6 @@ struct kvm_device_ops { int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; -void kvm_device_get(struct kvm_device *dev); -void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); @@ -2189,7 +2400,9 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) +struct kvm_kernel_irqfd; + bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); @@ -2197,10 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); -bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, - struct kvm_kernel_irq_routing_entry *); +void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS @@ -2227,18 +2439,6 @@ static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_NO_POLL */ -#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL -long kvm_arch_vcpu_async_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg); -#else -static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, - unsigned int ioctl, - unsigned long arg) -{ - return -ENOIOCTLCMD; -} -#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ - void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE @@ -2250,19 +2450,24 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); - -int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, - uintptr_t data, const char *name, - struct task_struct **thread_ptr); - -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of @@ -2287,4 +2492,116 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 +static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + gpa_t gpa, gpa_t size, + bool is_write, bool is_exec, + bool is_private) +{ + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; + vcpu->run->memory_fault.gpa = gpa; + vcpu->run->memory_fault.size = size; + + /* RWX flags are not (yet) defined or communicated to userspace. */ + vcpu->run->memory_fault.flags = 0; + if (is_private) + vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; +} + +static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot) +{ + if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) + return false; + + return slot->flags & KVM_MEMSLOT_GMEM_ONLY; +} + +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) +{ + return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)); +} + +bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, + unsigned long mask, unsigned long attrs); +bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); +bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); + +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +{ + return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; +} +#else +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +{ + return false; +} +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + +#ifdef CONFIG_KVM_GUEST_MEMFD +int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, struct page **page, + int *max_order); +#else +static inline int kvm_gmem_get_pfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, + kvm_pfn_t *pfn, struct page **page, + int *max_order) +{ + KVM_BUG_ON(1, kvm); + return -EIO; +} +#endif /* CONFIG_KVM_GUEST_MEMFD */ + +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE +int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); +#endif + +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE +/** + * kvm_gmem_populate() - Populate/prepare a GPA range with guest data + * + * @kvm: KVM instance + * @gfn: starting GFN to be populated + * @src: userspace-provided buffer containing data to copy into GFN range + * (passed to @post_populate, and incremented on each iteration + * if not NULL) + * @npages: number of pages to copy from userspace-buffer + * @post_populate: callback to issue for each gmem page that backs the GPA + * range + * @opaque: opaque data to pass to @post_populate callback + * + * This is primarily intended for cases where a gmem-backed GPA range needs + * to be initialized with userspace-provided data prior to being mapped into + * the guest as a private page. This should be called with the slots->lock + * held so that caller-enforced invariants regarding the expected memory + * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES. + * + * Returns the number of pages that were populated. + */ +typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, + void __user *src, int order, void *opaque); + +long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque); +#endif + +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE +void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); +#endif + +#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY +long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, + struct kvm_pre_fault_memory *range); +#endif + +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +int kvm_enable_virtualization(void); +void kvm_disable_virtualization(void); +#else +static inline int kvm_enable_virtualization(void) { return 0; } +static inline void kvm_disable_virtualization(void) { } +#endif + #endif |
