summary refs log tree commit diff
path: root/include/linux/kvm_host.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/kvm_host.h')
-rw-r--r-- include/linux/kvm_host.h 705
1 files changed, 511 insertions, 194 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d3ac7720da9..d93f75b05ae2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2,7 +2,7 @@
#ifndef __KVM_HOST_H
#define __KVM_HOST_H
-
+#include <linux/entry-virt.h>
#include <linux/types.h>
#include <linux/hardirq.h>
#include <linux/list.h>
@@ -52,9 +52,10 @@
/*
* The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally
* used in kvm, other bits are visible for userspace which are defined in
- * include/linux/kvm_h.
+ * include/uapi/linux/kvm.h.
*/
-#define KVM_MEMSLOT_INVALID (1UL << 16)
+#define KVM_MEMSLOT_INVALID (1UL << 16)
+#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17)
/*
* Bit 63 of the memslot generation number is an "update in-progress flag",
@@ -80,8 +81,8 @@
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS 2
-#ifndef KVM_ADDRESS_SPACE_NUM
-#define KVM_ADDRESS_SPACE_NUM 1
+#ifndef KVM_MAX_NR_ADDRESS_SPACES
+#define KVM_MAX_NR_ADDRESS_SPACES 1
#endif
/*
@@ -97,6 +98,7 @@
#define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1)
#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2)
#define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3)
+#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4)
/*
* error pfns indicate that the gfn is in slot but faild to
@@ -148,11 +150,9 @@ static inline bool kvm_is_error_hva(unsigned long addr)
#endif
-#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT))
-
-static inline bool is_error_page(struct page *page)
+static inline bool kvm_is_error_gpa(gpa_t gpa)
{
- return IS_ERR(page);
+ return gpa == INVALID_GPA;
}
#define KVM_REQUEST_MASK GENMASK(7,0)
@@ -188,13 +188,10 @@ static inline bool is_error_page(struct page *page)
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
unsigned long *vcpu_bitmap);
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
-bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
- struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
- unsigned long *vcpu_bitmap);
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
+#define KVM_PIT_IRQ_SOURCE_ID 2
extern struct mutex kvm_lock;
extern struct list_head vm_list;
@@ -210,6 +207,7 @@ struct kvm_io_range {
struct kvm_io_bus {
int dev_count;
int ioeventfd_count;
+ struct rcu_head rcu;
struct kvm_io_range range[];
};
@@ -218,6 +216,7 @@ enum kvm_bus {
KVM_PIO_BUS,
KVM_VIRTIO_CCW_NOTIFY_BUS,
KVM_FAST_MMIO_BUS,
+ KVM_IOCSR_BUS,
KVM_NR_BUSES
};
@@ -240,7 +239,6 @@ struct kvm_async_pf {
struct list_head link;
struct list_head queue;
struct kvm_vcpu *vcpu;
- struct mm_struct *mm;
gpa_t cr2_or_gpa;
unsigned long addr;
struct kvm_arch_async_pf arch;
@@ -255,18 +253,28 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
-#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
+union kvm_mmu_notifier_arg {
+ unsigned long attributes;
+};
+
+enum kvm_gfn_range_filter {
+ KVM_FILTER_SHARED = BIT(0),
+ KVM_FILTER_PRIVATE = BIT(1),
+};
+
struct kvm_gfn_range {
struct kvm_memory_slot *slot;
gfn_t start;
gfn_t end;
- pte_t pte;
+ union kvm_mmu_notifier_arg arg;
+ enum kvm_gfn_range_filter attr_filter;
bool may_block;
+ bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
#endif
enum {
@@ -276,21 +284,19 @@ enum {
READING_SHADOW_PAGE_TABLES,
};
-#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA)
-
struct kvm_host_map {
/*
* Only valid if the 'pfn' is managed by the host kernel (i.e. There is
* a 'struct page' for it. When using mem= kernel parameter some memory
* can be used as guest memory but they are not managed by host
* kernel).
- * If 'pfn' is not managed by the host kernel, this field is
- * initialized to KVM_UNMAPPED_PAGE.
*/
+ struct page *pinned_page;
struct page *page;
void *hva;
kvm_pfn_t pfn;
kvm_pfn_t gfn;
+ bool writable;
};
/*
@@ -339,7 +345,8 @@ struct kvm_vcpu {
#ifndef __KVM_HAVE_ARCH_WQP
struct rcuwait wait;
#endif
- struct pid __rcu *pid;
+ struct pid *pid;
+ rwlock_t pid_lock;
int sigset_active;
sigset_t sigset;
unsigned int halt_poll_ns;
@@ -375,8 +382,10 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
#endif
+ bool wants_to_run;
bool preempted;
bool ready;
+ bool scheduled_out;
struct kvm_vcpu_arch arch;
struct kvm_vcpu_stat stat;
char stats_id[KVM_STATS_NAME_SIZE];
@@ -480,7 +489,15 @@ static __always_inline void guest_state_enter_irqoff(void)
*/
static __always_inline void guest_context_exit_irqoff(void)
{
- context_tracking_guest_exit();
+ /*
+ * Guest mode is treated as a quiescent state, see
+ * guest_context_enter_irqoff() for more details.
+ */
+ if (!context_tracking_guest_exit()) {
+ instrumentation_begin();
+ rcu_virt_note_context_switch();
+ instrumentation_end();
+ }
}
/*
@@ -586,8 +603,25 @@ struct kvm_memory_slot {
u32 flags;
short id;
u16 as_id;
+
+#ifdef CONFIG_KVM_GUEST_MEMFD
+ struct {
+ /*
+ * Writes protected by kvm->slots_lock. Acquiring a
+ * reference via kvm_gmem_get_file() is protected by
+ * either kvm->slots_lock or kvm->srcu.
+ */
+ struct file *file;
+ pgoff_t pgoff;
+ } gmem;
+#endif
};
+static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot)
+{
+ return slot && (slot->flags & KVM_MEM_GUEST_MEMFD);
+}
+
static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
@@ -662,7 +696,7 @@ struct kvm_irq_routing_table {
* Array indexed by gsi. Each entry contains list of irq chips
* the gsi is connected to.
*/
- struct hlist_head map[];
+ struct hlist_head map[] __counted_by(nr_rt_entries);
};
#endif
@@ -675,13 +709,46 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm);
#define KVM_MEM_SLOTS_NUM SHRT_MAX
#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS)
-#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#if KVM_MAX_NR_ADDRESS_SPACES == 1
+static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm)
+{
+ return KVM_MAX_NR_ADDRESS_SPACES;
+}
+
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
{
return 0;
}
#endif
+#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
+{
+ return false;
+}
+#endif
+
+#ifdef CONFIG_KVM_GUEST_MEMFD
+bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm);
+
+static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm)
+{
+ u64 flags = GUEST_MEMFD_FLAG_MMAP;
+
+ if (!kvm || kvm_arch_supports_gmem_init_shared(kvm))
+ flags |= GUEST_MEMFD_FLAG_INIT_SHARED;
+
+ return flags;
+}
+#endif
+
+#ifndef kvm_arch_has_readonly_mem
+static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
+{
+ return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM);
+}
+#endif
+
struct kvm_memslots {
u64 generation;
atomic_long_t last_used_slot;
@@ -719,9 +786,9 @@ struct kvm {
struct mm_struct *mm; /* userspace tied to this vm */
unsigned long nr_memslot_pages;
/* The two memslot sets - active and inactive (per address space) */
- struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2];
+ struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2];
/* The current active memslot set for each address space */
- struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
+ struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES];
struct xarray vcpu_array;
/*
* Protected by slots_lock, but can be read outside if an
@@ -751,7 +818,7 @@ struct kvm {
struct list_head vm_list;
struct mutex lock;
struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
-#ifdef CONFIG_HAVE_KVM_EVENTFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
struct {
spinlock_t lock;
struct list_head items;
@@ -759,8 +826,8 @@ struct kvm {
struct list_head resampler_list;
struct mutex resampler_lock;
} irqfds;
- struct list_head ioeventfds;
#endif
+ struct list_head ioeventfds;
struct kvm_vm_stat stat;
struct kvm_arch arch;
refcount_t users_count;
@@ -776,17 +843,16 @@ struct kvm {
* Update side is protected by irq_lock.
*/
struct kvm_irq_routing_table __rcu *irq_routing;
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
+
struct hlist_head irq_ack_notifier_list;
#endif
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
struct mmu_notifier mmu_notifier;
unsigned long mmu_invalidate_seq;
long mmu_invalidate_in_progress;
- unsigned long mmu_invalidate_range_start;
- unsigned long mmu_invalidate_range_end;
+ gfn_t mmu_invalidate_range_start;
+ gfn_t mmu_invalidate_range_end;
#endif
struct list_head devices;
u64 manual_dirty_log_protect;
@@ -805,6 +871,10 @@ struct kvm {
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
#endif
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+ /* Protected by slots_lock (for writes) and RCU (for reads) */
+ struct xarray mem_attr_array;
+#endif
char stats_id[KVM_STATS_NAME_SIZE];
};
@@ -865,6 +935,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
unlikely(__ret); \
})
+/*
+ * Note, "data corruption" refers to corruption of host kernel data structures,
+ * not guest data. Guest data corruption, suspected or confirmed, that is tied
+ * and contained to a single VM should *never* BUG() and potentially panic the
+ * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
+ * is corrupted and that corruption can have a cascading effect to other parts
+ * of the host and/or to other VMs.
+ */
+#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \
+({ \
+ bool __ret = !!(cond); \
+ \
+ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \
+ BUG_ON(__ret); \
+ else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PROVE_RCU
@@ -889,16 +978,29 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
}
+/*
+ * Get a bus reference under the update-side lock. No long-term SRCU reader
+ * references are permitted, to avoid stale reads vs concurrent IO
+ * registrations.
+ */
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
- return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock) ||
- !refcount_read(&kvm->users_count));
+ return rcu_dereference_protected(kvm->buses[idx],
+ lockdep_is_held(&kvm->slots_lock));
}
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
int num_vcpus = atomic_read(&kvm->online_vcpus);
+
+ /*
+ * Explicitly verify the target vCPU is online, as the anti-speculation
+ * logic only limits the CPU's ability to speculate, e.g. given a "bad"
+ * index, clamping the index to 0 would return vCPU0, not NULL.
+ */
+ if (i >= num_vcpus)
+ return NULL;
+
i = array_index_nospec(i, num_vcpus);
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
@@ -906,9 +1008,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
return xa_load(&kvm->vcpu_array, i);
}
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
- xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
- (atomic_read(&kvm->online_vcpus) - 1))
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+ if (atomic_read(&kvm->online_vcpus)) \
+ xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
+ (atomic_read(&kvm->online_vcpus) - 1))
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
@@ -929,22 +1032,22 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
void kvm_destroy_vcpus(struct kvm *kvm);
+int kvm_trylock_all_vcpus(struct kvm *kvm);
+int kvm_lock_all_vcpus(struct kvm *kvm);
+void kvm_unlock_all_vcpus(struct kvm *kvm);
+
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_KVM_IOAPIC
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
-void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
}
-static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
-{
-}
#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
@@ -968,7 +1071,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
- as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
+ as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES);
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
@@ -1106,6 +1209,10 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_
kvm_memslot_iter_is_valid(iter, end); \
kvm_memslot_iter_next(iter))
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
+
/*
* KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
* - create a new memory slot
@@ -1115,7 +1222,7 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_
* -- just change its flags
*
* Since flags can be changed by some of these operations, the following
- * differentiation is the best we can do for __kvm_set_memory_region():
+ * differentiation is the best we can do for kvm_set_memory_region():
*/
enum kvm_mr_change {
KVM_MR_CREATE,
@@ -1124,10 +1231,8 @@ enum kvm_mr_change {
KVM_MR_FLAGS_ONLY,
};
-int kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
-int __kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+int kvm_set_internal_memslot(struct kvm *kvm,
+ const struct kvm_userspace_memory_region2 *mem);
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1144,33 +1249,70 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm);
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot);
-int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
- struct page **pages, int nr_pages);
+int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct page **pages, int nr_pages);
+
+struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write);
+static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_page(kvm, gfn, true);
+}
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
bool *writable);
+
+static inline void kvm_release_page_unused(struct page *page)
+{
+ if (!page)
+ return;
+
+ put_page(page);
+}
+
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
-kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
- bool *writable);
-kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
- bool atomic, bool interruptible, bool *async,
- bool write_fault, bool *writable, hva_t *hva);
-
-void kvm_release_pfn_clean(kvm_pfn_t pfn);
-void kvm_release_pfn_dirty(kvm_pfn_t pfn);
-void kvm_set_pfn_dirty(kvm_pfn_t pfn);
-void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
+static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page,
+ bool unused, bool dirty)
+{
+ lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused);
+
+ if (!page)
+ return;
+
+ /*
+ * If the page that KVM got from the *primary MMU* is writable, and KVM
+ * installed or reused a SPTE, mark the page/folio dirty. Note, this
+ * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if
+ * the GFN is write-protected. Folios can't be safely marked dirty
+ * outside of mmu_lock as doing so could race with writeback on the
+ * folio. As a result, KVM can't mark folios dirty in the fast page
+ * fault handler, and so KVM must (somewhat) speculatively mark the
+ * folio dirty if KVM could locklessly make the SPTE writable.
+ */
+ if (unused)
+ kvm_release_page_unused(page);
+ else if (dirty)
+ kvm_release_page_dirty(page);
+ else
+ kvm_release_page_clean(page);
+}
+
+kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn,
+ unsigned int foll, bool *writable,
+ struct page **refcounted_page);
+
+static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool write, bool *writable,
+ struct page **refcounted_page)
+{
+ return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn,
+ write ? FOLL_WRITE : 0, writable, refcounted_page);
+}
+
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -1234,19 +1376,28 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
})
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
-struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
-struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
-kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
-kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
+int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+ bool writable);
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map);
+
+static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa,
+ struct kvm_host_map *map)
+{
+ return __kvm_vcpu_map(vcpu, gpa, map, true);
+}
+
+static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa,
+ struct kvm_host_map *map)
+{
+ return __kvm_vcpu_map(vcpu, gpa, map, false);
+}
+
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
@@ -1266,21 +1417,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
*
* @gpc: struct gfn_to_pfn_cache object.
* @kvm: pointer to kvm instance.
- * @vcpu: vCPU to be used for marking pages dirty and to be woken on
- * invalidation.
- * @usage: indicates if the resulting host physical PFN is used while
- * the @vcpu is IN_GUEST_MODE (in which case invalidation of
- * the cache from MMU notifiers---but not for KVM memslot
- * changes!---will also force @vcpu to exit the guest and
- * refresh the cache); and/or if the PFN used directly
- * by KVM (and thus needs a kernel virtual mapping).
*
* This sets up a gfn_to_pfn_cache by initializing locks and assigning the
* immutable attributes. Note, the cache must be zero-allocated (or zeroed by
* the caller before init).
*/
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm);
/**
* kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
@@ -1301,6 +1443,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
/**
+ * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA.
+ *
+ * @gpc: struct gfn_to_pfn_cache object.
+ * @hva: userspace virtual address to map.
+ * @len: sanity check; the range being accessed must fit a single page.
+ *
+ * @return: 0 for success.
+ * -EINVAL for a mapping which would cross a page boundary.
+ * -EFAULT for an untranslatable userspace virtual address.
+ *
+ * The semantics of this function are the same as those of kvm_gpc_activate(). It
+ * merely bypasses a layer of address translation.
+ */
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len);
+
+/**
* kvm_gpc_check - check validity of a gfn_to_pfn_cache.
*
* @gpc: struct gfn_to_pfn_cache object.
@@ -1346,6 +1504,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
*/
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
+static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc)
+{
+ return gpc->active && !kvm_is_error_gpa(gpc->gpa);
+}
+
+static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc)
+{
+ return gpc->active && kvm_is_error_gpa(gpc->gpa);
+}
+
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
@@ -1354,11 +1522,23 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+
+#ifndef CONFIG_S390
+void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait);
+
+static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ __kvm_vcpu_kick(vcpu, false);
+}
+#endif
+
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@@ -1368,15 +1548,17 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
-void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
- unsigned long end);
-void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
- unsigned long end);
+void kvm_mmu_invalidate_begin(struct kvm *kvm);
+void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
+void kvm_mmu_invalidate_end(struct kvm *kvm);
+bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
+long kvm_arch_vcpu_unlocked_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg);
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
@@ -1387,10 +1569,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
unsigned long mask);
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
int *is_dirty, struct kvm_memory_slot **memslot);
@@ -1424,8 +1603,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id);
@@ -1444,17 +1621,32 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
#endif
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-int kvm_arch_hardware_enable(void);
-void kvm_arch_hardware_disable(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
+ * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
+ * kvm_usage_count, i.e. at the beginning of the generic hardware enabling
+ * sequence, and at the end of the generic hardware disabling sequence.
+ */
+void kvm_arch_enable_virtualization(void);
+void kvm_arch_disable_virtualization(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to
+ * do the actual twiddling of hardware bits. The hooks are called on all
+ * online CPUs when KVM enables/disables virtualization, and on a single CPU
+ * when that CPU is onlined/offlined (including for Resume/Suspend).
+ */
+int kvm_arch_enable_virtualization_cpu(void);
+void kvm_arch_disable_virtualization_cpu(void);
#endif
+bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
-int kvm_arch_post_init_vm(struct kvm *kvm);
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
-int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+void kvm_arch_create_vm_debugfs(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
@@ -1479,11 +1671,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
return -ENOTSUPP;
}
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+ gfn_t gfn, u64 nr_pages)
+{
+ return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
#endif
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -1504,24 +1708,6 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
return false;
}
#endif
-#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE
-void kvm_arch_start_assignment(struct kvm *kvm);
-void kvm_arch_end_assignment(struct kvm *kvm);
-bool kvm_arch_has_assigned_device(struct kvm *kvm);
-#else
-static inline void kvm_arch_start_assignment(struct kvm *kvm)
-{
-}
-
-static inline void kvm_arch_end_assignment(struct kvm *kvm)
-{
-}
-
-static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
-{
- return false;
-}
-#endif
static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
{
@@ -1572,13 +1758,9 @@ static inline void kvm_unregister_perf_callbacks(void) {}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
-struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
-bool kvm_is_zone_device_page(struct page *page);
-
struct kvm_irq_ack_notifier {
struct hlist_node link;
unsigned gsi;
@@ -1603,8 +1785,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
-int kvm_request_irq_source_id(struct kvm *kvm);
-void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
@@ -1723,11 +1903,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
-static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa)
{
unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
- return kvm_is_error_hva(hva);
+ return !kvm_is_error_hva(hva);
+}
+
+static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc)
+{
+ lockdep_assert_held(&gpc->lock);
+
+ if (!gpc->memslot)
+ return;
+
+ mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa));
}
enum kvm_stat_kind {
@@ -1862,8 +2052,6 @@ struct _kvm_stats_desc {
HALT_POLL_HIST_COUNT), \
STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
-extern struct dentry *kvm_debugfs_dir;
-
ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
const struct _kvm_stats_desc *desc,
void *stats, size_t size_stats,
@@ -1914,7 +2102,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
extern const struct kvm_stats_header kvm_vcpu_stats_header;
extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
if (unlikely(kvm->mmu_invalidate_in_progress))
@@ -1937,9 +2125,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
return 0;
}
-static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
+static inline int mmu_invalidate_retry_gfn(struct kvm *kvm,
unsigned long mmu_seq,
- unsigned long hva)
+ gfn_t gfn)
{
lockdep_assert_held(&kvm->mmu_lock);
/*
@@ -1948,14 +2136,50 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
* that might be being invalidated. Note that it may include some false
* positives, due to shortcuts when handing concurrent invalidations.
*/
- if (unlikely(kvm->mmu_invalidate_in_progress) &&
- hva >= kvm->mmu_invalidate_range_start &&
- hva < kvm->mmu_invalidate_range_end)
- return 1;
+ if (unlikely(kvm->mmu_invalidate_in_progress)) {
+ /*
+ * Dropping mmu_lock after bumping mmu_invalidate_in_progress
+ * but before updating the range is a KVM bug.
+ */
+ if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA ||
+ kvm->mmu_invalidate_range_end == INVALID_GPA))
+ return 1;
+
+ if (gfn >= kvm->mmu_invalidate_range_start &&
+ gfn < kvm->mmu_invalidate_range_end)
+ return 1;
+ }
+
if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
}
+
+/*
+ * This lockless version of the range-based retry check *must* be paired with a
+ * call to the locked version after acquiring mmu_lock, i.e. this is safe to
+ * use only as a pre-check to avoid contending mmu_lock. This version *will*
+ * get false negatives and false positives.
+ */
+static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm,
+ unsigned long mmu_seq,
+ gfn_t gfn)
+{
+ /*
+ * Use READ_ONCE() to ensure the in-progress flag and sequence counter
+ * are always read from memory, e.g. so that checking for retry in a
+ * loop won't result in an infinite retry loop. Don't force loads for
+ * start+end, as the key to avoiding infinite retry loops is observing
+ * the 1=>0 transition of in-progress, i.e. getting false negatives
+ * due to stale start+end values is acceptable.
+ */
+ if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) &&
+ gfn >= kvm->mmu_invalidate_range_start &&
+ gfn < kvm->mmu_invalidate_range_end)
+ return true;
+
+ return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq;
+}
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -1967,6 +2191,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
+int kvm_init_irq_routing(struct kvm *kvm);
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
@@ -1976,16 +2201,19 @@ void kvm_free_irq_routing(struct kvm *kvm);
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
+static inline int kvm_init_irq_routing(struct kvm *kvm)
+{
+ return 0;
+}
+
#endif
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-#ifdef CONFIG_HAVE_KVM_EVENTFD
-
void kvm_eventfd_init(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
-#ifdef CONFIG_HAVE_KVM_IRQFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
bool kvm_notify_irqfd_resampler(struct kvm *kvm,
@@ -2006,31 +2234,7 @@ static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm,
{
return false;
}
-#endif
-
-#else
-
-static inline void kvm_eventfd_init(struct kvm *kvm) {}
-
-static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
-{
- return -EINVAL;
-}
-
-static inline void kvm_irqfd_release(struct kvm *kvm) {}
-
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm)
-{
-}
-#endif
-
-static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
-{
- return -ENOSYS;
-}
-
-#endif /* CONFIG_HAVE_KVM_EVENTFD */
+#endif /* CONFIG_HAVE_KVM_IRQCHIP */
void kvm_arch_irq_routing_update(struct kvm *kvm);
@@ -2057,6 +2261,14 @@ static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
__kvm_make_request(req, vcpu);
}
+#ifndef CONFIG_S390
+static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(req, vcpu);
+ __kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT);
+}
+#endif
+
static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
{
return READ_ONCE(vcpu->requests);
@@ -2089,6 +2301,7 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+extern bool enable_virt_at_load;
extern bool kvm_rebooting;
#endif
@@ -2148,8 +2361,6 @@ struct kvm_device_ops {
int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
};
-void kvm_device_get(struct kvm_device *dev);
-void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
@@ -2189,7 +2400,9 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
struct kvm_vcpu *kvm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
+struct kvm_kernel_irqfd;
+
bool kvm_arch_has_irq_bypass(void);
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
@@ -2197,10 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set);
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
- struct kvm_kernel_irq_routing_entry *);
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+ struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new);
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
@@ -2227,18 +2439,6 @@ static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_NO_POLL */
-#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL
-long kvm_arch_vcpu_async_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg);
-#else
-static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
- unsigned int ioctl,
- unsigned long arg)
-{
- return -ENOIOCTLCMD;
-}
-#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
-
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
@@ -2250,19 +2450,24 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
- uintptr_t data, const char *name,
- struct task_struct **thread_ptr);
-
-#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
+#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK
static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
{
vcpu->run->exit_reason = KVM_EXIT_INTR;
vcpu->stat.signal_exits++;
}
-#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
+
+static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu)
+{
+ int r = xfer_to_guest_mode_handle_work();
+
+ if (r) {
+ WARN_ON_ONCE(r != -EINTR);
+ kvm_handle_signal_exit(vcpu);
+ }
+ return r;
+}
+#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */
/*
* If more than one page is being (un)accounted, @virt must be the address of
@@ -2287,4 +2492,116 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
/* Max number of entries allowed for each kvm dirty ring */
#define KVM_DIRTY_RING_MAX_ENTRIES 65536
+static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+ gpa_t gpa, gpa_t size,
+ bool is_write, bool is_exec,
+ bool is_private)
+{
+ vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
+ vcpu->run->memory_fault.gpa = gpa;
+ vcpu->run->memory_fault.size = size;
+
+ /* RWX flags are not (yet) defined or communicated to userspace. */
+ vcpu->run->memory_fault.flags = 0;
+ if (is_private)
+ vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
+}
+
+static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot)
+{
+ if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD))
+ return false;
+
+ return slot->flags & KVM_MEMSLOT_GMEM_ONLY;
+}
+
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
+{
+ return xa_to_value(xa_load(&kvm->mem_attr_array, gfn));
+}
+
+bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
+ unsigned long mask, unsigned long attrs);
+bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
+ struct kvm_gfn_range *range);
+bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
+ struct kvm_gfn_range *range);
+
+static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
+{
+ return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
+}
+#else
+static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
+{
+ return false;
+}
+#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
+
+#ifdef CONFIG_KVM_GUEST_MEMFD
+int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
+ int *max_order);
+#else
+static inline int kvm_gmem_get_pfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ kvm_pfn_t *pfn, struct page **page,
+ int *max_order)
+{
+ KVM_BUG_ON(1, kvm);
+ return -EIO;
+}
+#endif /* CONFIG_KVM_GUEST_MEMFD */
+
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
+#endif
+
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
+/**
+ * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
+ *
+ * @kvm: KVM instance
+ * @gfn: starting GFN to be populated
+ * @src: userspace-provided buffer containing data to copy into GFN range
+ * (passed to @post_populate, and incremented on each iteration
+ * if not NULL)
+ * @npages: number of pages to copy from userspace-buffer
+ * @post_populate: callback to issue for each gmem page that backs the GPA
+ * range
+ * @opaque: opaque data to pass to @post_populate callback
+ *
+ * This is primarily intended for cases where a gmem-backed GPA range needs
+ * to be initialized with userspace-provided data prior to being mapped into
+ * the guest as a private page. This should be called with the slots->lock
+ * held so that caller-enforced invariants regarding the expected memory
+ * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES.
+ *
+ * Returns the number of pages that were populated.
+ */
+typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+ void __user *src, int order, void *opaque);
+
+long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
+ kvm_gmem_populate_cb post_populate, void *opaque);
+#endif
+
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+ struct kvm_pre_fault_memory *range);
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+int kvm_enable_virtualization(void);
+void kvm_disable_virtualization(void);
+#else
+static inline int kvm_enable_virtualization(void) { return 0; }
+static inline void kvm_disable_virtualization(void) { }
+#endif
+
#endif