Diffstat (limited to 'include/linux/kvm_host.h')
-rw-r--r--  include/linux/kvm_host.h  | 423
1 file changed, 293 insertions(+), 130 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 692c01e41a18..d93f75b05ae2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2,7 +2,7 @@
#ifndef __KVM_HOST_H
#define __KVM_HOST_H
-
+#include <linux/entry-virt.h>
#include <linux/types.h>
#include <linux/hardirq.h>
#include <linux/list.h>
@@ -52,9 +52,10 @@
/*
* The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally
* used in kvm, other bits are visible for userspace which are defined in
- * include/linux/kvm_h.
+ * include/uapi/linux/kvm.h.
*/
-#define KVM_MEMSLOT_INVALID (1UL << 16)
+#define KVM_MEMSLOT_INVALID (1UL << 16)
+#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17)
/*
* Bit 63 of the memslot generation number is an "update in-progress flag",
@@ -97,6 +98,7 @@
#define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1)
#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2)
#define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3)
+#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4)
/*
 * error pfns indicate that the gfn is in slot but failed to
@@ -153,13 +155,6 @@ static inline bool kvm_is_error_gpa(gpa_t gpa)
return gpa == INVALID_GPA;
}
-#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT))
-
-static inline bool is_error_page(struct page *page)
-{
- return IS_ERR(page);
-}
-
#define KVM_REQUEST_MASK GENMASK(7,0)
#define KVM_REQUEST_NO_WAKEUP BIT(8)
#define KVM_REQUEST_WAIT BIT(9)
@@ -196,6 +191,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
+#define KVM_PIT_IRQ_SOURCE_ID 2
extern struct mutex kvm_lock;
extern struct list_head vm_list;
@@ -211,6 +207,7 @@ struct kvm_io_range {
struct kvm_io_bus {
int dev_count;
int ioeventfd_count;
+ struct rcu_head rcu;
struct kvm_io_range range[];
};
@@ -219,6 +216,7 @@ enum kvm_bus {
KVM_PIO_BUS,
KVM_VIRTIO_CCW_NOTIFY_BUS,
KVM_FAST_MMIO_BUS,
+ KVM_IOCSR_BUS,
KVM_NR_BUSES
};
@@ -260,12 +258,19 @@ union kvm_mmu_notifier_arg {
unsigned long attributes;
};
+enum kvm_gfn_range_filter {
+ KVM_FILTER_SHARED = BIT(0),
+ KVM_FILTER_PRIVATE = BIT(1),
+};
+
struct kvm_gfn_range {
struct kvm_memory_slot *slot;
gfn_t start;
gfn_t end;
union kvm_mmu_notifier_arg arg;
+ enum kvm_gfn_range_filter attr_filter;
bool may_block;
+ bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -279,21 +284,19 @@ enum {
READING_SHADOW_PAGE_TABLES,
};
-#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA)
-
struct kvm_host_map {
/*
* Only valid if the 'pfn' is managed by the host kernel (i.e. There is
* a 'struct page' for it. When using mem= kernel parameter some memory
* can be used as guest memory but they are not managed by host
* kernel).
- * If 'pfn' is not managed by the host kernel, this field is
- * initialized to KVM_UNMAPPED_PAGE.
*/
+ struct page *pinned_page;
struct page *page;
void *hva;
kvm_pfn_t pfn;
kvm_pfn_t gfn;
+ bool writable;
};
/*
@@ -342,7 +345,8 @@ struct kvm_vcpu {
#ifndef __KVM_HAVE_ARCH_WQP
struct rcuwait wait;
#endif
- struct pid __rcu *pid;
+ struct pid *pid;
+ rwlock_t pid_lock;
int sigset_active;
sigset_t sigset;
unsigned int halt_poll_ns;
@@ -378,8 +382,10 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
#endif
+ bool wants_to_run;
bool preempted;
bool ready;
+ bool scheduled_out;
struct kvm_vcpu_arch arch;
struct kvm_vcpu_stat stat;
char stats_id[KVM_STATS_NAME_SIZE];
@@ -483,7 +489,15 @@ static __always_inline void guest_state_enter_irqoff(void)
*/
static __always_inline void guest_context_exit_irqoff(void)
{
- context_tracking_guest_exit();
+ /*
+ * Guest mode is treated as a quiescent state, see
+ * guest_context_enter_irqoff() for more details.
+ */
+ if (!context_tracking_guest_exit()) {
+ instrumentation_begin();
+ rcu_virt_note_context_switch();
+ instrumentation_end();
+ }
}
/*
@@ -590,15 +604,20 @@ struct kvm_memory_slot {
short id;
u16 as_id;
-#ifdef CONFIG_KVM_PRIVATE_MEM
+#ifdef CONFIG_KVM_GUEST_MEMFD
struct {
- struct file __rcu *file;
+ /*
+ * Writes protected by kvm->slots_lock. Acquiring a
+ * reference via kvm_gmem_get_file() is protected by
+ * either kvm->slots_lock or kvm->srcu.
+ */
+ struct file *file;
pgoff_t pgoff;
} gmem;
#endif
};
-static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
+static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot)
{
return slot && (slot->flags & KVM_MEM_GUEST_MEMFD);
}
@@ -702,17 +721,34 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
}
#endif
-/*
- * Arch code must define kvm_arch_has_private_mem if support for private memory
- * is enabled.
- */
-#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
+#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
{
return false;
}
#endif
+#ifdef CONFIG_KVM_GUEST_MEMFD
+bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm);
+
+static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm)
+{
+ u64 flags = GUEST_MEMFD_FLAG_MMAP;
+
+ if (!kvm || kvm_arch_supports_gmem_init_shared(kvm))
+ flags |= GUEST_MEMFD_FLAG_INIT_SHARED;
+
+ return flags;
+}
+#endif
+
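For illustration, a hedged sketch of how a guest_memfd creation path might consult the helper above to validate userspace-supplied flags (the function name is hypothetical, not part of this patch):

/* Hypothetical validation of userspace-requested guest_memfd flags. */
static int example_validate_gmem_flags(struct kvm *kvm, u64 flags)
{
	if (flags & ~kvm_gmem_get_supported_flags(kvm))
		return -EINVAL;

	return 0;
}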
+#ifndef kvm_arch_has_readonly_mem
+static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
+{
+ return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM);
+}
+#endif
+
struct kvm_memslots {
u64 generation;
atomic_long_t last_used_slot;
@@ -836,7 +872,7 @@ struct kvm {
struct notifier_block pm_notifier;
#endif
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
- /* Protected by slots_locks (for writes) and RCU (for reads) */
+ /* Protected by slots_lock (for writes) and RCU (for reads) */
struct xarray mem_attr_array;
#endif
char stats_id[KVM_STATS_NAME_SIZE];
@@ -942,16 +978,29 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
}
+/*
+ * Get a bus reference under the update-side lock. No long-term SRCU reader
+ * references are permitted, to avoid stale reads vs concurrent IO
+ * registrations.
+ */
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
- return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock) ||
- !refcount_read(&kvm->users_count));
+ return rcu_dereference_protected(kvm->buses[idx],
+ lockdep_is_held(&kvm->slots_lock));
}
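For illustration, a minimal sketch of a reader that honors the new rule that bus references are only taken under the update-side lock (the helper is hypothetical):

/* Hypothetical walk of a bus's devices; only legal under kvm->slots_lock. */
static void example_inspect_bus(struct kvm *kvm, enum kvm_bus idx)
{
	struct kvm_io_bus *bus;
	int i;

	mutex_lock(&kvm->slots_lock);
	bus = kvm_get_bus(kvm, idx);
	for (i = 0; i < bus->dev_count; i++)
		pr_debug("dev %d at gpa 0x%llx\n", i,
			 (unsigned long long)bus->range[i].addr);
	mutex_unlock(&kvm->slots_lock);
}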
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
int num_vcpus = atomic_read(&kvm->online_vcpus);
+
+ /*
+ * Explicitly verify the target vCPU is online, as the anti-speculation
+ * logic only limits the CPU's ability to speculate, e.g. given a "bad"
+ * index, clamping the index to 0 would return vCPU0, not NULL.
+ */
+ if (i >= num_vcpus)
+ return NULL;
+
i = array_index_nospec(i, num_vcpus);
/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
@@ -959,9 +1008,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
return xa_load(&kvm->vcpu_array, i);
}
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
- xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
- (atomic_read(&kvm->online_vcpus) - 1))
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+ if (atomic_read(&kvm->online_vcpus)) \
+ xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
+ (atomic_read(&kvm->online_vcpus) - 1))
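Taken together, the explicit bounds check in kvm_get_vcpu() and the online_vcpus guard in kvm_for_each_vcpu() mean callers see NULL (or an empty walk) instead of a clamped vCPU0. A minimal caller sketch, with a hypothetical function name:

/* Hypothetical helper: reject an out-of-range, userspace-supplied vCPU index. */
static int example_kick_vcpu_by_index(struct kvm *kvm, int idx)
{
	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, idx);

	if (!vcpu)
		return -EINVAL;	/* idx >= online_vcpus, or no vCPUs created yet */

	kvm_vcpu_kick(vcpu);
	return 0;
}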
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
@@ -982,19 +1032,19 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
void kvm_destroy_vcpus(struct kvm *kvm);
+int kvm_trylock_all_vcpus(struct kvm *kvm);
+int kvm_lock_all_vcpus(struct kvm *kvm);
+void kvm_unlock_all_vcpus(struct kvm *kvm);
+
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_KVM_IOAPIC
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
-void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
}
-static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
-{
-}
#endif
#ifdef CONFIG_HAVE_KVM_IRQCHIP
@@ -1159,6 +1209,10 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_
kvm_memslot_iter_is_valid(iter, end); \
kvm_memslot_iter_next(iter))
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
+
/*
* KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
* - create a new memory slot
@@ -1168,7 +1222,7 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_
* -- just change its flags
*
* Since flags can be changed by some of these operations, the following
- * differentiation is the best we can do for __kvm_set_memory_region():
+ * differentiation is the best we can do for kvm_set_memory_region():
*/
enum kvm_mr_change {
KVM_MR_CREATE,
@@ -1177,10 +1231,8 @@ enum kvm_mr_change {
KVM_MR_FLAGS_ONLY,
};
-int kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region2 *mem);
-int __kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region2 *mem);
+int kvm_set_internal_memslot(struct kvm *kvm,
+ const struct kvm_userspace_memory_region2 *mem);
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1197,33 +1249,70 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm);
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot);
-int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
- struct page **pages, int nr_pages);
+int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct page **pages, int nr_pages);
+
+struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write);
+static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_page(kvm, gfn, true);
+}
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
bool *writable);
+
+static inline void kvm_release_page_unused(struct page *page)
+{
+ if (!page)
+ return;
+
+ put_page(page);
+}
+
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
-kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
- bool *writable);
-kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
- bool atomic, bool interruptible, bool *async,
- bool write_fault, bool *writable, hva_t *hva);
-
-void kvm_release_pfn_clean(kvm_pfn_t pfn);
-void kvm_release_pfn_dirty(kvm_pfn_t pfn);
-void kvm_set_pfn_dirty(kvm_pfn_t pfn);
-void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
+static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page,
+ bool unused, bool dirty)
+{
+ lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused);
+
+ if (!page)
+ return;
+
+ /*
+ * If the page that KVM got from the *primary MMU* is writable, and KVM
+ * installed or reused a SPTE, mark the page/folio dirty. Note, this
+ * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if
+ * the GFN is write-protected. Folios can't be safely marked dirty
+ * outside of mmu_lock as doing so could race with writeback on the
+ * folio. As a result, KVM can't mark folios dirty in the fast page
+ * fault handler, and so KVM must (somewhat) speculatively mark the
+ * folio dirty if KVM could locklessly make the SPTE writable.
+ */
+ if (unused)
+ kvm_release_page_unused(page);
+ else if (dirty)
+ kvm_release_page_dirty(page);
+ else
+ kvm_release_page_clean(page);
+}
+
+kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn,
+ unsigned int foll, bool *writable,
+ struct page **refcounted_page);
+
+static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool write, bool *writable,
+ struct page **refcounted_page)
+{
+ return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn,
+ write ? FOLL_WRITE : 0, writable, refcounted_page);
+}
+
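For illustration, a minimal sketch of the intended call pattern, assuming an architecture whose mmu_lock is an rwlock and a hypothetical example_install_spte() helper: the pfn is faulted in outside mmu_lock, and the refcounted page is released (and possibly dirtied) under it.

/* Hypothetical arch page-fault path using the new faultin/release helpers. */
static int example_handle_fault(struct kvm_vcpu *vcpu, gfn_t gfn, bool write)
{
	struct page *refcounted_page = NULL;
	bool writable = false;
	kvm_pfn_t pfn;
	int r;

	pfn = kvm_faultin_pfn(vcpu, gfn, write, &writable, &refcounted_page);
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	write_lock(&vcpu->kvm->mmu_lock);
	r = example_install_spte(vcpu, gfn, pfn, writable);	/* arch-specific stand-in */
	/* Dirty the page only if KVM could have created a writable mapping. */
	kvm_release_faultin_page(vcpu->kvm, refcounted_page, r != 0, writable);
	write_unlock(&vcpu->kvm->mmu_lock);

	return r;
}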
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -1287,19 +1376,28 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
})
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
-struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
-struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
-kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
-kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
+int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+ bool writable);
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map);
+
+static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa,
+ struct kvm_host_map *map)
+{
+ return __kvm_vcpu_map(vcpu, gpa, map, true);
+}
+
+static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa,
+ struct kvm_host_map *map)
+{
+ return __kvm_vcpu_map(vcpu, gpa, map, false);
+}
+
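For illustration, a minimal sketch of the reworked mapping API: whether a mapping may dirty the page is now fixed at map time, so kvm_vcpu_unmap() no longer takes a dirty flag (the helper name is hypothetical):

/* Hypothetical helper: write a u32 into guest memory via a temporary map. */
static int example_write_guest_u32(struct kvm_vcpu *vcpu, gpa_t gpa, u32 val)
{
	struct kvm_host_map map;

	if (kvm_vcpu_map(vcpu, gpa, &map))
		return -EFAULT;

	*(u32 *)(map.hva + offset_in_page(gpa)) = val;
	kvm_vcpu_unmap(vcpu, &map);

	return 0;
}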
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
@@ -1424,7 +1522,16 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+
+#ifndef CONFIG_S390
+void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait);
+
+static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ __kvm_vcpu_kick(vcpu, false);
+}
+#endif
+
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
@@ -1450,6 +1557,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
+long kvm_arch_vcpu_unlocked_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg);
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
@@ -1494,8 +1603,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id);
@@ -1514,16 +1621,30 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
#endif
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-int kvm_arch_hardware_enable(void);
-void kvm_arch_hardware_disable(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
+ * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
+ * kvm_usage_count, i.e. at the beginning of the generic hardware enabling
+ * sequence, and at the end of the generic hardware disabling sequence.
+ */
+void kvm_arch_enable_virtualization(void);
+void kvm_arch_disable_virtualization(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to
+ * do the actual twiddling of hardware bits. The hooks are called on all
+ * online CPUs when KVM enables/disables virtualization, and on a single CPU
+ * when that CPU is onlined/offlined (including for Resume/Suspend).
+ */
+int kvm_arch_enable_virtualization_cpu(void);
+void kvm_arch_disable_virtualization_cpu(void);
#endif
+bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
-int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
void kvm_arch_create_vm_debugfs(struct kvm *kvm);
@@ -1587,24 +1708,6 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
return false;
}
#endif
-#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE
-void kvm_arch_start_assignment(struct kvm *kvm);
-void kvm_arch_end_assignment(struct kvm *kvm);
-bool kvm_arch_has_assigned_device(struct kvm *kvm);
-#else
-static inline void kvm_arch_start_assignment(struct kvm *kvm)
-{
-}
-
-static inline void kvm_arch_end_assignment(struct kvm *kvm)
-{
-}
-
-static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
-{
- return false;
-}
-#endif
static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
{
@@ -1655,13 +1758,9 @@ static inline void kvm_unregister_perf_callbacks(void) {}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
-struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
-bool kvm_is_zone_device_page(struct page *page);
-
struct kvm_irq_ack_notifier {
struct hlist_node link;
unsigned gsi;
@@ -1686,8 +1785,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
-int kvm_request_irq_source_id(struct kvm *kvm);
-void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
@@ -1955,8 +2052,6 @@ struct _kvm_stats_desc {
HALT_POLL_HIST_COUNT), \
STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
-extern struct dentry *kvm_debugfs_dir;
-
ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
const struct _kvm_stats_desc *desc,
void *stats, size_t size_stats,
@@ -2096,6 +2191,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
+int kvm_init_irq_routing(struct kvm *kvm);
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
@@ -2105,6 +2201,11 @@ void kvm_free_irq_routing(struct kvm *kvm);
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
+static inline int kvm_init_irq_routing(struct kvm *kvm)
+{
+ return 0;
+}
+
#endif
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@@ -2160,6 +2261,14 @@ static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
__kvm_make_request(req, vcpu);
}
+#ifndef CONFIG_S390
+static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(req, vcpu);
+ __kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT);
+}
+#endif
+
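A minimal usage sketch, assuming a non-s390 build (the helper is compiled out there) and a hypothetical caller:

/* Hypothetical: queue a TLB flush and make sure the target vCPU acts on it. */
static void example_request_tlb_flush(struct kvm_vcpu *vcpu)
{
	/* KVM_REQ_TLB_FLUSH carries KVM_REQUEST_WAIT, so the kick waits. */
	kvm_make_request_and_kick(KVM_REQ_TLB_FLUSH, vcpu);
}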
static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
{
return READ_ONCE(vcpu->requests);
@@ -2192,6 +2301,7 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+extern bool enable_virt_at_load;
extern bool kvm_rebooting;
#endif
@@ -2290,7 +2400,9 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
struct kvm_vcpu *kvm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
+struct kvm_kernel_irqfd;
+
bool kvm_arch_has_irq_bypass(void);
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
@@ -2298,10 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set);
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
- struct kvm_kernel_irq_routing_entry *);
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+ struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new);
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
@@ -2328,18 +2439,6 @@ static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_NO_POLL */
-#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL
-long kvm_arch_vcpu_async_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg);
-#else
-static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
- unsigned int ioctl,
- unsigned long arg)
-{
- return -ENOIOCTLCMD;
-}
-#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
-
void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
@@ -2351,19 +2450,24 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
-typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
-
-int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
- uintptr_t data, const char *name,
- struct task_struct **thread_ptr);
-
-#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
+#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK
static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
{
vcpu->run->exit_reason = KVM_EXIT_INTR;
vcpu->stat.signal_exits++;
}
-#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
+
+static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu)
+{
+ int r = xfer_to_guest_mode_handle_work();
+
+ if (r) {
+ WARN_ON_ONCE(r != -EINTR);
+ kvm_handle_signal_exit(vcpu);
+ }
+ return r;
+}
+#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */
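For illustration, a minimal sketch of where this sits in an arch vcpu-run loop, assuming the architecture selects CONFIG_VIRT_XFER_TO_GUEST_WORK (the loop body is hypothetical):

/* Hypothetical run-loop fragment: handle pending work before guest entry. */
static int example_vcpu_run(struct kvm_vcpu *vcpu)
{
	int r;

	for (;;) {
		r = kvm_xfer_to_guest_mode_handle_work(vcpu);
		if (r)
			return r;	/* -EINTR; exit_reason is already set */

		/* ... arch code enters the guest here ... */
	}
}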
/*
* If more than one page is being (un)accounted, @virt must be the address of
@@ -2403,6 +2507,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
}
+static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot)
+{
+ if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD))
+ return false;
+
+ return slot->flags & KVM_MEMSLOT_GMEM_ONLY;
+}
+
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
{
@@ -2410,7 +2522,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
}
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
- unsigned long attrs);
+ unsigned long mask, unsigned long attrs);
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
struct kvm_gfn_range *range);
bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
@@ -2418,8 +2530,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
- return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
- kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
+ return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
}
#else
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
@@ -2428,17 +2539,69 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
-#ifdef CONFIG_KVM_PRIVATE_MEM
+#ifdef CONFIG_KVM_GUEST_MEMFD
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
- gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+ gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
+ int *max_order);
#else
static inline int kvm_gmem_get_pfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
- kvm_pfn_t *pfn, int *max_order)
+ kvm_pfn_t *pfn, struct page **page,
+ int *max_order)
{
KVM_BUG_ON(1, kvm);
return -EIO;
}
-#endif /* CONFIG_KVM_PRIVATE_MEM */
+#endif /* CONFIG_KVM_GUEST_MEMFD */
+
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
+#endif
+
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
+/**
+ * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
+ *
+ * @kvm: KVM instance
+ * @gfn: starting GFN to be populated
+ * @src: userspace-provided buffer containing data to copy into GFN range
+ * (passed to @post_populate, and incremented on each iteration
+ * if not NULL)
+ * @npages: number of pages to copy from the userspace buffer
+ * @post_populate: callback to issue for each gmem page that backs the GPA
+ * range
+ * @opaque: opaque data to pass to @post_populate callback
+ *
+ * This is primarily intended for cases where a gmem-backed GPA range needs
+ * to be initialized with userspace-provided data prior to being mapped into
+ * the guest as a private page. This should be called with the slots->lock
+ * held so that caller-enforced invariants regarding the expected memory
+ * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES.
+ *
+ * Returns the number of pages that were populated.
+ */
+typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+ void __user *src, int order, void *opaque);
+
+long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
+ kvm_gmem_populate_cb post_populate, void *opaque);
+#endif
+
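For illustration, a hedged sketch of a post-populate callback and its invocation (names are hypothetical; real callbacks typically also encrypt or measure the page, which is arch-specific and omitted here):

/* Hypothetical callback: copy userspace data into each backing gmem page. */
static int example_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
				 void __user *src, int order, void *opaque)
{
	long i, npages = 1 << order;
	int r = 0;

	for (i = 0; i < npages && !r; i++) {
		void *dst = kmap_local_pfn(pfn + i);

		if (src && copy_from_user(dst, src + i * PAGE_SIZE, PAGE_SIZE))
			r = -EFAULT;
		kunmap_local(dst);
	}

	return r;
}

/* Caller, with kvm->slots_lock held as required by the comment above: */
/*	ret = kvm_gmem_populate(kvm, gfn, src, npages, example_post_populate, NULL); */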
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+ struct kvm_pre_fault_memory *range);
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+int kvm_enable_virtualization(void);
+void kvm_disable_virtualization(void);
+#else
+static inline int kvm_enable_virtualization(void) { return 0; }
+static inline void kvm_disable_virtualization(void) { }
+#endif
#endif