From df06dae3f2a8bfb83683abf88d3dcde23fc8093d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 23 Feb 2022 16:53:02 +0000
Subject: KVM: Don't actually set a request when evicting vCPUs for GFN cache
 invd

Don't actually set a request bit in vcpu->requests when making a request
purely to force a vCPU to exit the guest.  Logging a request but not
actually consuming it would cause the vCPU to get stuck in an infinite
loop during KVM_RUN because KVM would see the pending request and bail
from VM-Enter to service the request.

Note, it's currently impossible for KVM to set KVM_REQ_GPC_INVALIDATE as
nothing in KVM is wired up to set guest_uses_pa=true.  But, it'd be all
too easy for arch code to introduce use of kvm_gfn_to_pfn_cache_init()
without implementing handling of the request, especially since getting
test coverage of MMU notifier interaction with specific KVM features
usually requires a directed test.

Opportunistically rename gfn_to_pfn_cache_invalidate_start()'s wake_vcpus
to evict_vcpus.  The purpose of the request is to get vCPUs out of guest
mode, it's supposed to _avoid_ waking vCPUs that are blocking.

Opportunistically rename KVM_REQ_GPC_INVALIDATE to be more specific as to
what it wants to accomplish, and to genericize the name so that it can
used for similar but unrelated scenarios, should they arise in the future.
Add a comment and documentation to explain why the "no action" request
exists.

Add compile-time assertions to help detect improper usage.  Use the inner
assertless helper in the one s390 path that makes requests without a
hardcoded request.

Cc: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220223165302.3205276-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9536ffa0473b..678fd7914521 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -148,6 +148,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQUEST_MASK           GENMASK(7,0)
 #define KVM_REQUEST_NO_WAKEUP      BIT(8)
 #define KVM_REQUEST_WAIT           BIT(9)
+#define KVM_REQUEST_NO_ACTION      BIT(10)
 /*
  * Architecture-independent vcpu->requests bit members
  * Bits 4-7 are reserved for more arch-independent bits.
@@ -156,9 +157,18 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_VM_DEAD           (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
-#define KVM_REQ_GPC_INVALIDATE    (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQUEST_ARCH_BASE     8
 
+/*
+ * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to
+ * OUTSIDE_GUEST_MODE.  KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick"
+ * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing
+ * on.  A kick only guarantees that the vCPU is on its way out, e.g. a previous
+ * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no
+ * guarantee the vCPU received an IPI and has actually exited guest mode.
+ */
+#define KVM_REQ_OUTSIDE_GUEST_MODE	(KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
 	BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
 	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
@@ -1222,7 +1232,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * @vcpu:	   vCPU to be used for marking pages dirty and to be woken on
  *		   invalidation.
  * @guest_uses_pa: indicates that the resulting host physical PFN is used while
- *		   @vcpu is IN_GUEST_MODE so invalidations should wake it.
+ *		   @vcpu is IN_GUEST_MODE; invalidations of the cache from MMU
+ *		   notifiers (but not for KVM memslot changes!) will also force
+ *		   @vcpu to exit the guest to refresh the cache.
  * @kernel_map:    requests a kernel virtual mapping (kmap / memremap).
  * @gpa:	   guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
@@ -1233,10 +1245,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  *                 -EFAULT for an untranslatable guest physical address.
  *
  * This primes a gfn_to_pfn_cache and links it into the @kvm's list for
- * invalidations to be processed. Invalidation callbacks to @vcpu using
- * %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM
- * memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check()
- * to ensure that the cache is valid before accessing the target page.
+ * invalidations to be processed.  Callers are required to use
+ * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
+ * accessing the target page.
  */
 int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
@@ -1984,7 +1995,7 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 void kvm_arch_irq_routing_update(struct kvm *kvm);
 
-static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
+static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
 	/*
 	 * Ensure the rest of the request is published to kvm_check_request's
@@ -1994,6 +2005,19 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 	set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
+static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Request that don't require vCPU action should never be logged in
+	 * vcpu->requests.  The vCPU won't clear the request, so it will stay
+	 * logged indefinitely and prevent the vCPU from entering the guest.
+	 */
+	BUILD_BUG_ON(!__builtin_constant_p(req) ||
+		     (req & KVM_REQUEST_NO_ACTION));
+
+	__kvm_make_request(req, vcpu);
+}
+
 static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
 {
 	return READ_ONCE(vcpu->requests);
-- 
cgit 


From d0d96121d03d6d9cf608d948247a9f24f5a02da9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 3 Mar 2022 15:41:11 +0000
Subject: KVM: Use enum to track if cached PFN will be used in guest and/or
 host

Replace the guest_uses_pa and kernel_map booleans in the PFN cache code
with a unified enum/bitmask. Using explicit names makes it easier to
review and audit call sites.

Opportunistically add a WARN to prevent passing garbage; instantating a
cache without declaring its usage is either buggy or pointless.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-2-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 16 ++++++++--------
 include/linux/kvm_types.h | 10 ++++++++--
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 678fd7914521..be9bbc0c6200 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1231,11 +1231,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * @gpc:	   struct gfn_to_pfn_cache object.
  * @vcpu:	   vCPU to be used for marking pages dirty and to be woken on
  *		   invalidation.
- * @guest_uses_pa: indicates that the resulting host physical PFN is used while
- *		   @vcpu is IN_GUEST_MODE; invalidations of the cache from MMU
- *		   notifiers (but not for KVM memslot changes!) will also force
- *		   @vcpu to exit the guest to refresh the cache.
- * @kernel_map:    requests a kernel virtual mapping (kmap / memremap).
+ * @usage:	   indicates if the resulting host physical PFN is used while
+ *		   the @vcpu is IN_GUEST_MODE (in which case invalidation of 
+ *		   the cache from MMU notifiers---but not for KVM memslot
+ *		   changes!---will also force @vcpu to exit the guest and
+ *		   refresh the cache); and/or if the PFN used directly
+ *		   by KVM (and thus needs a kernel virtual mapping).
  * @gpa:	   guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
  * @dirty:         mark the cache dirty immediately.
@@ -1250,9 +1251,8 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * accessing the target page.
  */
 int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-			      struct kvm_vcpu *vcpu, bool guest_uses_pa,
-			      bool kernel_map, gpa_t gpa, unsigned long len,
-			      bool dirty);
+			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+			      gpa_t gpa, unsigned long len, bool dirty);
 
 /**
  * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index dceac12c1ce5..784f37cbf33e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -18,6 +18,7 @@ struct kvm_memslots;
 
 enum kvm_mr_change;
 
+#include <linux/bits.h>
 #include <linux/types.h>
 #include <linux/spinlock_types.h>
 
@@ -46,6 +47,12 @@ typedef u64            hfn_t;
 
 typedef hfn_t kvm_pfn_t;
 
+enum pfn_cache_usage {
+	KVM_GUEST_USES_PFN = BIT(0),
+	KVM_HOST_USES_PFN  = BIT(1),
+	KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN,
+};
+
 struct gfn_to_hva_cache {
 	u64 generation;
 	gpa_t gpa;
@@ -64,11 +71,10 @@ struct gfn_to_pfn_cache {
 	rwlock_t lock;
 	void *khva;
 	kvm_pfn_t pfn;
+	enum pfn_cache_usage usage;
 	bool active;
 	bool valid;
 	bool dirty;
-	bool kernel_map;
-	bool guest_uses_pa;
 };
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
-- 
cgit 


From cf1d88b36ba7e83bdaa50bccc4c47864e8f08cbe Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:12 +0000
Subject: KVM: Remove dirty handling from gfn_to_pfn_cache completely

It isn't OK to cache the dirty status of a page in internal structures
for an indefinite period of time.

Any time a vCPU exits the run loop to userspace might be its last; the
VMM might do its final check of the dirty log, flush the last remaining
dirty pages to the destination and complete a live migration. If we
have internal 'dirty' state which doesn't get flushed until the vCPU
is finally destroyed on the source after migration is complete, then
we have lost data because that will escape the final copy.

This problem already exists with the use of kvm_vcpu_unmap() to mark
pages dirty in e.g. VMX nesting.

Note that the actual Linux MM already considers the page to be dirty
since we have a writeable mapping of it. This is just about the KVM
dirty logging.

For the nesting-style use cases (KVM_GUEST_USES_PFN) we will need to
track which gfn_to_pfn_caches have been used and explicitly mark the
corresponding pages dirty before returning to userspace. But we would
have needed external tracking of that anyway, rather than walking the
full list of GPCs to find those belonging to this vCPU which are dirty.

So let's rely *solely* on that external tracking, and keep it simple
rather than laying a tempting trap for callers to fall into.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-3-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 14 +++++---------
 include/linux/kvm_types.h |  1 -
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index be9bbc0c6200..3f9b22c4983a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1239,7 +1239,6 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  *		   by KVM (and thus needs a kernel virtual mapping).
  * @gpa:	   guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
- * @dirty:         mark the cache dirty immediately.
  *
  * @return:	   0 for success.
  *		   -EINVAL for a mapping which would cross a page boundary.
@@ -1252,7 +1251,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  */
 int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
-			      gpa_t gpa, unsigned long len, bool dirty);
+			      gpa_t gpa, unsigned long len);
 
 /**
  * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@@ -1261,7 +1260,6 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * @gpc:	   struct gfn_to_pfn_cache object.
  * @gpa:	   current guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
- * @dirty:         mark the cache dirty immediately.
  *
  * @return:	   %true if the cache is still valid and the address matches.
  *		   %false if the cache is not valid.
@@ -1283,7 +1281,6 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * @gpc:	   struct gfn_to_pfn_cache object.
  * @gpa:	   updated guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
- * @dirty:         mark the cache dirty immediately.
  *
  * @return:	   0 for success.
  *		   -EINVAL for a mapping which would cross a page boundary.
@@ -1296,7 +1293,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * with the lock still held to permit access.
  */
 int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-				 gpa_t gpa, unsigned long len, bool dirty);
+				 gpa_t gpa, unsigned long len);
 
 /**
  * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
@@ -1304,10 +1301,9 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
  *
- * This unmaps the referenced page and marks it dirty, if appropriate. The
- * cache is left in the invalid state but at least the mapping from GPA to
- * userspace HVA will remain cached and can be reused on a subsequent
- * refresh.
+ * This unmaps the referenced page. The cache is left in the invalid state
+ * but at least the mapping from GPA to userspace HVA will remain cached
+ * and can be reused on a subsequent refresh.
  */
 void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 784f37cbf33e..ac1ebb37a0ff 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -74,7 +74,6 @@ struct gfn_to_pfn_cache {
 	enum pfn_cache_usage usage;
 	bool active;
 	bool valid;
-	bool dirty;
 };
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
-- 
cgit