summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/mmu.txt13
-rw-r--r--arch/x86/kvm/mmu.c31
-rw-r--r--include/linux/kvm_host.h4
-rw-r--r--virt/kvm/kvm_main.c8
4 files changed, 26 insertions, 30 deletions
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index e507a9e0421e..367a952f50ab 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -452,13 +452,16 @@ stored into the MMIO spte. Thus, the MMIO spte might be created based on
out-of-date information, but with an up-to-date generation number.
To avoid this, the generation number is incremented again after synchronize_srcu
-returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+returns; thus, bit 63 of kvm_memslots(kvm)->generation set to 1 only during a
memslot update, while some SRCU readers might be using the old copy. We do not
want to use an MMIO sptes created with an odd generation number, and we can do
-this without losing a bit in the MMIO spte. The low bit of the generation
-is not stored in MMIO spte, and presumed zero when it is extracted out of the
-spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1,
-the next access to the spte will always be a cache miss.
+this without losing a bit in the MMIO spte. The "update in-progress" bit of the
+generation is not stored in MMIO spte, and is so is implicitly zero when the
+generation is extracted out of the spte. If KVM is unlucky and creates an MMIO
+spte while an update is in-progress, the next access to the spte will always be
+a cache miss. For example, a subsequent access during the update window will
+miss due to the in-progress flag diverging, while an access after the update
+window closes will have a higher generation number (as compared to the spte).
Further reading
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 364b2a737d94..bcf62e1e1ff7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -335,18 +335,17 @@ static inline bool is_access_track_spte(u64 spte)
* Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
* the memslots generation and is derived as follows:
*
- * Bits 1-9 of the memslot generation are propagated to spte bits 3-11
- * Bits 10-19 of the memslot generation are propagated to spte bits 52-61
+ * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
+ * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
*
- * The MMIO generation starts at bit 1 of the memslots generation in order to
- * skip over bit 0, the KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag. Including
- * the flag would require stealing a bit from the "real" generation number and
- * thus effectively halve the maximum number of MMIO generations that can be
- * handled before encountering a wrap (which requires a full MMU zap). The
- * flag is instead explicitly queried when checking for MMIO spte cache hits.
+ * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
+ * the MMIO generation number, as doing so would require stealing a bit from
+ * the "real" generation number and thus effectively halve the maximum number
+ * of MMIO generations that can be handled before encountering a wrap (which
+ * requires a full MMU zap). The flag is instead explicitly queried when
+ * checking for MMIO spte cache hits.
*/
-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(19, 1)
-#define MMIO_SPTE_GEN_SHIFT 1
+#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0)
#define MMIO_SPTE_GEN_LOW_START 3
#define MMIO_SPTE_GEN_LOW_END 11
@@ -363,8 +362,6 @@ static u64 generation_mmio_spte_mask(u64 gen)
WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
- gen >>= MMIO_SPTE_GEN_SHIFT;
-
mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
return mask;
@@ -378,7 +375,7 @@ static u64 get_mmio_spte_generation(u64 spte)
gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
- return gen << MMIO_SPTE_GEN_SHIFT;
+ return gen;
}
static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
@@ -5905,13 +5902,9 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
{
- gen &= MMIO_SPTE_GEN_MASK;
+ WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
- /*
- * Shift to adjust for the "update in-progress" flag, which isn't
- * included in the MMIO generation number.
- */
- gen >>= MMIO_SPTE_GEN_SHIFT;
+ gen &= MMIO_SPTE_GEN_MASK;
/*
* Generation numbers are incremented in multiples of the number of
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5e1cb74922b3..85c0c00d5159 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -49,7 +49,7 @@
#define KVM_MEMSLOT_INVALID (1UL << 16)
/*
- * Bit 0 of the memslot generation number is an "update in-progress flag",
+ * Bit 63 of the memslot generation number is an "update in-progress flag",
* e.g. is temporarily set for the duration of install_new_memslots().
* This flag effectively creates a unique generation number that is used to
* mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
@@ -67,7 +67,7 @@
* the actual generation number against accesses that were inserted into the
* cache *before* the memslots were updated.
*/
-#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(0)
+#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(63)
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS 2
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5c2e7e173a46..c9d0bc01f8cb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -657,7 +657,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (!slots)
goto out_err_no_srcu;
/* Generations must be different for each address space. */
- slots->generation = i * 2;
+ slots->generation = i;
rcu_assign_pointer(kvm->memslots[i], slots);
}
@@ -890,10 +890,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
* Generations must be unique even across address spaces. We do not need
* a global counter for that, instead the generation space is evenly split
* across address spaces. For example, with two address spaces, address
- * space 0 will use generations 0, 4, 8, ... while address space 1 will
- * use generations 2, 6, 10, 14, ...
+ * space 0 will use generations 0, 2, 4, ... while address space 1 will
+ * use generations 1, 3, 5, ...
*/
- gen += KVM_ADDRESS_SPACE_NUM * 2;
+ gen += KVM_ADDRESS_SPACE_NUM;
kvm_arch_memslots_updated(kvm, gen);