From 8120337a4c5502118e255b170799040eefe2f280 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:35 -0800
Subject: KVM: x86/mmu: Stop using software available bits to denote MMIO SPTEs

Stop tagging MMIO SPTEs with specific available bits and instead detect
MMIO SPTEs by checking for their unique SPTE value.  The value is
guaranteed to be unique on shadow paging and NPT, as setting reserved
physical address bits on any other type of SPTE would constitute a KVM
bug.  Ditto for EPT, as creating a WX non-MMIO SPTE would also be a bug.

Note, this approach is also future-compatible with TDX, which will need
to reflect MMIO EPT violations as #VEs into the guest.  To create an EPT
violation instead of a misconfig, TDX EPTs will need to have RWX=0.  But,
MMIO SPTEs will also be the only case where KVM clears SUPPRESS_VE, so
MMIO SPTEs will still be guaranteed to have a unique value within a given
MMU context.

The main motivation is to make it easier to reason about which types of
SPTEs use which available bits.  As a happy side effect, this frees up
two more bits for storing the MMIO generation.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 6de3950fd704..642a17b9964c 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -8,15 +8,11 @@
 #define PT_FIRST_AVAIL_BITS_SHIFT 10
 #define PT64_SECOND_AVAIL_BITS_SHIFT 54

-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs.
- */
+/* The mask used to denote Access Tracking SPTEs.  Note, val=3 is available. */
 #define SPTE_SPECIAL_MASK (3ULL << 52)
 #define SPTE_AD_ENABLED_MASK (0ULL << 52)
 #define SPTE_AD_DISABLED_MASK (1ULL << 52)
 #define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
-#define SPTE_MMIO_MASK (3ULL << 52)

 #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
 #define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
@@ -98,6 +94,7 @@ extern u64 __read_mostly shadow_user_mask;
 extern u64 __read_mostly shadow_accessed_mask;
 extern u64 __read_mostly shadow_dirty_mask;
 extern u64 __read_mostly shadow_mmio_value;
+extern u64 __read_mostly shadow_mmio_mask;
 extern u64 __read_mostly shadow_mmio_access_mask;
 extern u64 __read_mostly shadow_present_mask;
 extern u64 __read_mostly shadow_me_mask;
@@ -167,7 +164,8 @@ extern u8 __read_mostly shadow_phys_bits;

 static inline bool is_mmio_spte(u64 spte)
 {
-        return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK;
+        return (spte & shadow_mmio_mask) == shadow_mmio_value &&
+               likely(shadow_mmio_value);
 }

 static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
-- cgit
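To make the unique-value detection scheme concrete, here is a minimal,
self-contained C sketch (not kernel code).  The mask and value below are
hypothetical stand-ins for whatever the vendor setup code programs at
runtime (e.g. reserved physical-address bits for shadow paging/NPT); only
the shape of the check mirrors the is_mmio_spte() above.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical configuration, for illustration only. */
static uint64_t shadow_mmio_mask  = (3ULL << 52) | 0x7ULL; /* bits that must match */
static uint64_t shadow_mmio_value = (3ULL << 52);          /* the unique MMIO pattern */

static int is_mmio_spte(uint64_t spte)
{
        /* An all-zero shadow_mmio_value means MMIO SPTEs are unsupported. */
        return shadow_mmio_value &&
               (spte & shadow_mmio_mask) == shadow_mmio_value;
}

int main(void)
{
        uint64_t mmio = shadow_mmio_value | (0x1234ULL << 12); /* gen/GFN bits */
        printf("%d %d\n", is_mmio_spte(mmio), is_mmio_spte(0)); /* 1 0 */
        return 0;
}

The point of the design is visible in the helper: any SPTE whose masked
bits equal the configured value is MMIO, no dedicated software-available
flag bits required.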
From 8a406c89532c91ee50688d4e728474dd09a11be3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:37 -0800
Subject: KVM: x86/mmu: Rename and document A/D scheme for TDP SPTEs

Rename the various A/D status defines to explicitly associate them with
TDP.  There is a subtle dependency on the bits in question never being
set when using PAE paging, as those bits are reserved, not available.
I.e. using these bits outside of TDP (technically EPT) would cause
explosions.

No functional change intended.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 642a17b9964c..fd0a7911f098 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -8,11 +8,24 @@
 #define PT_FIRST_AVAIL_BITS_SHIFT 10
 #define PT64_SECOND_AVAIL_BITS_SHIFT 54

-/* The mask used to denote Access Tracking SPTEs.  Note, val=3 is available. */
-#define SPTE_SPECIAL_MASK (3ULL << 52)
-#define SPTE_AD_ENABLED_MASK (0ULL << 52)
-#define SPTE_AD_DISABLED_MASK (1ULL << 52)
-#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
+/*
+ * TDP SPTEs (more specifically, EPT SPTEs) may not have A/D bits, and may also
+ * be restricted to using write-protection (for L2 when CPU dirty logging, i.e.
+ * PML, is enabled).  Use bits 52 and 53 to hold the type of A/D tracking that
+ * must be employed for a given TDP SPTE.
+ *
+ * Note, the "enabled" mask must be '0', as bits 62:52 are _reserved_ for PAE
+ * paging, including NPT PAE.  This scheme works because legacy shadow paging
+ * is guaranteed to have A/D bits and write-protection is forced only for
+ * TDP with CPU dirty logging (PML).  If NPT ever gains PML-like support, it
+ * must be restricted to 64-bit KVM.
+ */
+#define SPTE_TDP_AD_SHIFT              52
+#define SPTE_TDP_AD_MASK               (3ULL << SPTE_TDP_AD_SHIFT)
+#define SPTE_TDP_AD_ENABLED_MASK       (0ULL << SPTE_TDP_AD_SHIFT)
+#define SPTE_TDP_AD_DISABLED_MASK      (1ULL << SPTE_TDP_AD_SHIFT)
+#define SPTE_TDP_AD_WRPROT_ONLY_MASK   (2ULL << SPTE_TDP_AD_SHIFT)
+static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);

 #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
 #define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
@@ -100,7 +113,7 @@ extern u64 __read_mostly shadow_present_mask;
 extern u64 __read_mostly shadow_me_mask;

 /*
- * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
+ * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED_MASK;
  * shadow_acc_track_mask is the set of bits to be cleared in non-accessed
  * pages.
  */
@@ -176,13 +189,18 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)

 static inline bool spte_ad_enabled(u64 spte)
 {
         MMU_WARN_ON(is_mmio_spte(spte));
-        return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK;
+        return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED_MASK;
 }

 static inline bool spte_ad_need_write_protect(u64 spte)
 {
         MMU_WARN_ON(is_mmio_spte(spte));
-        return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
+        /*
+         * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED_MASK is '0',
+         * and non-TDP SPTEs will never set these bits.  Optimize for 64-bit
+         * TDP and do the A/D type check unconditionally.
+         */
+        return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED_MASK;
 }

 static inline u64 spte_shadow_accessed_mask(u64 spte)
-- cgit
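The A/D type documented above is a two-bit field, not a pair of
independent flags.  A standalone sketch of how the three types decode,
reusing the constants from the hunk (the SPTE payload bits are made up):

#include <stdint.h>
#include <assert.h>

#define SPTE_TDP_AD_SHIFT              52
#define SPTE_TDP_AD_MASK               (3ULL << SPTE_TDP_AD_SHIFT)
#define SPTE_TDP_AD_ENABLED_MASK       (0ULL << SPTE_TDP_AD_SHIFT)
#define SPTE_TDP_AD_DISABLED_MASK      (1ULL << SPTE_TDP_AD_SHIFT)
#define SPTE_TDP_AD_WRPROT_ONLY_MASK   (2ULL << SPTE_TDP_AD_SHIFT)

int main(void)
{
        uint64_t spte = SPTE_TDP_AD_WRPROT_ONLY_MASK | 0xabc000ULL;

        /* A/D bits are usable unless the type is explicitly "disabled". */
        assert((spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED_MASK);
        /* ...but write-protection is still required for this type. */
        assert((spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED_MASK);
        /* A legacy PAE SPTE leaves bits 62:52 clear, so it decodes as
         * "enabled", which is exactly the benign case the comment relies on. */
        assert((0xabc000ULL & SPTE_TDP_AD_MASK) == SPTE_TDP_AD_ENABLED_MASK);
        return 0;
}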
From b0de568018a6cd216ae060c33832e898f870abed Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:38 -0800
Subject: KVM: x86/mmu: Use MMIO SPTE bits 53 and 52 for the MMIO generation

Use bits 53 and 52 for the MMIO generation now that they're not used to
identify MMIO SPTEs.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index fd0a7911f098..bf4f49890606 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -65,11 +65,11 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
 #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))

 /*
- * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is a 20 bit subset of
  * the memslots generation and is derived as follows:
  *
  * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
+ * Bits 9-19 of the MMIO generation are propagated to spte bits 52-62
  *
  * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
  * the MMIO generation number, as doing so would require stealing a bit from
@@ -82,7 +82,7 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
 #define MMIO_SPTE_GEN_LOW_START 3
 #define MMIO_SPTE_GEN_LOW_END 11

-#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
+#define MMIO_SPTE_GEN_HIGH_START 52
 #define MMIO_SPTE_GEN_HIGH_END 62

 #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
                                            MMIO_SPTE_GEN_LOW_START)
@@ -94,7 +94,7 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
 #define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)

 /* remember to adjust the comment above as well if you change these */
-static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 11);

 #define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
 #define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
-- cgit
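A standalone sketch of how a 20-bit generation round-trips through the
split bit ranges described above.  The helper names and the GENMASK_ULL
re-definition are invented for illustration; the kernel's equivalents
are the MMIO_SPTE_GEN_* macros in the hunk.

#include <stdint.h>
#include <assert.h>

#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

#define GEN_LOW_START   3       /* gen bits 0-8  -> spte bits 3-11  */
#define GEN_LOW_END     11
#define GEN_HIGH_START  52      /* gen bits 9-19 -> spte bits 52-62 */
#define GEN_HIGH_END    62
#define GEN_LOW_BITS    (GEN_LOW_END - GEN_LOW_START + 1)

static uint64_t make_mmio_gen_bits(uint64_t gen)
{
        uint64_t low  = (gen << GEN_LOW_START) &
                        GENMASK_ULL(GEN_LOW_END, GEN_LOW_START);
        uint64_t high = ((gen >> GEN_LOW_BITS) << GEN_HIGH_START) &
                        GENMASK_ULL(GEN_HIGH_END, GEN_HIGH_START);
        return low | high;
}

static uint64_t get_mmio_gen(uint64_t spte)
{
        uint64_t low  = (spte & GENMASK_ULL(GEN_LOW_END, GEN_LOW_START)) >> GEN_LOW_START;
        uint64_t high = (spte & GENMASK_ULL(GEN_HIGH_END, GEN_HIGH_START)) >> GEN_HIGH_START;
        return low | (high << GEN_LOW_BITS);
}

int main(void)
{
        uint64_t gen = 0xfffff;         /* 20 bits, the new maximum */
        assert(get_mmio_gen(make_mmio_gen_bits(gen)) == gen);
        return 0;
}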
From c4827eabe1a89ca82335a9e90bf8ed19a63ba063 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:39 -0800
Subject: KVM: x86/mmu: Document dependency between TDP A/D type and saved bits

Document that SHADOW_ACC_TRACK_SAVED_BITS_SHIFT is directly dependent on
bits 53:52 being used to track the A/D type.

Remove PT64_SECOND_AVAIL_BITS_SHIFT as it is at best misleading, and at
worst wrong.  For PAE paging, which arguably is a variant of PT64, the
bits are reserved.  For MMIO SPTEs the bits are not available as they're
used for the MMIO generation.  For access tracked SPTEs, they are also
not available as bits 56:54 are used to store the original RX bits.

No functional change intended.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index bf4f49890606..e918b8f0b21d 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -6,7 +6,6 @@
 #include "mmu_internal.h"

 #define PT_FIRST_AVAIL_BITS_SHIFT 10
-#define PT64_SECOND_AVAIL_BITS_SHIFT 54

 /*
  * TDP SPTEs (more specifically, EPT SPTEs) may not have A/D bits, and may also
@@ -134,11 +133,14 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  * The mask/shift to use for saving the original R/X bits when marking the PTE
  * as not-present for access tracking purposes.  We do not save the W bit as the
  * PTEs being access tracked also need to be dirty tracked, so the W bit will be
- * restored only when a write is attempted to the page.
+ * restored only when a write is attempted to the page.  This mask obviously
+ * must not overlap the A/D type mask.
  */
 #define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \
                                           PT64_EPT_EXECUTABLE_MASK)
-#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT
+#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT 54
+static_assert(!(SPTE_TDP_AD_MASK & (SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
+                                    SHADOW_ACC_TRACK_SAVED_BITS_SHIFT)));

 /*
  * If a thread running without exclusive control of the MMU lock must perform a
-- cgit

From 5fc3424f8b854584f8f6fb6ea03f1419487fdc96 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:43 -0800
Subject: KVM: x86/mmu: Make Host-writable and MMU-writable bit locations dynamic

Make the locations of the HOST_WRITABLE and MMU_WRITABLE bits
configurable for a given KVM instance.  This will allow EPT to use high
available bits, which in turn will free up bit 11 for a constant
MMU_PRESENT bit.

No functional change intended.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-19-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index e918b8f0b21d..287540d211a9 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -5,8 +5,6 @@

 #include "mmu_internal.h"

-#define PT_FIRST_AVAIL_BITS_SHIFT 10
-
 /*
  * TDP SPTEs (more specifically, EPT SPTEs) may not have A/D bits, and may also
  * be restricted to using write-protection (for L2 when CPU dirty logging, i.e.
@@ -59,9 +57,8 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-
-#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(10)
+#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(11)

 /*
  * Due to limited space in PTEs, the MMIO generation is a 20 bit subset of
@@ -100,6 +97,8 @@ static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 11);

 #define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)

+extern u64 __read_mostly shadow_host_writable_mask;
+extern u64 __read_mostly shadow_mmu_writable_mask;
 extern u64 __read_mostly shadow_nx_mask;
 extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 extern u64 __read_mostly shadow_user_mask;
@@ -264,8 +263,8 @@ static inline bool is_dirty_spte(u64 spte)

 static inline bool spte_can_locklessly_be_made_writable(u64 spte)
 {
-        return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
-               (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
+        return (spte & shadow_host_writable_mask) &&
+               (spte & shadow_mmu_writable_mask);
 }

 static inline u64 get_mmio_spte_generation(u64 spte)
-- cgit
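A sketch of the runtime-configurable scheme this patch sets up.  The
masks_init() helper is a made-up stand-in for KVM's per-vendor setup,
and the EPT bit positions anticipate the next patch; only the two-mask
check itself mirrors spte_can_locklessly_be_made_writable() above.

#include <stdint.h>
#include <stdio.h>

/* Set once at init, analogous to shadow_host_writable_mask and
 * shadow_mmu_writable_mask in the hunk above. */
static uint64_t host_writable_mask;
static uint64_t mmu_writable_mask;

static void masks_init(int ept)
{
        /* Hypothetical: EPT picks high bits (see the next patch),
         * everything else keeps the low DEFAULT_* bits. */
        host_writable_mask = ept ? 1ULL << 57 : 1ULL << 10;
        mmu_writable_mask  = ept ? 1ULL << 58 : 1ULL << 11;
}

static int can_locklessly_be_made_writable(uint64_t spte)
{
        return (spte & host_writable_mask) && (spte & mmu_writable_mask);
}

int main(void)
{
        masks_init(0);
        uint64_t spte = host_writable_mask | mmu_writable_mask;
        printf("%d %d\n", can_locklessly_be_made_writable(spte),
               can_locklessly_be_made_writable(host_writable_mask)); /* 1 0 */
        return 0;
}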
From 613a3f3797528be489d280c35c4f6ebfcbe77e9e Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:44 -0800
Subject: KVM: x86/mmu: Use high bits for host/mmu writable masks for EPT SPTEs

Use bits 57 and 58 for HOST_WRITABLE and MMU_WRITABLE when using EPT.
This will allow using bit 11 as a constant MMU_PRESENT, which is
desirable as checking for a shadow-present SPTE is one of the most
common SPTE operations in KVM, particularly in hot paths such as page
faults.

EPT is short on low available bits; bit 11 is currently the only
always-available low bit.  Bit 10 is also available, but only while KVM
doesn't support mode-based execution.  On the other hand, PAE paging
doesn't have _any_ high available bits.  Thus, using bit 11 is the only
feasible option for MMU_PRESENT.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-20-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 48 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 15 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 287540d211a9..8996baa8da15 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -57,8 +57,39 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(10)
-#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(11)
+/* Bits 9 and 10 are ignored by all non-EPT PTEs. */
+#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9)
+#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10)
+
+/*
+ * The mask/shift to use for saving the original R/X bits when marking the PTE
+ * as not-present for access tracking purposes.  We do not save the W bit as the
+ * PTEs being access tracked also need to be dirty tracked, so the W bit will be
+ * restored only when a write is attempted to the page.  This mask obviously
+ * must not overlap the A/D type mask.
+ */
+#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \
+                                          PT64_EPT_EXECUTABLE_MASK)
+#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT 54
+#define SHADOW_ACC_TRACK_SAVED_MASK    (SHADOW_ACC_TRACK_SAVED_BITS_MASK << \
+                                        SHADOW_ACC_TRACK_SAVED_BITS_SHIFT)
+static_assert(!(SPTE_TDP_AD_MASK & SHADOW_ACC_TRACK_SAVED_MASK));
+
+/*
+ * Low ignored bits are at a premium for EPT, use high ignored bits, taking care
+ * to not overlap the A/D type mask or the saved access bits of access-tracked
+ * SPTEs when A/D bits are disabled.
+ */
+#define EPT_SPTE_HOST_WRITABLE BIT_ULL(57)
+#define EPT_SPTE_MMU_WRITABLE BIT_ULL(58)
+
+static_assert(!(EPT_SPTE_HOST_WRITABLE & SPTE_TDP_AD_MASK));
+static_assert(!(EPT_SPTE_MMU_WRITABLE & SPTE_TDP_AD_MASK));
+static_assert(!(EPT_SPTE_HOST_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
+static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
+
+/* Defined only to keep the above static asserts readable. */
+#undef SHADOW_ACC_TRACK_SAVED_MASK

 /*
  * Due to limited space in PTEs, the MMIO generation is a 20 bit subset of
@@ -128,19 +159,6 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  */
 #define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5

-/*
- * The mask/shift to use for saving the original R/X bits when marking the PTE
- * as not-present for access tracking purposes.  We do not save the W bit as the
- * PTEs being access tracked also need to be dirty tracked, so the W bit will be
- * restored only when a write is attempted to the page.  This mask obviously
- * must not overlap the A/D type mask.
- */
-#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \
-                                          PT64_EPT_EXECUTABLE_MASK)
-#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT 54
-static_assert(!(SPTE_TDP_AD_MASK & (SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
-                                    SHADOW_ACC_TRACK_SAVED_BITS_SHIFT)));

 /*
  * If a thread running without exclusive control of the MMU lock must perform a
-- cgit
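A sketch of why the saved R/X bits and the new high writable bits can
coexist: parking the EPT R (bit 0) and X (bit 2) permissions at shift 54
lands them in bits 54 and 56, leaving 57 and 58 free.  The helpers below
are simplified stand-ins for KVM's access-tracking code, which clears
more state than this; only the save/restore shift is the point here.

#include <stdint.h>
#include <assert.h>

#define EPT_READABLE    (1ULL << 0)
#define EPT_EXECUTABLE  (1ULL << 2)
#define SAVED_MASK      (EPT_READABLE | EPT_EXECUTABLE)
#define SAVED_SHIFT     54      /* R/X parked in bits 54 and 56 */

/* Sketch: stash R/X in high ignored bits when access tracking... */
static uint64_t mark_access_tracked(uint64_t spte)
{
        return (spte & ~SAVED_MASK) | ((spte & SAVED_MASK) << SAVED_SHIFT);
}

/* ...and restore them when the page is touched again. */
static uint64_t restore_access(uint64_t spte)
{
        uint64_t saved = (spte >> SAVED_SHIFT) & SAVED_MASK;
        return (spte & ~(SAVED_MASK << SAVED_SHIFT)) | saved;
}

int main(void)
{
        uint64_t spte = EPT_READABLE | EPT_EXECUTABLE | 0xabc000ULL;
        assert(restore_access(mark_access_tracked(spte)) == spte);
        /* Bits 57/58 stay free for EPT_SPTE_HOST/MMU_WRITABLE. */
        assert(!((SAVED_MASK << SAVED_SHIFT) & ((1ULL << 57) | (1ULL << 58))));
        return 0;
}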
From edea7c4fc215c7ee1cc98363b016ad505cbac9f7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:45 -0800
Subject: KVM: x86/mmu: Use a dedicated bit to track shadow/MMU-present SPTEs

Introduce MMU_PRESENT to explicitly track which SPTEs are "present" from
the MMU's perspective.  Checking for shadow-present SPTEs is a very
common operation for the MMU, particularly in hot paths such as page
faults.  With the addition of "removed" SPTEs for the TDP MMU,
identifying shadow-present SPTEs is quite costly, especially since it
requires checking multiple 64-bit values.

On 64-bit KVM, this reduces the footprint of kvm.ko's .text by ~2k
bytes.  On 32-bit KVM, this increases the footprint by ~200 bytes, but
only because gcc now inlines several more MMU helpers, e.g.
drop_parent_pte().

We now need to drop bit 11, used for the MMU_PRESENT flag, from the set
of bits used to store the generation number in MMIO SPTEs.  Otherwise
MMIO SPTEs with bit 11 set would get false positives for
is_shadow_present_spte() and lead to a variety of fireworks, from oopses
to likely hangs of the host kernel.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-21-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 8996baa8da15..60c6cd0a8f53 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -5,6 +5,15 @@

 #include "mmu_internal.h"

+/*
+ * An MMU-present SPTE is backed by actual memory and may or may not be present
+ * in hardware.  E.g. MMIO SPTEs are not considered present.  Use bit 11, as it
+ * is ignored by all flavors of SPTEs and checking a low bit often generates
+ * better code than for a high bit, e.g. 56+.  MMU-present checks are pervasive
+ * enough that the improved code generation is noticeable in KVM's footprint.
+ */
+#define SPTE_MMU_PRESENT_MASK BIT_ULL(11)
+
 /*
  * TDP SPTEs (more specifically, EPT SPTEs) may not have A/D bits, and may also
  * be restricted to using write-protection (for L2 when CPU dirty logging, i.e.
@@ -92,11 +101,11 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
 #undef SHADOW_ACC_TRACK_SAVED_MASK

 /*
- * Due to limited space in PTEs, the MMIO generation is a 20 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
  * the memslots generation and is derived as follows:
  *
- * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-19 of the MMIO generation are propagated to spte bits 52-62
+ * Bits 0-7 of the MMIO generation are propagated to spte bits 3-10
+ * Bits 8-18 of the MMIO generation are propagated to spte bits 52-62
  *
  * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
  * the MMIO generation number, as doing so would require stealing a bit from
@@ -107,7 +116,7 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
  */

 #define MMIO_SPTE_GEN_LOW_START 3
-#define MMIO_SPTE_GEN_LOW_END 11
+#define MMIO_SPTE_GEN_LOW_END 10

 #define MMIO_SPTE_GEN_HIGH_START 52
 #define MMIO_SPTE_GEN_HIGH_END 62
@@ -116,12 +125,14 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
                                            MMIO_SPTE_GEN_LOW_START)
 #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
                                             MMIO_SPTE_GEN_HIGH_START)
+static_assert(!(SPTE_MMU_PRESENT_MASK &
+                (MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));

 #define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
 #define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)

 /* remember to adjust the comment above as well if you change these */
-static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 11);
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11);

 #define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
 #define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
@@ -241,7 +252,7 @@ static inline bool is_access_track_spte(u64 spte)

 static inline bool is_shadow_present_pte(u64 pte)
 {
-        return (pte != 0) && !is_mmio_spte(pte) && !is_removed_spte(pte);
+        return !!(pte & SPTE_MMU_PRESENT_MASK);
 }

 static inline bool is_large_pte(u64 pte)
-- cgit
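A sketch of why bit 11 had to be dropped from the MMIO generation here:
had the generation kept SPTE bits 3-11, any generation value with bit 8
set would alias SPTE_MMU_PRESENT_MASK and make a not-present MMIO SPTE
look shadow-present.  Constants are copied from the hunks above; the
scenario is hypothetical.

#include <stdint.h>
#include <assert.h>

#define SPTE_MMU_PRESENT_MASK (1ULL << 11)

static int is_shadow_present(uint64_t spte)
{
        return !!(spte & SPTE_MMU_PRESENT_MASK);
}

int main(void)
{
        /* Generation bit 8 shifted to the old low range (bits 3-11)
         * lands exactly on bit 11 -- the false positive. */
        uint64_t bad_mmio_gen_bits = (uint64_t)(1 << 8) << 3;
        assert(is_shadow_present(bad_mmio_gen_bits));

        /* With the low range capped at spte bit 10, the overlap is gone. */
        uint64_t ok_mmio_gen_bits = 0x7f8;      /* bits 3-10 */
        assert(!is_shadow_present(ok_mmio_gen_bits));
        return 0;
}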
From 8f366ae6d8c5eaaae086016934802954abb8959e Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:46 -0800
Subject: KVM: x86/mmu: Tweak auditing WARN for A/D bits to !PRESENT (was MMIO)

Tweak the MMU_WARN that guards against weirdness when querying A/D status
to fire on a !MMU_PRESENT SPTE, as opposed to an MMIO SPTE.  Attempting
to query A/D status on any kind of !MMU_PRESENT SPTE, MMIO or otherwise,
indicates a KVM bug.  Case in point, several now-fixed bugs were
identified by enabling this new WARN.

Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-22-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 60c6cd0a8f53..d89719397e26 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -211,6 +211,11 @@ static inline bool is_mmio_spte(u64 spte)
                likely(shadow_mmio_value);
 }

+static inline bool is_shadow_present_pte(u64 pte)
+{
+        return !!(pte & SPTE_MMU_PRESENT_MASK);
+}
+
 static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
 {
         return sp->role.ad_disabled;
@@ -218,13 +223,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)

 static inline bool spte_ad_enabled(u64 spte)
 {
-        MMU_WARN_ON(is_mmio_spte(spte));
+        MMU_WARN_ON(!is_shadow_present_pte(spte));
         return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED_MASK;
 }

 static inline bool spte_ad_need_write_protect(u64 spte)
 {
-        MMU_WARN_ON(is_mmio_spte(spte));
+        MMU_WARN_ON(!is_shadow_present_pte(spte));
         /*
          * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED_MASK is '0',
          * and non-TDP SPTEs will never set these bits.  Optimize for 64-bit
@@ -235,13 +240,13 @@ static inline bool spte_ad_need_write_protect(u64 spte)

 static inline u64 spte_shadow_accessed_mask(u64 spte)
 {
-        MMU_WARN_ON(is_mmio_spte(spte));
+        MMU_WARN_ON(!is_shadow_present_pte(spte));
         return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
 }

 static inline u64 spte_shadow_dirty_mask(u64 spte)
 {
-        MMU_WARN_ON(is_mmio_spte(spte));
+        MMU_WARN_ON(!is_shadow_present_pte(spte));
         return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
 }

@@ -250,11 +255,6 @@ static inline bool is_access_track_spte(u64 spte)
         return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
 }

-static inline bool is_shadow_present_pte(u64 pte)
-{
-        return !!(pte & SPTE_MMU_PRESENT_MASK);
-}
-
 static inline bool is_large_pte(u64 pte)
 {
         return pte & PT_PAGE_SIZE_MASK;
-- cgit
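A toy model of the WARN being tweaked here, showing why the !present
check is strictly stronger: a removed SPTE is not an MMIO SPTE, so the
old is_mmio_spte()-based assertion stayed silent for exactly the kind of
bogus query this patch wants to catch.  REMOVED_SPTE is shown at its
pre-series location (bit 59), and MMU_WARN_ON is modeled with a plain
fprintf; both are illustrative stand-ins.

#include <stdint.h>
#include <stdio.h>

#define SPTE_MMU_PRESENT_MASK   (1ULL << 11)
#define REMOVED_SPTE            (1ULL << 59)    /* pre-series location */

static void mmu_warn_on(int cond, const char *what)
{
        if (cond)
                fprintf(stderr, "MMU_WARN: %s\n", what);
}

static int spte_ad_enabled(uint64_t spte)
{
        /* New guard: any !MMU-present SPTE is a bogus query. */
        mmu_warn_on(!(spte & SPTE_MMU_PRESENT_MASK),
                    "A/D query on !present SPTE");
        return !((spte >> 52) & 1);     /* simplified A/D-disabled check */
}

int main(void)
{
        /* Fires the warning: a removed SPTE would have slipped past an
         * MMIO-only check. */
        spte_ad_enabled(REMOVED_SPTE);
        return 0;
}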
From 715f1079eee12f629b2de5c8a9489124a5af0a18 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Thu, 25 Feb 2021 12:47:48 -0800
Subject: KVM: x86/mmu: Use low available bits for removed SPTEs

Use low "available" bits to tag REMOVED SPTEs.  Using a high bit is
moderately costly as it often causes the compiler to generate a 64-bit
immediate.  More importantly, this makes it very clear that REMOVED_SPTE
is a value, not a flag.

Cc: Ben Gardon
Signed-off-by: Sean Christopherson
Message-Id: <20210225204749.1512652-24-seanjc@google.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/spte.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kvm/mmu/spte.h')

diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index d89719397e26..bca0ba11cccf 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -176,13 +176,16 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  * non-present intermediate value. Other threads which encounter this value
  * should not modify the SPTE.
  *
- * This constant works because it is considered non-present on both AMD and
- * Intel CPUs and does not create a L1TF vulnerability because the pfn section
- * is zeroed out.
+ * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
+ * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create an
+ * L1TF vulnerability.  Use only low bits to avoid 64-bit immediates.
  *
  * Only used by the TDP MMU.
  */
-#define REMOVED_SPTE (1ull << 59)
+#define REMOVED_SPTE 0x5a0ULL
+
+/* Removed SPTEs must not be misconstrued as shadow present PTEs. */
+static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));

 static inline bool is_removed_spte(u64 spte)
 {
-- cgit
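A standalone sketch of the properties the new constant relies on, with
values copied from the hunk above and simplified checks.  Note that
REMOVED_SPTE is compared with ==, not &, underscoring that it is a value
rather than a flag; and on x86-64, 0x5a0 fits in a sign-extended 32-bit
immediate, whereas 1ull << 59 forces a movabs.

#include <stdint.h>
#include <assert.h>

#define SPTE_MMU_PRESENT_MASK   (1ULL << 11)
#define REMOVED_SPTE            0x5a0ULL        /* a value, not a flag */

static int is_removed_spte(uint64_t spte)
{
        return spte == REMOVED_SPTE;
}

int main(void)
{
        assert(is_removed_spte(REMOVED_SPTE));
        /* Not shadow-present: bit 11 is clear. */
        assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
        /* No RWX bits set, so non-present on EPT too, and no PFN bits,
         * so no L1TF exposure. */
        assert(!(REMOVED_SPTE & 0x7));
        return 0;
}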