summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/tlbflush.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm/tlbflush.h')
-rw-r--r--arch/x86/include/asm/tlbflush.h285
1 files changed, 258 insertions, 27 deletions
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8c87a2e0b660..00daedfefc1b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -2,9 +2,11 @@
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H
-#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
@@ -12,10 +14,14 @@
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
+#include <asm/pgtable.h>
+
+DECLARE_PER_CPU(u64, tlbstate_untag_mask);
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
+#define TLB_GENERATION_INVALID 0
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);
@@ -83,30 +89,13 @@ struct tlb_state {
/* Last user mm for optimizing IBPB */
union {
struct mm_struct *last_user_mm;
- unsigned long last_user_mm_ibpb;
+ unsigned long last_user_mm_spec;
};
u16 loaded_mm_asid;
u16 next_asid;
/*
- * We can be in one of several states:
- *
- * - Actively using an mm. Our CPU's bit will be set in
- * mm_cpumask(loaded_mm) and is_lazy == false;
- *
- * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
- * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
- *
- * - Lazily using a real mm. loaded_mm != &init_mm, our bit
- * is set in mm_cpumask(loaded_mm), but is_lazy == true.
- * We're heuristically guessing that the CR3 load we
- * skipped more than makes up for the overhead added by
- * lazy mode.
- */
- bool is_lazy;
-
- /*
* If set we changed the page tables in such a way that we
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
* This tells us to go invalidate all the non-loaded ctxs[]
@@ -117,6 +106,16 @@ struct tlb_state {
*/
bool invalidate_other;
+#ifdef CONFIG_ADDRESS_MASKING
+ /*
+ * Active LAM mode.
+ *
+ * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
+ * disabled.
+ */
+ u8 lam;
+#endif
+
/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
@@ -151,7 +150,27 @@ struct tlb_state {
*/
struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+struct tlb_state_shared {
+ /*
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
+ *
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
+ */
+ bool is_lazy;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay
@@ -165,6 +184,9 @@ static inline void cr4_init_shadow(void)
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
+/* How many pages can be invalidated with one INVLPGB. */
+extern u16 invlpgb_count_max;
+
extern void initialize_tlbstate_and_flush(void);
/*
@@ -175,7 +197,7 @@ extern void initialize_tlbstate_and_flush(void);
* - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
+ * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
*
* ..but the i386 has somewhat limited tlb flushing capabilities,
* and page-granular flushes are available only on i486 and up.
@@ -201,16 +223,83 @@ struct flush_tlb_info {
unsigned long start;
unsigned long end;
u64 new_tlb_gen;
- unsigned int stride_shift;
- bool freed_tables;
+ unsigned int initiating_cpu;
+ u8 stride_shift;
+ u8 freed_tables;
+ u8 trim_cpumask;
};
void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
-void flush_tlb_others(const struct cpumask *cpumask,
+void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline bool is_dyn_asid(u16 asid)
+{
+ return asid < TLB_NR_DYN_ASIDS;
+}
+
+static inline bool is_global_asid(u16 asid)
+{
+ return !is_dyn_asid(asid);
+}
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return 0;
+
+ asid = smp_load_acquire(&mm->context.global_asid);
+
+ /* mm->context.global_asid is either 0, or a global ASID */
+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+ return asid;
+}
+
+static inline void mm_init_global_asid(struct mm_struct *mm)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ mm->context.global_asid = 0;
+ mm->context.asid_transition = false;
+ }
+}
+
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
+{
+ /*
+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+ * finish_asid_transition() needs to observe asid_transition = true
+ * once it observes global_asid.
+ */
+ mm->context.asid_transition = true;
+ smp_store_release(&mm->context.global_asid, asid);
+}
+
+static inline void mm_clear_asid_transition(struct mm_struct *mm)
+{
+ WRITE_ONCE(mm->context.asid_transition, false);
+}
+
+static inline bool mm_in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ return mm && READ_ONCE(mm->context.asid_transition);
+}
+#else
+static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_global_asid(struct mm_struct *mm) { }
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
+static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
+#endif /* CONFIG_BROADCAST_TLB_FLUSH */
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif
@@ -222,7 +311,7 @@ void flush_tlb_others(const struct cpumask *cpumask,
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
- : PAGE_SHIFT, false)
+ : PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -235,6 +324,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ bool should_defer = false;
+
+ /* If remote CPUs need to be flushed then defer batch the flush */
+ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+ should_defer = true;
+ put_cpu();
+
+ return should_defer;
+}
+
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
@@ -246,15 +347,145 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
return atomic64_inc_return(&mm->context.tlb_gen);
}
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm)
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm, unsigned long start, unsigned long end)
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ batch->unmapped_pages = true;
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+static inline bool pte_flags_need_flush(unsigned long oldflags,
+ unsigned long newflags,
+ bool ignore_access)
+{
+ /*
+ * Flags that require a flush when cleared but not when they are set.
+ * Only include flags that would not trigger spurious page-faults.
+ * Non-present entries are not cached. Hardware would set the
+ * dirty/access bit if needed without a fault.
+ */
+ const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
+ _PAGE_ACCESSED;
+ const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
+ _PAGE_SOFTW3 | _PAGE_SOFTW4 |
+ _PAGE_SAVED_DIRTY;
+ const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
+ _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
+ _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
+ _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
+ unsigned long diff = oldflags ^ newflags;
+
+ BUILD_BUG_ON(flush_on_clear & software_flags);
+ BUILD_BUG_ON(flush_on_clear & flush_on_change);
+ BUILD_BUG_ON(flush_on_change & software_flags);
+
+ /* Ignore software flags */
+ diff &= ~software_flags;
+
+ if (ignore_access)
+ diff &= ~_PAGE_ACCESSED;
+
+ /*
+ * Did any of the 'flush_on_clear' flags was clleared set from between
+ * 'oldflags' and 'newflags'?
+ */
+ if (diff & oldflags & flush_on_clear)
+ return true;
+
+ /* Flush on modified flags. */
+ if (diff & flush_on_change)
+ return true;
+
+ /* Ensure there are no flags that were left behind */
+ if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+ (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
+ VM_WARN_ON_ONCE(1);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * pte_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace PTEs.
+ */
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+ /* !PRESENT -> * ; no need for flush */
+ if (!(pte_flags(oldpte) & _PAGE_PRESENT))
+ return false;
+
+ /* PFN changed ; needs flush */
+ if (pte_pfn(oldpte) != pte_pfn(newpte))
+ return true;
+
+ /*
+ * check PTE flags; ignore access-bit; see comment in
+ * ptep_clear_flush_young().
+ */
+ return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
+ true);
+}
+#define pte_needs_flush pte_needs_flush
+
+/*
+ * huge_pmd_needs_flush() checks whether permissions were demoted and require a
+ * flush. It should only be used for userspace huge PMDs.
+ */
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+ /* !PRESENT -> * ; no need for flush */
+ if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
+ return false;
+
+ /* PFN changed ; needs flush */
+ if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
+ return true;
+
+ /*
+ * check PMD flags; do not ignore access-bit; see
+ * pmdp_clear_flush_young().
+ */
+ return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
+ false);
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
+#ifdef CONFIG_ADDRESS_MASKING
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+ u64 lam = this_cpu_read(cpu_tlbstate.lam);
+
+ return lam << X86_CR3_LAM_U57_BIT;
+}
+
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
+{
+ this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
+ this_cpu_write(tlbstate_untag_mask, untag_mask);
+}
+
+#else
+
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+ return 0;
+}
+
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
+{
+}
+#endif
#endif /* !MODULE */
+static inline void __native_tlb_flush_global(unsigned long cr4)
+{
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ native_write_cr4(cr4);
+}
#endif /* _ASM_X86_TLBFLUSH_H */