summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm/pgtable.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm/pgtable.h')
-rw-r--r--arch/arm64/include/asm/pgtable.h292
1 files changed, 190 insertions, 102 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 84f05f781a70..abd2dee416b3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -40,6 +40,85 @@
#include <linux/sched.h>
#include <linux/page_table_check.h>
+static inline void emit_pte_barriers(void)
+{
+ /*
+ * These barriers are emitted under certain conditions after a pte entry
+ * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+ * visible to the table walker. The isb ensures that any previous
+ * speculative "invalid translation" marker that is in the CPU's
+ * pipeline gets cleared, so that any access to that address after
+ * setting the pte to valid won't cause a spurious fault. If the thread
+ * gets preempted after storing to the pgtable but before emitting these
+ * barriers, __switch_to() emits a dsb which ensure the walker gets to
+ * see the store. There is no guarantee of an isb being issued though.
+ * This is safe because it will still get issued (albeit on a
+ * potentially different CPU) when the thread starts running again,
+ * before any access to the address.
+ */
+ dsb(ishst);
+ isb();
+}
+
+static inline void queue_pte_barriers(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt()) {
+ emit_pte_barriers();
+ return;
+ }
+
+ flags = read_thread_flags();
+
+ if (flags & BIT(TIF_LAZY_MMU)) {
+ /* Avoid the atomic op if already set. */
+ if (!(flags & BIT(TIF_LAZY_MMU_PENDING)))
+ set_thread_flag(TIF_LAZY_MMU_PENDING);
+ } else {
+ emit_pte_barriers();
+ }
+}
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ /*
+ * lazy_mmu_mode is not supposed to permit nesting. But in practice this
+ * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation
+ * inside a lazy_mmu_mode section (such as zap_pte_range()) will change
+ * permissions on the linear map with apply_to_page_range(), which
+ * re-enters lazy_mmu_mode. So we tolerate nesting in our
+ * implementation. The first call to arch_leave_lazy_mmu_mode() will
+ * flush and clear the flag such that the remainder of the work in the
+ * outer nest behaves as if outside of lazy mmu mode. This is safe and
+ * keeps tracking simple.
+ */
+
+ if (in_interrupt())
+ return;
+
+ set_thread_flag(TIF_LAZY_MMU);
+}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+ emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ arch_flush_lazy_mmu_mode();
+ clear_thread_flag(TIF_LAZY_MMU);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -111,7 +190,6 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
-#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \
PTE_ATTRINDX(MT_NORMAL_TAGGED))
@@ -293,11 +371,6 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}
-static inline pte_t pte_mkdevmap(pte_t pte)
-{
- return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
-}
-
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
@@ -320,18 +393,20 @@ static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
WRITE_ONCE(*ptep, pte);
}
-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte_complete(pte_t pte)
{
- __set_pte_nosync(ptep, pte);
-
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
- * or update_mmu_cache() have the necessary barriers.
+ * has the necessary barriers.
*/
- if (pte_valid_not_user(pte)) {
- dsb(ishst);
- isb();
- }
+ if (pte_valid_not_user(pte))
+ queue_pte_barriers();
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+ __set_pte_nosync(ptep, pte);
+ __set_pte_complete(pte);
}
static inline pte_t __ptep_get(pte_t *ptep)
@@ -423,23 +498,6 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
-static inline void __set_ptes(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
-{
- page_table_check_ptes_set(mm, ptep, pte, nr);
- __sync_cache_and_tags(pte, nr);
-
- for (;;) {
- __check_safe_pte_update(mm, ptep, pte);
- __set_pte(ptep, pte);
- if (--nr == 0)
- break;
- ptep++;
- pte = pte_advance_pfn(pte, 1);
- }
-}
-
/*
* Hugetlb definitions.
*/
@@ -499,7 +557,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}
@@ -589,14 +647,6 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
return __pmd((pmd_val(pmd) & ~mask) | val);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd))
-#endif
-static inline pmd_t pmd_mkdevmap(pmd_t pmd)
-{
- return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP)));
-}
-
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL)))
static inline pmd_t pmd_mkspecial(pmd_t pmd)
@@ -609,7 +659,6 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd)
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pud_young(pud) pte_young(pud_pte(pud))
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
@@ -633,11 +682,6 @@ static inline pud_t pud_mkhuge(pud_t pud)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
-#define pud_special(pte) pte_special(pud_pte(pud))
-#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud)))
-#endif
-
#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
@@ -654,30 +698,64 @@ static inline pgprot_t pud_pgprot(pud_t pud)
return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}
-static inline void __set_pte_at(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte, unsigned int nr,
+ unsigned long pgsize)
{
- __sync_cache_and_tags(pte, nr);
- __check_safe_pte_update(mm, ptep, pte);
- __set_pte(ptep, pte);
+ unsigned long stride = pgsize >> PAGE_SHIFT;
+
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr);
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr);
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ __sync_cache_and_tags(pte, nr * stride);
+
+ for (;;) {
+ __check_safe_pte_update(mm, ptep, pte);
+ __set_pte_nosync(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = pte_advance_pfn(pte, stride);
+ }
+
+ __set_pte_complete(pte);
}
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
+static inline void __set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
- page_table_check_pmd_set(mm, pmdp, pmd);
- return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
- PMD_SIZE >> PAGE_SHIFT);
+ __set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE);
}
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void __set_pmds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
- page_table_check_pud_set(mm, pudp, pud);
- return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
- PUD_SIZE >> PAGE_SHIFT);
+ __set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
+#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
+
+static inline void __set_puds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pud_t *pudp, pud_t pud, unsigned int nr)
+{
+ __set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
+}
+#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
@@ -744,8 +822,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
* If pmd is present-invalid, pmd_table() won't detect it
* as a table, so force the valid bit for the comparison.
*/
- return pmd_val(pmd) && pmd_present(pmd) &&
- !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+ return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -759,8 +836,6 @@ static inline bool pud_table(pud_t pud) { return true; }
PUD_TYPE_TABLE)
#endif
-extern pgd_t init_pg_dir[];
-extern pgd_t init_pg_end[];
extern pgd_t swapper_pg_dir[];
extern pgd_t idmap_pg_dir[];
extern pgd_t tramp_pg_dir[];
@@ -785,10 +860,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
WRITE_ONCE(*pmdp, pmd);
- if (pmd_valid(pmd)) {
- dsb(ishst);
- isb();
- }
+ if (pmd_valid(pmd))
+ queue_pte_barriers();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -818,12 +891,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
-
#if CONFIG_PGTABLE_LEVELS > 2
#define pmd_ERROR(e) \
@@ -853,10 +920,8 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
WRITE_ONCE(*pudp, pud);
- if (pud_valid(pud)) {
- dsb(ishst);
- isb();
- }
+ if (pud_valid(pud))
+ queue_pte_barriers();
}
static inline void pud_clear(pud_t *pudp)
@@ -935,8 +1000,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
}
WRITE_ONCE(*p4dp, p4d);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void p4d_clear(p4d_t *p4dp)
@@ -1064,8 +1128,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
}
WRITE_ONCE(*pgdp, pgd);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void pgd_clear(pgd_t *pgdp)
@@ -1225,16 +1288,6 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
pmd_pte(entry), dirty);
}
-
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
-}
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
@@ -1306,16 +1359,37 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
-static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
+ pte_t *ptep,
+ unsigned long pgsize)
{
pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
- page_table_check_pte_clear(mm, pte);
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_pte_clear(mm, pte);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmd_clear(mm, pte_pmd(pte));
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_pud_clear(mm, pte_pud(pte));
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
return pte;
}
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE);
+}
+
static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr, int full)
{
@@ -1352,11 +1426,7 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
-
- page_table_check_pmd_clear(mm, pmd);
-
- return pmd;
+ return pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -1549,6 +1619,14 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
*/
#define arch_wants_old_prefaulted_pte cpu_has_hw_af
+/*
+ * Request exec memory is read into pagecache in at least 64K folios. This size
+ * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB
+ * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base
+ * pages are in use.
+ */
+#define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT)
+
static inline bool pud_sect_supported(void)
{
return PAGE_SIZE == SZ_4K;
@@ -1565,6 +1643,16 @@ extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t new_pte);
+#define modify_prot_start_ptes modify_prot_start_ptes
+extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr);
+
+#define modify_prot_commit_ptes modify_prot_commit_ptes
+extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte,
+ unsigned int nr);
+
#ifdef CONFIG_ARM64_CONTPTE
/*