diff options
Diffstat (limited to 'arch/arm64/include/asm/pgtable.h')
-rw-r--r-- | arch/arm64/include/asm/pgtable.h | 453 |
1 files changed, 349 insertions, 104 deletions
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5d15b4735a0e..79ce70fbb751 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -9,6 +9,7 @@ #include <asm/proc-fns.h> #include <asm/memory.h> +#include <asm/mte.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable-prot.h> #include <asm/tlbflush.h> @@ -21,9 +22,9 @@ * and fixed mappings */ #define VMALLOC_START (MODULES_END) -#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K) +#define VMALLOC_END (VMEMMAP_START - SZ_256M) -#define FIRST_USER_ADDRESS 0UL +#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) #ifndef __ASSEMBLY__ @@ -32,13 +33,31 @@ #include <linux/mmdebug.h> #include <linux/mm_types.h> #include <linux/sched.h> +#include <linux/page_table_check.h> -extern struct page *vmemmap; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE + +/* Set stride and tlb_level in flush_*_tlb_range */ +#define flush_pmd_tlb_range(vma, addr, end) \ + __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2) +#define flush_pud_tlb_range(vma, addr, end) \ + __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); -extern void __pud_error(const char *file, int line, unsigned long val); -extern void __pgd_error(const char *file, int line, unsigned long val); +static inline bool arch_thp_swp_supported(void) +{ + return !system_supports_mte(); +} +#define arch_thp_swp_supported arch_thp_swp_supported + +/* + * Outside of a few very special situations (e.g. hibernation), we always + * use broadcast TLB invalidation instructions, therefore a spurious page + * fault on one CPU which has been handled concurrently by another CPU + * does not need to perform additional invalidation. + */ +#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) /* * ZERO_PAGE is a global shared page that is always zero: used @@ -47,16 +66,23 @@ extern void __pgd_error(const char *file, int line, unsigned long val); extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) /* * Macros to convert between a physical address and its placement in a * page table entry, taking care of 52-bit addresses. */ #ifdef CONFIG_ARM64_PA_BITS_52 -#define __pte_to_phys(pte) \ - ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) -#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return (pte_val(pte) & PTE_ADDR_LOW) | + ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT); +} +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK; +} #else #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) #define __phys_to_pte_val(phys) (phys) @@ -77,9 +103,13 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) +#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY)) +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) +#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ + PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define pte_cont_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ @@ -91,7 +121,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (__boundary - 1 < (end) - 1) ? __boundary : (end); \ }) -#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte)) #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) @@ -102,26 +132,26 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; */ #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) -#define pte_valid_young(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) - /* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB. */ #define pte_accessible(mm, pte) \ - (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* - * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * p??_access_permitted() is true for valid user mappings (PTE_USER + * bit set, subject to the write permission check). For execute-only + * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits + * not set) must return false. PROT_NONE mappings do not have the + * PTE_VALID bit set. */ #define pte_access_permitted(pte, write) \ - (pte_valid_user(pte) && (!(write) || pte_write(pte))) + (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -139,14 +169,19 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } -static inline pte_t pte_wrprotect(pte_t pte) +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) { - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; + pmd_val(pmd) &= ~pgprot_val(prot); + return pmd; } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) |= pgprot_val(prot); + return pmd; +} + +static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); @@ -171,6 +206,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -227,6 +276,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } extern void __sync_icache_dcache(pte_t pteval); +bool pgattr_change_is_safe(u64 old, u64 new); /* * PTE bits configuration in the presence of hardware Dirty Bit Management @@ -244,7 +294,7 @@ extern void __sync_icache_dcache(pte_t pteval); * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, +static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep, pte_t pte) { pte_t old_pte; @@ -270,18 +320,44 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)), + "%s: unsafe attribute change: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) { if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); - __check_racy_pte_update(mm, ptep, pte); + /* + * If the PTE would provide user space access to the tags associated + * with it then ensure that the MTE tags are synchronised. Although + * pte_access_permitted() returns false for exec only mappings, they + * don't expose tags (instruction fetches don't check tags). + */ + if (system_supports_mte() && pte_access_permitted(pte, false) && + !pte_special(pte) && pte_tagged(pte)) + mte_sync_tags(pte, nr_pages); +} - set_pte(ptep, pte); +static inline void set_ptes(struct mm_struct *mm, + unsigned long __always_unused addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + page_table_check_ptes_set(mm, ptep, pte, nr); + __sync_cache_and_tags(pte, nr); + + for (;;) { + __check_safe_pte_update(mm, ptep, pte); + set_pte(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte_val(pte) += PAGE_SIZE; + } } +#define set_ptes set_ptes /* * Huge pte definitions. @@ -302,6 +378,11 @@ static inline pte_t pgd_pte(pgd_t pgd) return __pte(pgd_val(pgd)); } +static inline pte_t p4d_pte(p4d_t p4d) +{ + return __pte(p4d_val(p4d)); +} + static inline pte_t pud_pte(pud_t pud) { return __pte(pud_val(pud)); @@ -337,9 +418,34 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); } +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); +} + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_val(pte) & PTE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); +} + +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + #ifdef CONFIG_NUMA_BALANCING /* - * See the comment in include/asm-generic/pgtable.h + * See the comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -352,25 +458,44 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) + +static inline int pmd_present(pmd_t pmd) +{ + return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); +} + /* * THP definitions. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) +#define pmd_user(pmd) pte_user(pmd_pte(pmd)) +#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd)) +#define pmd_cont(pmd) pte_cont(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) -#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); + pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + + return pmd; +} #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) @@ -403,7 +528,33 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) +static inline void __set_pte_at(struct mm_struct *mm, + unsigned long __always_unused addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + __sync_cache_and_tags(pte, nr); + __check_safe_pte_update(mm, ptep, pte); + set_pte(ptep, pte); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + page_table_check_pmd_set(mm, pmdp, pmd); + return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), + PMD_SIZE >> PAGE_SHIFT); +} + +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(mm, pudp, pud); + return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud), + PUD_SIZE >> PAGE_SHIFT); +} + +#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) +#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) #define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd)) #define __phys_to_pgd_val(phys) __phys_to_pte_val(phys) @@ -411,6 +562,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define __pgprot_modify(prot,mask,bits) \ __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) +#define pgprot_nx(prot) \ + __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -420,6 +574,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +#define pgprot_tagged(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED)) +#define pgprot_mhp pgprot_tagged /* * DMA allocations for non-coherent devices use what the Arm architecture calls * "Normal non-cacheable" memory, which permits speculation, unaligned accesses @@ -439,12 +596,15 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT)) - #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) +#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd)) +#define pmd_bad(pmd) (!pmd_table(pmd)) + +#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE) +#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } @@ -461,6 +621,7 @@ extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; +extern pgd_t reserved_pg_dir[PTRS_PER_PGD]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -497,21 +658,19 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) return __pmd_to_phys(pmd); } -static inline void pte_unmap(pte_t *pte) { } +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return (unsigned long)__va(pmd_page_paddr(pmd)); +} /* Find an entry in the third-level page table. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) -#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) #define pte_clear_fixmap() clear_fixmap(FIX_PTE) -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd))) +#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) @@ -524,12 +683,16 @@ static inline void pte_unmap(pte_t *pte) { } #if CONFIG_PGTABLE_LEVELS > 2 -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) +#define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) +#define pud_user(pud) pte_user(pud_pte(pud)) +#define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -558,17 +721,19 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) return __pud_to_phys(pud); } -/* Find an entry in the second-level page table. */ -#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +static inline pmd_t *pud_pgtable(pud_t pud) +{ + return (pmd_t *)__va(pud_page_paddr(pud)); +} +/* Find an entry in the second-level page table. */ #define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t)) -#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) #define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) #define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) #define pmd_clear_fixmap() clear_fixmap(FIX_PMD) -#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud))) +#define pud_page(pud) phys_to_page(__pud_to_phys(pud)) /* use ONLY for statically allocated translation tables */ #define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) @@ -576,6 +741,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #else #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) +#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ /* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */ #define pmd_set_fixmap(addr) NULL @@ -588,51 +754,55 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 -#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) -#define pgd_present(pgd) (pgd_val(pgd)) +#define p4d_none(p4d) (!p4d_val(p4d)) +#define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) +#define p4d_present(p4d) (p4d_val(p4d)) -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { - if (in_swapper_pgdir(pgdp)) { - set_swapper_pgd(pgdp, pgd); + if (in_swapper_pgdir(p4dp)) { + set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); return; } - WRITE_ONCE(*pgdp, pgd); + WRITE_ONCE(*p4dp, p4d); dsb(ishst); isb(); } -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - set_pgd(pgdp, __pgd(0)); + set_p4d(p4dp, __p4d(0)); } -static inline phys_addr_t pgd_page_paddr(pgd_t pgd) +static inline phys_addr_t p4d_page_paddr(p4d_t p4d) { - return __pgd_to_phys(pgd); + return __p4d_to_phys(p4d); } -/* Find an entry in the frst-level page table. */ -#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + return (pud_t *)__va(p4d_page_paddr(p4d)); +} -#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) -#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) +/* Find an entry in the first-level page table. */ +#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) -#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) +#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) #define pud_clear_fixmap() clear_fixmap(FIX_PUD) -#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) +#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) /* use ONLY for statically allocated translation tables */ #define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) #else +#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;}) #define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) /* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ @@ -644,29 +814,32 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) - -/* to find an entry in a page-table-directory */ -#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) - -#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr))) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + /* + * Normal and Normal-Tagged are two different memory types and indices + * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. + */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | + PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } @@ -700,6 +873,23 @@ static inline int pgd_devmap(pgd_t pgd) } #endif +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); +} +#endif + /* * Atomic pte/pmd modifications. */ @@ -761,7 +951,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - return __pte(xchg_relaxed(&pte_val(*ptep), 0)); + pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); + + page_table_check_pte_clear(mm, pte); + + return pte; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -769,7 +963,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); + pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0)); + + page_table_check_pmd_clear(mm, pmd); + + return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -785,12 +983,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); @@ -809,6 +1001,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); } #endif @@ -816,12 +1009,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) - * bits 2-7: swap type + * bits 2: remember PG_anon_exclusive + * bits 3-7: swap type * bits 8-57: swap offset * bit 58: PTE_PROT_NONE (must be zero) */ -#define __SWP_TYPE_SHIFT 2 -#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_BITS 5 #define __SWP_OFFSET_BITS 50 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -834,21 +1028,55 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(swp) __pmd((swp).val) +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + /* * Ensure that there are not more swap files than can be encoded in the kernel * PTEs. */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -extern int kern_addr_valid(unsigned long addr); +#ifdef CONFIG_ARM64_MTE + +#define __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + if (system_supports_mte()) + return mte_save_tags(page); + return 0; +} + +#define __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ + if (system_supports_mte()) + mte_invalidate_tags(type, offset); +} -#include <asm-generic/pgtable.h> +static inline void arch_swap_invalidate_area(int type) +{ + if (system_supports_mte()) + mte_invalidate_tags_area(type); +} + +#define __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) +{ + if (system_supports_mte()) + mte_restore_tags(entry, &folio->page); +} + +#endif /* CONFIG_ARM64_MTE */ /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + unsigned int nr) { /* * We don't do anything here, so there's a very small chance of @@ -857,6 +1085,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, */ } +#define update_mmu_cache(vma, addr, ptep) \ + update_mmu_cache_range(NULL, vma, addr, ptep, 1) #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #ifdef CONFIG_ARM64_PA_BITS_52 @@ -871,14 +1101,29 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. */ -static inline bool arch_faults_on_old_pte(void) -{ - WARN_ON(preemptible()); +#define arch_has_hw_pte_young cpu_has_hw_af + +/* + * Experimentally, it's cheap to set the access flag in hardware and we + * benefit from prefaulting mappings as 'old' to start with. + */ +#define arch_wants_old_prefaulted_pte cpu_has_hw_af - return !cpu_has_hw_af(); +static inline bool pud_sect_supported(void) +{ + return PAGE_SIZE == SZ_4K; } -#define arch_faults_on_old_pte arch_faults_on_old_pte + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +#define ptep_modify_prot_start ptep_modify_prot_start +extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define ptep_modify_prot_commit ptep_modify_prot_commit +extern void ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ |