diff options
Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r-- | include/linux/pgtable.h | 246 |
1 files changed, 166 insertions, 80 deletions
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e2b705c14945..e3b99920be05 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -456,6 +456,17 @@ static inline bool arch_has_hw_pte_young(void) } #endif +#ifndef exec_folio_order +/* + * Returns preferred minimum folio order for executable file-backed memory. Must + * be in range [0, PMD_ORDER). Default to order-0. + */ +static inline unsigned int exec_folio_order(void) +{ + return 0; +} +#endif + #ifndef arch_check_zapped_pte static inline void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) @@ -1164,10 +1175,6 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) } #endif -#ifndef __HAVE_ARCH_PGD_OFFSET_GATE -#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) -#endif - #ifndef __HAVE_ARCH_MOVE_PTE #define move_pte(pte, old_addr, new_addr) (pte) #endif @@ -1324,7 +1331,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, /* * Commit an update to a pte, leaving any hardware-controlled bits in - * the PTE unmodified. + * the PTE unmodified. The pte returned from ptep_modify_prot_start() may + * additionally have young and/or dirty bits set where previously they were not, + * so the updated pte may have these additional changes. */ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, @@ -1333,6 +1342,86 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, __ptep_modify_prot_commit(vma, addr, ptep, pte); } #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ + +/** + * modify_prot_start_ptes - Start a pte protection read-modify-write transaction + * over a batch of ptes, which protects against asynchronous hardware + * modifications to the ptes. The intention is not to prevent the hardware from + * making pte updates, but to prevent any updates it may make from being lost. + * Please see the comment above ptep_modify_prot_start() for full description. + * + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_modify_prot_start(), collecting the a/d bits from each pte + * in the batch. + * + * Note that PTE bits in the PTE batch besides the PFN can differ. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. All other PTE bits must be identical for + * all PTEs in the batch except for young and dirty bits. The PTEs are all in + * the same PMD. + */ +#ifndef modify_prot_start_ptes +static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + pte_t pte, tmp_pte; + + pte = ptep_modify_prot_start(vma, addr, ptep); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_modify_prot_start(vma, addr, ptep); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} +#endif + +/** + * modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any + * hardware-controlled bits in the PTE unmodified. + * + * @vma: The virtual memory area the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @old_pte: Old page table entry (for the first entry) which is now cleared. + * @pte: New page table entry to be set. + * @nr: Number of entries. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_modify_prot_commit(). + * + * Context: The caller holds the page table lock. The PTEs are all in the same + * PMD. On exit, the set ptes in the batch map the same folio. The ptes set by + * ptep_modify_prot_start() may additionally have young and/or dirty bits set + * where previously they were not, so the updated ptes may have these + * additional changes. + */ +#ifndef modify_prot_commit_ptes +static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr) +{ + int i; + + for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) { + ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); + + /* Advance PFN only, set same prot */ + old_pte = pte_next_pfn(old_pte); + pte = pte_next_pfn(pte); + } +} +#endif + #endif /* CONFIG_MMU */ /* @@ -1489,80 +1578,92 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) * vmf_insert_pfn. */ -/* - * track_pfn_remap is called when a _new_ pfn mapping is being established - * by remap_pfn_range() for physical range indicated by pfn and size. - */ -static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, - unsigned long size) +static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, + pgprot_t *prot) { return 0; } -/* - * track_pfn_insert is called when a _new_ single pfn is established - * by vmf_insert_pfn(). - */ -static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn) +static inline int pfnmap_track(unsigned long pfn, unsigned long size, + pgprot_t *prot) { + return 0; } -/* - * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page - * tables copied during copy_page_range(). On success, stores the pfn to be - * passed to untrack_pfn_copy(). - */ -static inline int track_pfn_copy(struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, unsigned long *pfn) +static inline void pfnmap_untrack(unsigned long pfn, unsigned long size) { - return 0; } +#else +/** + * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * @prot: the pgprot to modify + * + * Lookup the cachemode for the pfn range starting at @pfn with the size + * @size and store it in @prot, leaving other data in @prot unchanged. + * + * This allows for a hardware implementation to have fine-grained control of + * memory cache behavior at page level granularity. Without a hardware + * implementation, this function does nothing. + * + * Currently there is only one implementation for this - x86 Page Attribute + * Table (PAT). See Documentation/arch/x86/pat.rst for more details. + * + * This function can fail if the pfn range spans pfns that require differing + * cachemodes. If the pfn range was previously verified to have a single + * cachemode, it is sufficient to query only a single pfn. The assumption is + * that this is the case for drivers using the vmf_insert_pfn*() interface. + * + * Returns 0 on success and -EINVAL on error. + */ +int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, + pgprot_t *prot); -/* - * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during - * copy_page_range(), but after track_pfn_copy() was already called. +/** + * pfnmap_track - track a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * @prot: the pgprot to track + * + * Requested the pfn range to be 'tracked' by a hardware implementation and + * setup the cachemode in @prot similar to pfnmap_setup_cachemode(). + * + * This allows for fine-grained control of memory cache behaviour at page + * level granularity. Tracking memory this way is persisted across VMA splits + * (VMA merging does not apply for VM_PFNMAP). + * + * Currently, there is only one implementation for this - x86 Page Attribute + * Table (PAT). See Documentation/arch/x86/pat.rst for more details. + * + * Returns 0 on success and -EINVAL on error. */ -static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, - unsigned long pfn) -{ -} +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot); -/* - * untrack_pfn is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case pfn, size are zero). +/** + * pfnmap_untrack - untrack a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * + * Untrack a pfn range previously tracked through pfnmap_track(). */ -static inline void untrack_pfn(struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - bool mm_wr_locked) -{ -} +void pfnmap_untrack(unsigned long pfn, unsigned long size); +#endif -/* - * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA: +/** + * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn + * @pfn: the pfn + * @prot: the pgprot to modify + * + * Lookup the cachemode for @pfn and store it in @prot, leaving other + * data in @prot unchanged. * - * 1) During mremap() on the src VMA after the page tables were moved. - * 2) During fork() on the dst VMA, immediately after duplicating the src VMA. + * See pfnmap_setup_cachemode() for details. */ -static inline void untrack_pfn_clear(struct vm_area_struct *vma) +static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot) { + pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot); } -#else -extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, - unsigned long size); -extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn); -extern int track_pfn_copy(struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, unsigned long *pfn); -extern void untrack_pfn_copy(struct vm_area_struct *dst_vma, - unsigned long pfn); -extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size, bool mm_wr_locked); -extern void untrack_pfn_clear(struct vm_area_struct *vma); -#endif #ifdef CONFIG_MMU #ifdef __HAVE_COLOR_ZERO_PAGE @@ -1624,21 +1725,6 @@ static inline int pud_write(pud_t pud) } #endif /* pud_write */ -#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline int pmd_devmap(pmd_t pmd) -{ - return 0; -} -static inline int pud_devmap(pud_t pud) -{ - return 0; -} -static inline int pgd_devmap(pgd_t pgd) -{ - return 0; -} -#endif - #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static inline int pud_trans_huge(pud_t pud) @@ -1653,7 +1739,7 @@ static inline int pud_trans_unstable(pud_t *pud) defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pudval = READ_ONCE(*pud); - if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) + if (pud_none(pudval) || pud_trans_huge(pudval)) return 1; if (unlikely(pud_bad(pudval))) { pud_clear_bad(pud); @@ -1893,8 +1979,8 @@ typedef unsigned int pgtbl_mod_mask; * - It should contain a huge PFN, which points to a huge page larger than * PAGE_SIZE of the platform. The PFN format isn't important here. * - * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(), - * pXd_devmap(), or hugetlb mappings). + * - It should cover all kinds of huge mappings (i.e. pXd_trans_huge() + * or hugetlb mappings). */ #ifndef pgd_leaf #define pgd_leaf(x) false @@ -1997,7 +2083,7 @@ typedef unsigned int pgtbl_mod_mask; * x: (yes) yes */ #define DECLARE_VM_GET_PAGE_PROT \ -pgprot_t vm_get_page_prot(unsigned long vm_flags) \ +pgprot_t vm_get_page_prot(vm_flags_t vm_flags) \ { \ return protection_map[vm_flags & \ (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \ |