summaryrefslogtreecommitdiff
path: root/include/linux/pgtable.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r--include/linux/pgtable.h494
1 files changed, 366 insertions, 128 deletions
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 18019f037bae..652f287c1ef6 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -90,6 +90,27 @@ static inline unsigned long pud_index(unsigned long address)
#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#endif
+#ifndef kernel_pte_init
+static inline void kernel_pte_init(void *addr)
+{
+}
+#define kernel_pte_init kernel_pte_init
+#endif
+
+#ifndef pmd_init
+static inline void pmd_init(void *addr)
+{
+}
+#define pmd_init pmd_init
+#endif
+
+#ifndef pud_init
+static inline void pud_init(void *addr)
+{
+}
+#define pud_init pud_init
+#endif
+
#ifndef pte_offset_kernel
static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
{
@@ -201,15 +222,19 @@ static inline int pmd_dirty(pmd_t pmd)
* hazard could result in the direct mode hypervisor case, since the actual
* write to the page tables may not yet have taken place, so reads though
* a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date. This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified. In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
+ * up to date.
+ *
+ * In the general case, no lock is guaranteed to be held between entry and exit
+ * of the lazy mode. So the implementation must assume preemption may be enabled
+ * and cpu migration is possible; it must take steps to be robust against this.
+ * (In practice, for user PTE updates, the appropriate page table lock(s) are
+ * held, but for kernel PTE updates, no lock is held). Nesting is not permitted
+ * and the mode cannot be used in interrupt context.
*/
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode() do {} while (0)
-#define arch_leave_lazy_mmu_mode() do {} while (0)
-#define arch_flush_lazy_mmu_mode() do {} while (0)
+static inline void arch_enter_lazy_mmu_mode(void) {}
+static inline void arch_leave_lazy_mmu_mode(void) {}
+static inline void arch_flush_lazy_mmu_mode(void) {}
#endif
#ifndef pte_batch_hint
@@ -266,7 +291,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
{
page_table_check_ptes_set(mm, ptep, pte, nr);
- arch_enter_lazy_mmu_mode();
for (;;) {
set_pte(ptep, pte);
if (--nr == 0)
@@ -274,7 +298,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
ptep++;
pte = pte_next_pfn(pte);
}
- arch_leave_lazy_mmu_mode();
}
#endif
#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
@@ -433,6 +456,17 @@ static inline bool arch_has_hw_pte_young(void)
}
#endif
+#ifndef exec_folio_order
+/*
+ * Returns preferred minimum folio order for executable file-backed memory. Must
+ * be in range [0, PMD_ORDER). Default to order-0.
+ */
+static inline unsigned int exec_folio_order(void)
+{
+ return 0;
+}
+#endif
+
#ifndef arch_check_zapped_pte
static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
pte_t pte)
@@ -447,6 +481,12 @@ static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
}
#endif
+#ifndef arch_check_zapped_pud
+static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
@@ -506,7 +546,14 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- ptep_get_and_clear(mm, addr, ptep);
+ pte_t pte = ptep_get(ptep);
+
+ pte_clear(mm, addr, ptep);
+ /*
+ * No need for ptep_get_and_clear(): page table check doesn't care about
+ * any bits that could have been set by HW concurrently.
+ */
+ page_table_check_pte_clear(mm, pte);
}
#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
@@ -689,6 +736,29 @@ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
}
#endif
+/**
+ * get_and_clear_ptes - Clear present PTEs that map consecutive pages of
+ * the same folio, collecting dirty/accessed bits.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ *
+ * Use this instead of get_and_clear_full_ptes() if it is known that we don't
+ * need to clear the full mm, which is mostly the case.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline pte_t get_and_clear_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+ return get_and_clear_full_ptes(mm, addr, ptep, nr, 0);
+}
+
#ifndef clear_full_ptes
/**
* clear_full_ptes - Clear present PTEs that map consecutive pages of the same
@@ -721,6 +791,28 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
}
#endif
+/**
+ * clear_ptes - Clear present PTEs that map consecutive pages of the same folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ *
+ * Use this instead of clear_full_ptes() if it is known that we don't need to
+ * clear the full mm, which is mostly the case.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void clear_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+ clear_full_ptes(mm, addr, ptep, nr, 0);
+}
+
/*
* If two threads concurrently fault at the same page, the thread that
* won the race updates the PTE and its local TLB/Cache. The other thread
@@ -729,13 +821,18 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
* fault. This function updates TLB only, do nothing with cache or others.
* It is the difference with function update_mmu_cache.
*/
-#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
+#ifndef update_mmu_tlb_range
+static inline void update_mmu_tlb_range(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr)
+{
+}
+#endif
+
static inline void update_mmu_tlb(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
+ update_mmu_tlb_range(vma, address, ptep, 1);
}
-#define __HAVE_ARCH_UPDATE_MMU_TLB
-#endif
/*
* Some architectures may be able to avoid expensive synchronization
@@ -1045,45 +1142,16 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
}
#endif
-/*
- * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
- * TLB flush will be required as a result of the "set". For example, use
- * in scenarios where it is known ahead of time that the routine is
- * setting non-present entries, or re-setting an existing entry to the
- * same value. Otherwise, use the typical "set" helpers and flush the
- * TLB.
- */
-#define set_pte_safe(ptep, pte) \
-({ \
- WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
- set_pte(ptep, pte); \
-})
-
-#define set_pmd_safe(pmdp, pmd) \
-({ \
- WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
- set_pmd(pmdp, pmd); \
-})
-
-#define set_pud_safe(pudp, pud) \
-({ \
- WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
- set_pud(pudp, pud); \
-})
-
-#define set_p4d_safe(p4dp, p4d) \
-({ \
- WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
- set_p4d(p4dp, p4d); \
-})
-
-#define set_pgd_safe(pgdp, pgd) \
-({ \
- WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
- set_pgd(pgdp, pgd); \
-})
-
#ifndef __HAVE_ARCH_DO_SWAP_PAGE
+static inline void arch_do_swap_page_nr(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t pte, pte_t oldpte,
+ int nr)
+{
+
+}
+#else
/*
* Some architectures support metadata associated with a page. When a
* page is being swapped out, this metadata must be saved so it can be
@@ -1092,12 +1160,17 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
* page as metadata for the page. arch_do_swap_page() can restore this
* metadata when a page is swapped back in.
*/
-static inline void arch_do_swap_page(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long addr,
- pte_t pte, pte_t oldpte)
-{
-
+static inline void arch_do_swap_page_nr(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t pte, pte_t oldpte,
+ int nr)
+{
+ for (int i = 0; i < nr; i++) {
+ arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE,
+ pte_advance_pfn(pte, i),
+ pte_advance_pfn(oldpte, i));
+ }
}
#endif
@@ -1147,10 +1220,6 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
}
#endif
-#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
-#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
-#endif
-
#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, old_addr, new_addr) (pte)
#endif
@@ -1163,6 +1232,10 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
#endif
+#ifndef flush_tlb_fix_spurious_fault_pmd
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) do { } while (0)
+#endif
+
/*
* When walking page tables, get the address of the next boundary,
* or the end address of the range if that comes earlier. Although no
@@ -1307,7 +1380,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
/*
* Commit an update to a pte, leaving any hardware-controlled bits in
- * the PTE unmodified.
+ * the PTE unmodified. The pte returned from ptep_modify_prot_start() may
+ * additionally have young and/or dirty bits set where previously they were not,
+ * so the updated pte may have these additional changes.
*/
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr,
@@ -1316,6 +1391,102 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
__ptep_modify_prot_commit(vma, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
+
+/**
+ * modify_prot_start_ptes - Start a pte protection read-modify-write transaction
+ * over a batch of ptes, which protects against asynchronous hardware
+ * modifications to the ptes. The intention is not to prevent the hardware from
+ * making pte updates, but to prevent any updates it may make from being lost.
+ * Please see the comment above ptep_modify_prot_start() for full description.
+ *
+ * @vma: The virtual memory area the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_modify_prot_start(), collecting the a/d bits from each pte
+ * in the batch.
+ *
+ * Note that PTE bits in the PTE batch besides the PFN can differ.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. All other PTE bits must be identical for
+ * all PTEs in the batch except for young and dirty bits. The PTEs are all in
+ * the same PMD.
+ */
+#ifndef modify_prot_start_ptes
+static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ pte_t pte, tmp_pte;
+
+ pte = ptep_modify_prot_start(vma, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = ptep_modify_prot_start(vma, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+ return pte;
+}
+#endif
+
+/**
+ * modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any
+ * hardware-controlled bits in the PTE unmodified.
+ *
+ * @vma: The virtual memory area the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @old_pte: Old page table entry (for the first entry) which is now cleared.
+ * @pte: New page table entry to be set.
+ * @nr: Number of entries.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_modify_prot_commit().
+ *
+ * Context: The caller holds the page table lock. The PTEs are all in the same
+ * PMD. On exit, the set ptes in the batch map the same folio. The ptes set by
+ * ptep_modify_prot_start() may additionally have young and/or dirty bits set
+ * where previously they were not, so the updated ptes may have these
+ * additional changes.
+ */
+#ifndef modify_prot_commit_ptes
+static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
+{
+ int i;
+
+ for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) {
+ ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+
+ /* Advance PFN only, set same prot */
+ old_pte = pte_next_pfn(old_pte);
+ pte = pte_next_pfn(pte);
+ }
+}
+#endif
+
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
#endif /* CONFIG_MMU */
/*
@@ -1386,6 +1557,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define arch_start_context_switch(prev) do {} while (0)
#endif
+/*
+ * Some platforms can customize the PTE soft-dirty bit making it unavailable
+ * even if the architecture provides the resource.
+ * Adding this API allows architectures to add their own checks for the
+ * devices on which the kernel is running.
+ * Note: When overriding it, please make sure the CONFIG_MEM_SOFT_DIRTY
+ * is part of this macro.
+ */
+#ifndef pgtable_supports_soft_dirty
+#define pgtable_supports_soft_dirty() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
+#endif
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
@@ -1472,64 +1655,92 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
* vmf_insert_pfn.
*/
-/*
- * track_pfn_remap is called when a _new_ pfn mapping is being established
- * by remap_pfn_range() for physical range indicated by pfn and size.
- */
-static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long addr,
- unsigned long size)
+static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+ pgprot_t *prot)
{
return 0;
}
-/*
- * track_pfn_insert is called when a _new_ single pfn is established
- * by vmf_insert_pfn().
- */
-static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn)
+static inline int pfnmap_track(unsigned long pfn, unsigned long size,
+ pgprot_t *prot)
{
+ return 0;
}
-/*
- * track_pfn_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- */
-static inline int track_pfn_copy(struct vm_area_struct *vma)
+static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
{
- return 0;
}
+#else
+/**
+ * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to modify
+ *
+ * Lookup the cachemode for the pfn range starting at @pfn with the size
+ * @size and store it in @prot, leaving other data in @prot unchanged.
+ *
+ * This allows for a hardware implementation to have fine-grained control of
+ * memory cache behavior at page level granularity. Without a hardware
+ * implementation, this function does nothing.
+ *
+ * Currently there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * This function can fail if the pfn range spans pfns that require differing
+ * cachemodes. If the pfn range was previously verified to have a single
+ * cachemode, it is sufficient to query only a single pfn. The assumption is
+ * that this is the case for drivers using the vmf_insert_pfn*() interface.
+ *
+ * Returns 0 on success and -EINVAL on error.
+ */
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+ pgprot_t *prot);
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
+/**
+ * pfnmap_track - track a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to track
+ *
+ * Requested the pfn range to be 'tracked' by a hardware implementation and
+ * setup the cachemode in @prot similar to pfnmap_setup_cachemode().
+ *
+ * This allows for fine-grained control of memory cache behaviour at page
+ * level granularity. Tracking memory this way is persisted across VMA splits
+ * (VMA merging does not apply for VM_PFNMAP).
+ *
+ * Currently, there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * Returns 0 on success and -EINVAL on error.
*/
-static inline void untrack_pfn(struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size,
- bool mm_wr_locked)
-{
-}
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
-/*
- * untrack_pfn_clear is called while mremapping a pfnmap for a new region
- * or fails to copy pgtable during duplicate vm area.
+/**
+ * pfnmap_untrack - untrack a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ *
+ * Untrack a pfn range previously tracked through pfnmap_track().
+ */
+void pfnmap_untrack(unsigned long pfn, unsigned long size);
+#endif
+
+/**
+ * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn
+ * @pfn: the pfn
+ * @prot: the pgprot to modify
+ *
+ * Lookup the cachemode for @pfn and store it in @prot, leaving other
+ * data in @prot unchanged.
+ *
+ * See pfnmap_setup_cachemode() for details.
*/
-static inline void untrack_pfn_clear(struct vm_area_struct *vma)
+static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
{
+ pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
}
-#else
-extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long addr,
- unsigned long size);
-extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn);
-extern int track_pfn_copy(struct vm_area_struct *vma);
-extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size, bool mm_wr_locked);
-extern void untrack_pfn_clear(struct vm_area_struct *vma);
-#endif
#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
@@ -1591,21 +1802,6 @@ static inline int pud_write(pud_t pud)
}
#endif /* pud_write */
-#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline int pmd_devmap(pmd_t pmd)
-{
- return 0;
-}
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
-}
-#endif
-
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline int pud_trans_huge(pud_t pud)
@@ -1620,7 +1816,7 @@ static inline int pud_trans_unstable(pud_t *pud)
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
pud_t pudval = READ_ONCE(*pud);
- if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
+ if (pud_none(pudval) || pud_trans_huge(pudval))
return 1;
if (unlikely(pud_bad(pudval))) {
pud_clear_bad(pud);
@@ -1774,10 +1970,11 @@ static inline bool arch_has_pfn_modify_check(void)
/*
* Page Table Modification bits for pgtbl_mod_mask.
*
- * These are used by the p?d_alloc_track*() set of functions an in the generic
- * vmalloc/ioremap code to track at which page-table levels entries have been
- * modified. Based on that the code can better decide when vmalloc and ioremap
- * mapping changes need to be synchronized to other page-tables in the system.
+ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
+ * functions in the generic vmalloc, ioremap and page table update code
+ * to track at which page-table levels entries have been modified.
+ * Based on that the code can better decide when page table changes need
+ * to be synchronized to other page-tables in the system.
*/
#define __PGTBL_PGD_MODIFIED 0
#define __PGTBL_P4D_MODIFIED 1
@@ -1794,6 +1991,32 @@ static inline bool arch_has_pfn_modify_check(void)
/* Page-Table Modification Mask */
typedef unsigned int pgtbl_mod_mask;
+enum pgtable_level {
+ PGTABLE_LEVEL_PTE = 0,
+ PGTABLE_LEVEL_PMD,
+ PGTABLE_LEVEL_PUD,
+ PGTABLE_LEVEL_P4D,
+ PGTABLE_LEVEL_PGD,
+};
+
+static inline const char *pgtable_level_to_str(enum pgtable_level level)
+{
+ switch (level) {
+ case PGTABLE_LEVEL_PTE:
+ return "pte";
+ case PGTABLE_LEVEL_PMD:
+ return "pmd";
+ case PGTABLE_LEVEL_PUD:
+ return "pud";
+ case PGTABLE_LEVEL_P4D:
+ return "p4d";
+ case PGTABLE_LEVEL_PGD:
+ return "pgd";
+ default:
+ return "unknown";
+ }
+}
+
#endif /* !__ASSEMBLY__ */
#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
@@ -1860,8 +2083,8 @@ typedef unsigned int pgtbl_mod_mask;
* - It should contain a huge PFN, which points to a huge page larger than
* PAGE_SIZE of the platform. The PFN format isn't important here.
*
- * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(),
- * pXd_devmap(), or hugetlb mappings).
+ * - It should cover all kinds of huge mappings (i.e. pXd_trans_huge()
+ * or hugetlb mappings).
*/
#ifndef pgd_leaf
#define pgd_leaf(x) false
@@ -1888,9 +2111,12 @@ typedef unsigned int pgtbl_mod_mask;
#ifndef pmd_leaf_size
#define pmd_leaf_size(x) PMD_SIZE
#endif
+#ifndef __pte_leaf_size
#ifndef pte_leaf_size
#define pte_leaf_size(x) PAGE_SIZE
#endif
+#define __pte_leaf_size(x,y) pte_leaf_size(y)
+#endif
/*
* We always define pmd_pfn for all archs as it's used in lots of generic
@@ -1928,6 +2154,18 @@ typedef unsigned int pgtbl_mod_mask;
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
#endif
+#ifndef pte_pgprot
+#define pte_pgprot(x) ((pgprot_t) {0})
+#endif
+
+#ifndef pmd_pgprot
+#define pmd_pgprot(x) ((pgprot_t) {0})
+#endif
+
+#ifndef pud_pgprot
+#define pud_pgprot(x) ((pgprot_t) {0})
+#endif
+
/* description of effects of mapping type and prot in current implementation.
* this is due to the limited x86 page protection hardware. The expected
* behavior is in parens:
@@ -1949,7 +2187,7 @@ typedef unsigned int pgtbl_mod_mask;
* x: (yes) yes
*/
#define DECLARE_VM_GET_PAGE_PROT \
-pgprot_t vm_get_page_prot(unsigned long vm_flags) \
+pgprot_t vm_get_page_prot(vm_flags_t vm_flags) \
{ \
return protection_map[vm_flags & \
(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \