summaryrefslogtreecommitdiff
path: root/mm/pagewalk.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/pagewalk.c')
-rw-r--r--mm/pagewalk.c90
1 files changed, 62 insertions, 28 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index e478777c86e1..648038247a8d 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -143,8 +143,7 @@ again:
* We are ONLY installing, so avoid unnecessarily
* splitting a present huge page.
*/
- if (pmd_present(*pmd) &&
- (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+ if (pmd_present(*pmd) && pmd_trans_huge(*pmd))
continue;
}
@@ -210,8 +209,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
* We are ONLY installing, so avoid unnecessarily
* splitting a present huge page.
*/
- if (pud_present(*pud) &&
- (pud_trans_huge(*pud) || pud_devmap(*pud)))
+ if (pud_present(*pud) && pud_trans_huge(*pud))
continue;
}
@@ -422,7 +420,7 @@ static inline void process_mm_walk_lock(struct mm_struct *mm,
{
if (walk_lock == PGWALK_RDLOCK)
mmap_assert_locked(mm);
- else
+ else if (walk_lock != PGWALK_VMA_RDLOCK_VERIFY)
mmap_assert_write_locked(mm);
}
@@ -437,6 +435,9 @@ static inline void process_vma_walk_lock(struct vm_area_struct *vma,
case PGWALK_WRLOCK_VERIFY:
vma_assert_write_locked(vma);
break;
+ case PGWALK_VMA_RDLOCK_VERIFY:
+ vma_assert_locked(vma);
+ break;
case PGWALK_RDLOCK:
/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
break;
@@ -585,8 +586,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
}
/**
- * walk_page_range_novma - walk a range of pagetables not backed by a vma
- * @mm: mm_struct representing the target process of page table walk
+ * walk_kernel_page_table_range - walk a range of kernel pagetables.
* @start: start address of the virtual address range
* @end: end address of the virtual address range
* @ops: operation to call during the walk
@@ -596,17 +596,61 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
* Similar to walk_page_range() but can walk any page tables even if they are
* not backed by VMAs. Because 'unusual' entries may be walked this function
* will also not lock the PTEs for the pte_entry() callback. This is useful for
- * walking the kernel pages tables or page tables for firmware.
+ * walking kernel pages tables or page tables for firmware.
*
* Note: Be careful to walk the kernel pages tables, the caller may be need to
* take other effective approaches (mmap lock may be insufficient) to prevent
* the intermediate kernel page tables belonging to the specified address range
* from being freed (e.g. memory hot-remove).
*/
-int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+int walk_kernel_page_table_range(unsigned long start, unsigned long end,
+ const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
+{
+ struct mm_struct *mm = &init_mm;
+ struct mm_walk walk = {
+ .ops = ops,
+ .mm = mm,
+ .pgd = pgd,
+ .private = private,
+ .no_vma = true
+ };
+
+ if (start >= end)
+ return -EINVAL;
+ if (!check_ops_valid(ops))
+ return -EINVAL;
+
+ /*
+ * Kernel intermediate page tables are usually not freed, so the mmap
+ * read lock is sufficient. But there are some exceptions.
+ * E.g. memory hot-remove. In which case, the mmap lock is insufficient
+ * to prevent the intermediate kernel pages tables belonging to the
+ * specified address range from being freed. The caller should take
+ * other actions to prevent this race.
+ */
+ mmap_assert_locked(mm);
+
+ return walk_pgd_range(start, end, &walk);
+}
+
+/**
+ * walk_page_range_debug - walk a range of pagetables not backed by a vma
+ * @mm: mm_struct representing the target process of page table walk
+ * @start: start address of the virtual address range
+ * @end: end address of the virtual address range
+ * @ops: operation to call during the walk
+ * @pgd: pgd to walk if different from mm->pgd
+ * @private: private data for callbacks' usage
+ *
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked this function
+ * will also not lock the PTEs for the pte_entry() callback.
+ *
+ * This is for debugging purposes ONLY.
+ */
+int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
- pgd_t *pgd,
- void *private)
+ pgd_t *pgd, void *private)
{
struct mm_walk walk = {
.ops = ops,
@@ -616,34 +660,24 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
.no_vma = true
};
+ /* For convenience, we allow traversal of kernel mappings. */
+ if (mm == &init_mm)
+ return walk_kernel_page_table_range(start, end, ops,
+ pgd, private);
if (start >= end || !walk.mm)
return -EINVAL;
if (!check_ops_valid(ops))
return -EINVAL;
/*
- * 1) For walking the user virtual address space:
- *
* The mmap lock protects the page walker from changes to the page
* tables during the walk. However a read lock is insufficient to
* protect those areas which don't have a VMA as munmap() detaches
* the VMAs before downgrading to a read lock and actually tearing
* down PTEs/page tables. In which case, the mmap write lock should
- * be hold.
- *
- * 2) For walking the kernel virtual address space:
- *
- * The kernel intermediate page tables usually do not be freed, so
- * the mmap map read lock is sufficient. But there are some exceptions.
- * E.g. memory hot-remove. In which case, the mmap lock is insufficient
- * to prevent the intermediate kernel pages tables belonging to the
- * specified address range from being freed. The caller should take
- * other actions to prevent this race.
+ * be held.
*/
- if (mm == &init_mm)
- mmap_assert_locked(walk.mm);
- else
- mmap_assert_write_locked(walk.mm);
+ mmap_assert_write_locked(mm);
return walk_pgd_range(start, end, &walk);
}
@@ -872,7 +906,7 @@ struct folio *folio_walk_start(struct folio_walk *fw,
* TODO: FW_MIGRATION support for PUD migration entries
* once there are relevant users.
*/
- if (!pud_present(pud) || pud_devmap(pud) || pud_special(pud)) {
+ if (!pud_present(pud) || pud_special(pud)) {
spin_unlock(ptl);
goto not_found;
} else if (!pud_leaf(pud)) {