-rw-r--r--  arch/loongarch/mm/pageattr.c   2
-rw-r--r--  arch/openrisc/kernel/dma.c     4
-rw-r--r--  arch/riscv/mm/pageattr.c       8
-rw-r--r--  include/linux/pagewalk.h       7
-rw-r--r--  mm/hugetlb_vmemmap.c           2
-rw-r--r--  mm/internal.h                  3
-rw-r--r--  mm/pagewalk.c                 77
-rw-r--r--  mm/ptdump.c                    3
8 files changed, 71 insertions(+), 35 deletions(-)
diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c
index 99165903908a..f5e910b68229 100644
--- a/arch/loongarch/mm/pageattr.c
+++ b/arch/loongarch/mm/pageattr.c
@@ -118,7 +118,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgp
return 0;
mmap_write_lock(&init_mm);
- ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks);
+ ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL, &masks);
mmap_write_unlock(&init_mm);
flush_tlb_kernel_range(start, end);
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 3a7b5baaa450..af932a4ad306 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -72,7 +72,7 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size)
* them and setting the cache-inhibit bit.
*/
mmap_write_lock(&init_mm);
- error = walk_page_range_novma(&init_mm, va, va + size,
+ error = walk_kernel_page_table_range(va, va + size,
&set_nocache_walk_ops, NULL, NULL);
mmap_write_unlock(&init_mm);
@@ -87,7 +87,7 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)
mmap_write_lock(&init_mm);
/* walk_page_range shouldn't be able to fail here */
- WARN_ON(walk_page_range_novma(&init_mm, va, va + size,
+ WARN_ON(walk_kernel_page_table_range(va, va + size,
&clear_nocache_walk_ops, NULL, NULL));
mmap_write_unlock(&init_mm);
}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index d815448758a1..3f76db3d2769 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -299,7 +299,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
if (ret)
goto unlock;
- ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
+ ret = walk_kernel_page_table_range(lm_start, lm_end,
&pageattr_ops, NULL, &masks);
if (ret)
goto unlock;
@@ -317,13 +317,13 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
if (ret)
goto unlock;
- ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
+ ret = walk_kernel_page_table_range(lm_start, lm_end,
&pageattr_ops, NULL, &masks);
if (ret)
goto unlock;
}
- ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+ ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL,
&masks);
unlock:
@@ -335,7 +335,7 @@ unlock:
*/
flush_tlb_all();
#else
- ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+ ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL,
&masks);
mmap_write_unlock(&init_mm);
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 9700a29f8afb..8ac2f6d6d2a3 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -129,10 +129,9 @@ struct mm_walk {
int walk_page_range(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
-int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
- unsigned long end, const struct mm_walk_ops *ops,
- pgd_t *pgd,
- void *private);
+int walk_kernel_page_table_range(unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ pgd_t *pgd, void *private);
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
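
For context, a caller of the renamed helper looks like this — a minimal sketch assuming the patch is applied; count_pte(), count_ops and count_kernel_ptes() are hypothetical names, not part of this series:

#include <linux/mm.h>
#include <linux/pagewalk.h>

/* Hypothetical example: count present PTEs in a kernel VA range. */
static int count_pte(pte_t *pte, unsigned long addr, unsigned long next,
		     struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(ptep_get(pte)))
		(*count)++;
	return 0;
}

static const struct mm_walk_ops count_ops = {
	.pte_entry	= count_pte,
};

static unsigned long count_kernel_ptes(unsigned long start, unsigned long end)
{
	unsigned long count = 0;

	/* Read lock suffices for kernel tables, barring hot-remove etc. */
	mmap_read_lock(&init_mm);
	walk_kernel_page_table_range(start, end, &count_ops, NULL, &count);
	mmap_read_unlock(&init_mm);
	return count;
}

Note the dropped mm argument relative to walk_page_range_novma(): the new helper hardcodes init_mm, which is what every existing caller passed anyway.
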
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 27245e86df25..ba0fb1b6a5a8 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -166,7 +166,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
VM_BUG_ON(!PAGE_ALIGNED(start | end));
mmap_read_lock(&init_mm);
- ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops,
+ ret = walk_kernel_page_table_range(start, end, &vmemmap_remap_ops,
NULL, walk);
mmap_read_unlock(&init_mm);
if (ret)
diff --git a/mm/internal.h b/mm/internal.h
index f91688e2894f..2c0d9f197d81 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1604,6 +1604,9 @@ static inline void accept_page(struct page *page)
int walk_page_range_mm(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);
+int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ pgd_t *pgd, void *private);
/* pt_reclaim.c */
bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index e478777c86e1..ff5299eca687 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -585,8 +585,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
}
/**
- * walk_page_range_novma - walk a range of pagetables not backed by a vma
- * @mm: mm_struct representing the target process of page table walk
+ * walk_kernel_page_table_range - walk a range of kernel pagetables.
* @start: start address of the virtual address range
* @end: end address of the virtual address range
* @ops: operation to call during the walk
@@ -596,17 +595,61 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
* Similar to walk_page_range() but can walk any page tables even if they are
* not backed by VMAs. Because 'unusual' entries may be walked this function
* will also not lock the PTEs for the pte_entry() callback. This is useful for
- * walking the kernel pages tables or page tables for firmware.
+ * walking kernel page tables or page tables for firmware.
*
* Note: Be careful when walking kernel page tables; the caller may need to
* take other effective approaches (the mmap lock may be insufficient) to
* prevent the intermediate kernel page tables belonging to the specified
* address range from being freed (e.g. memory hot-remove).
*/
-int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+int walk_kernel_page_table_range(unsigned long start, unsigned long end,
+ const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
+{
+ struct mm_struct *mm = &init_mm;
+ struct mm_walk walk = {
+ .ops = ops,
+ .mm = mm,
+ .pgd = pgd,
+ .private = private,
+ .no_vma = true
+ };
+
+ if (start >= end)
+ return -EINVAL;
+ if (!check_ops_valid(ops))
+ return -EINVAL;
+
+ /*
+ * Kernel intermediate page tables are usually not freed, so the mmap
+ * read lock is sufficient. But there are some exceptions, e.g. memory
+ * hot-remove, in which case the mmap lock is insufficient to prevent
+ * the intermediate kernel page tables belonging to the specified
+ * address range from being freed. The caller should take other
+ * actions to prevent this race.
+ */
+ mmap_assert_locked(mm);
+
+ return walk_pgd_range(start, end, &walk);
+}
+
+/**
+ * walk_page_range_debug - walk a range of pagetables not backed by a vma
+ * @mm: mm_struct representing the target process of page table walk
+ * @start: start address of the virtual address range
+ * @end: end address of the virtual address range
+ * @ops: operation to call during the walk
+ * @pgd: pgd to walk if different from mm->pgd
+ * @private: private data for callbacks' usage
+ *
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked this function
+ * will also not lock the PTEs for the pte_entry() callback.
+ *
+ * This is for debugging purposes ONLY.
+ */
+int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
- pgd_t *pgd,
- void *private)
+ pgd_t *pgd, void *private)
{
struct mm_walk walk = {
.ops = ops,
@@ -616,34 +659,24 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
.no_vma = true
};
+ /* For convenience, we allow traversal of kernel mappings. */
+ if (mm == &init_mm)
+ return walk_kernel_page_table_range(start, end, ops,
+ pgd, private);
if (start >= end || !walk.mm)
return -EINVAL;
if (!check_ops_valid(ops))
return -EINVAL;
/*
- * 1) For walking the user virtual address space:
- *
* The mmap lock protects the page walker from changes to the page
* tables during the walk. However a read lock is insufficient to
* protect those areas which don't have a VMA as munmap() detaches
* the VMAs before downgrading to a read lock and actually tearing
* down PTEs/page tables. In which case, the mmap write lock should
- * be hold.
- *
- * 2) For walking the kernel virtual address space:
- *
- * The kernel intermediate page tables usually do not be freed, so
- * the mmap map read lock is sufficient. But there are some exceptions.
- * E.g. memory hot-remove. In which case, the mmap lock is insufficient
- * to prevent the intermediate kernel pages tables belonging to the
- * specified address range from being freed. The caller should take
- * other actions to prevent this race.
+ * be held.
*/
- if (mm == &init_mm)
- mmap_assert_locked(walk.mm);
- else
- mmap_assert_write_locked(walk.mm);
+ mmap_assert_write_locked(mm);
return walk_pgd_range(start, end, &walk);
}
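
The split gives each walker a single locking rule: walk_kernel_page_table_range() asserts that init_mm's mmap lock is held (read or write), while walk_page_range_debug() forwards init_mm walks to the kernel helper and requires the mmap write lock for any other mm, since VMA-less user ranges can be torn down under a mere read lock. A sketch of a debug-only caller under those rules — debug_pte(), debug_ops and debug_dump_range() are hypothetical, and would have to live under mm/ since walk_page_range_debug() is declared in mm/internal.h:

static int debug_pte(pte_t *pte, unsigned long addr, unsigned long next,
		     struct mm_walk *walk)
{
	pte_t val = ptep_get(pte);

	if (pte_present(val))
		pr_info("0x%lx: pte 0x%lx\n", addr,
			(unsigned long)pte_val(val));
	return 0;
}

static const struct mm_walk_ops debug_ops = {
	.pte_entry	= debug_pte,
};

static int debug_dump_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end)
{
	int ret;

	/* Write lock: walk_page_range_debug() asserts it for user mms. */
	mmap_write_lock(mm);
	ret = walk_page_range_debug(mm, start, end, &debug_ops, NULL, NULL);
	mmap_write_unlock(mm);
	return ret;
}
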
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 9374f29cdc6f..61a352aa12ed 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -4,6 +4,7 @@
#include <linux/debugfs.h>
#include <linux/ptdump.h>
#include <linux/kasan.h>
+#include "internal.h"
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
/*
@@ -177,7 +178,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
mmap_write_lock(mm);
while (range->start != range->end) {
- walk_page_range_novma(mm, range->start, range->end,
+ walk_page_range_debug(mm, range->start, range->end,
&ptdump_ops, pgd, st);
range++;
}
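
ptdump is the lone user of the debug variant and the reason it keeps the @pgd override: it may walk a pgd other than mm->pgd (e.g. firmware page tables). Roughly, the calling shape after this patch — an illustrative reduction of ptdump_walk_pgd() above, not a quote of it:

/* Walk one range of an explicit pgd under the mmap write lock.
 * With mm == &init_mm this dispatches to the kernel-range walker,
 * whose locked assertion the write lock also satisfies. */
static void ptdump_walk_one(struct ptdump_state *st, struct mm_struct *mm,
			    pgd_t *pgd, unsigned long start,
			    unsigned long end)
{
	mmap_write_lock(mm);
	walk_page_range_debug(mm, start, end, &ptdump_ops, pgd, st);
	mmap_write_unlock(mm);
}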