diff options
Diffstat (limited to 'include/linux/rmap.h')
| -rw-r--r-- | include/linux/rmap.h | 431 |
1 files changed, 351 insertions, 80 deletions
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 7229b9baf20d..daa92a58585d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -13,6 +13,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> +#include <linux/bit_spinlock.h> /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -171,7 +172,215 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, unlink_anon_vmas(next); } -struct anon_vma *folio_get_anon_vma(struct folio *folio); +struct anon_vma *folio_get_anon_vma(const struct folio *folio); + +#ifdef CONFIG_MM_ID +static __always_inline void folio_lock_large_mapcount(struct folio *folio) +{ + bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static __always_inline void folio_unlock_large_mapcount(struct folio *folio) +{ + __bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static inline unsigned int folio_mm_id(const struct folio *folio, int idx) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + return folio->_mm_id[idx] & MM_ID_MASK; +} + +static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + folio->_mm_id[idx] &= ~MM_ID_MASK; + folio->_mm_id[idx] |= id; +} + +static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio, + int diff, mm_id_t mm_id) +{ + VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio)); + VM_WARN_ON_ONCE(diff <= 0); + VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX); + + /* + * Make sure we can detect at least one complete PTE mapping of the + * folio in a single MM as "exclusively mapped". This is primarily + * a check on 32bit, where we currently reduce the size of the per-MM + * mapcount to a short. + */ + VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio)); + VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY && + folio->_mm_id_mapcount[0] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY && + folio->_mm_id_mapcount[0] < 0); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) == MM_ID_DUMMY && + folio->_mm_id_mapcount[1] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY && + folio->_mm_id_mapcount[1] < 0); + VM_WARN_ON_ONCE(!folio_mapped(folio) && + test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)); +} + +static __always_inline void folio_set_large_mapcount(struct folio *folio, + int mapcount, struct vm_area_struct *vma) +{ + __folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY); + + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); + folio->_mm_id_mapcount[0] = mapcount - 1; + folio_set_mm_id(folio, 0, vma->vm_mm->mm_id); +} + +static __always_inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * If a folio is mapped more than once into an MM on 32bit, we + * can in theory overflow the per-MM mapcount (although only for + * fairly large folios), turning it negative. In that case, just + * free up the slot and mark the folio "mapped shared", otherwise + * we might be in trouble when unmapping pages later. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) { + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) { + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 0, mm_id); + folio->_mm_id_mapcount[0] = diff - 1; + /* We might have other mappings already. */ + if (new_mapcount_val != diff - 1) + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 1, mm_id); + folio->_mm_id_mapcount[1] = diff - 1; + /* Slot 0 certainly has mappings as well. */ + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; +} +#define folio_add_large_mapcount folio_add_return_large_mapcount + +static __always_inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * There are valid corner cases where we might underflow a per-MM + * mapcount (some mappings added when no slot was free, some mappings + * added once a slot was free), so we always set it to -1 once we go + * negative. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] -= diff; + if (folio->_mm_id_mapcount[0] >= 0) + goto out; + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] -= diff; + if (folio->_mm_id_mapcount[1] >= 0) + goto out; + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + } + + /* + * If one MM slot owns all mappings, the folio is mapped exclusively. + * Note that if the folio is now unmapped (new_mapcount_val == -1), both + * slots must be free (mapcount == -1), and we'll also mark it as + * exclusive. + */ + if (folio->_mm_id_mapcount[0] == new_mapcount_val || + folio->_mm_id_mapcount[1] == new_mapcount_val) + folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT; +out: + folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; +} +#define folio_sub_large_mapcount folio_sub_return_large_mapcount +#else /* !CONFIG_MM_ID */ +/* + * See __folio_rmap_sanity_checks(), we might map large folios even without + * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now. + */ +static inline void folio_set_large_mapcount(struct folio *folio, int mapcount, + struct vm_area_struct *vma) +{ + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); +} + +static inline void folio_add_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_add(diff, &folio->_large_mapcount); +} + +static inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} + +static inline void folio_sub_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_sub(diff, &folio->_large_mapcount); +} + +static inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} +#endif /* CONFIG_MM_ID */ + +#define folio_inc_large_mapcount(folio, vma) \ + folio_add_large_mapcount(folio, 1, vma) +#define folio_inc_return_large_mapcount(folio, vma) \ + folio_add_return_large_mapcount(folio, 1, vma) +#define folio_dec_large_mapcount(folio, vma) \ + folio_sub_large_mapcount(folio, 1, vma) +#define folio_dec_return_large_mapcount(folio, vma) \ + folio_sub_return_large_mapcount(folio, 1, vma) /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -185,21 +394,15 @@ typedef int __bitwise rmap_t; /* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) -/* - * Internally, we're using an enum to specify the granularity. We make the - * compiler emit specialized code for each granularity. - */ -enum rmap_level { - RMAP_LEVEL_PTE = 0, - RMAP_LEVEL_PMD, -}; - -static inline void __folio_rmap_sanity_checks(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) +static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum pgtable_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + /* When (un)mapping zeropages, we should never touch ref+mapcount. */ + VM_WARN_ON_FOLIO(is_zero_folio(folio), folio); + /* * TODO: we get driver-allocated folios that have nothing to do with * the rmap using vm_insert_page(); therefore, we cannot assume that @@ -214,19 +417,49 @@ static inline void __folio_rmap_sanity_checks(struct folio *folio, VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: /* * We don't support folios larger than a single PMD yet. So - * when RMAP_LEVEL_PMD is set, we assume that we are creating + * when PGTABLE_LEVEL_PMD is set, we assume that we are creating * a single "entire" mapping of the folio. */ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; + case PGTABLE_LEVEL_PUD: + /* + * Assume that we are creating a single "entire" mapping of the + * folio. + */ + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio); + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); + break; default: - VM_WARN_ON_ONCE(true); + BUILD_BUG(); + } + + /* + * Anon folios must have an associated live anon_vma as long as they're + * mapped into userspace. + * Note that the atomic_read() mainly does two things: + * + * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to + * check that the associated anon_vma has not yet been freed (subject + * to KASAN's usual limitations). This check will pass if the + * anon_vma's refcount has already dropped to 0 but an RCU grace + * period hasn't passed since then. + * 2. If the anon_vma has not yet been freed, it checks that the + * anon_vma still has a nonzero refcount (as opposed to being in the + * middle of an RCU delay for getting freed). + */ + if (folio_test_anon(folio) && !folio_test_ksm(folio)) { + unsigned long mapping = (unsigned long)folio->mapping; + struct anon_vma *anon_vma; + + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); + VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); } } @@ -241,19 +474,23 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages, void folio_add_anon_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, - unsigned long address); + unsigned long address, rmap_t flags); void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_add_file_rmap_pte(folio, page, vma) \ folio_add_file_rmap_ptes(folio, page, 1, vma) void folio_add_file_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_add_file_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_remove_rmap_pte(folio, page, vma) \ folio_remove_rmap_ptes(folio, page, 1, vma) void folio_remove_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_remove_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); @@ -319,28 +556,34 @@ static inline void hugetlb_remove_rmap(struct folio *folio) } static __always_inline void __folio_dup_file_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + enum pgtable_level level) { const int orig_nr_pages = nr_pages; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (!folio_test_large(folio)) { - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } - do { - atomic_inc(&page->_mapcount); - } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) { + do { + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + } + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } } @@ -349,45 +592,47 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, PGTABLE_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, PGTABLE_LEVEL_PTE); } /** * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_pmd(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, PGTABLE_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif } static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma, - enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, enum pgtable_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -412,7 +657,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * copying if the folio maybe pinned. */ switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) @@ -422,26 +667,30 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, if (!folio_test_large(folio)) { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } return 0; } @@ -452,6 +701,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mappings are duplicated * * The page range of the folio is defined by [page, page + nr_pages) @@ -470,17 +720,19 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, - RMAP_LEVEL_PTE); + return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, + src_vma, PGTABLE_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, 1, src_vma, - RMAP_LEVEL_PTE); + return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, + PGTABLE_LEVEL_PTE); } /** @@ -488,6 +740,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mapping is duplicated * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) @@ -506,11 +759,12 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, - RMAP_LEVEL_PMD); + return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, + src_vma, PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -518,7 +772,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, } static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, enum pgtable_level level) { VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); @@ -613,7 +867,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, static inline int folio_try_share_anon_rmap_pte(struct folio *folio, struct page *page) { - return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); + return __folio_try_share_anon_rmap(folio, page, 1, PGTABLE_LEVEL_PTE); } /** @@ -644,7 +898,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, - RMAP_LEVEL_PMD); + PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -655,20 +909,24 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, * Called from mm/vmscan.c to handle paging out */ int folio_referenced(struct folio *, int is_locked, - struct mem_cgroup *memcg, unsigned long *vm_flags); + struct mem_cgroup *memcg, vm_flags_t *vm_flags); void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); -int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, - unsigned long end, struct page **pages, - void *arg); +struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, + void *owner, struct folio **foliop); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) +/* Result flags */ + +/* The page is mapped across page table boundary */ +#define PVMW_PGTABLE_CROSSED (1 << 16) + struct page_vma_mapped_walk { unsigned long pfn; unsigned long nr_pages; @@ -681,16 +939,6 @@ struct page_vma_mapped_walk { unsigned int flags; }; -#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ - struct page_vma_mapped_walk name = { \ - .pfn = page_to_pfn(_page), \ - .nr_pages = compound_nr(_page), \ - .pgoff = page_to_pgoff(_page), \ - .vma = _vma, \ - .address = _address, \ - .flags = _flags, \ - } - #define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ .pfn = folio_pfn(_folio), \ @@ -710,12 +958,33 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) spin_unlock(pvmw->ptl); } -bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); - -/* - * Used by swapoff to help locate where page is expected in vma. +/** + * page_vma_mapped_walk_restart - Restart the page table walk. + * @pvmw: Pointer to struct page_vma_mapped_walk. + * + * It restarts the page table walk when changes occur in the page + * table, such as splitting a PMD. Ensures that the PTL held during + * the previous walk is released and resets the state to allow for + * a new walk starting at the current address stored in pvmw->address. */ -unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +static inline void +page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw) +{ + WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte); + + if (likely(pvmw->ptl)) + spin_unlock(pvmw->ptl); + else + WARN_ON_ONCE(1); + + pvmw->ptl = NULL; + pvmw->pmd = NULL; + pvmw->pte = NULL; +} + +bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); +unsigned long page_address_in_vma(const struct folio *folio, + const struct page *, const struct vm_area_struct *); /* * Cleans the PTEs of shared mappings. @@ -725,12 +994,18 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int folio_mkclean(struct folio *); +int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff, + unsigned long pfn, unsigned long nr_pages); + int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); -void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); +enum rmp_flags { + RMP_LOCKED = 1 << 0, + RMP_USE_SHARED_ZEROPAGE = 1 << 1, +}; -unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); +void remove_migration_ptes(struct folio *src, struct folio *dst, int flags); /* * rmap_walk_control: To control rmap traversing for specific needs @@ -754,14 +1029,14 @@ struct rmap_walk_control { bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct folio *folio); - struct anon_vma *(*anon_lock)(struct folio *folio, + struct anon_vma *(*anon_lock)(const struct folio *folio, struct rmap_walk_control *rwc); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); -struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, +struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ @@ -771,7 +1046,7 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, - unsigned long *vm_flags) + vm_flags_t *vm_flags) { *vm_flags = 0; return 0; @@ -787,8 +1062,4 @@ static inline int folio_mkclean(struct folio *folio) } #endif /* CONFIG_MMU */ -static inline int page_mkclean(struct page *page) -{ - return folio_mkclean(page_folio(page)); -} #endif /* _LINUX_RMAP_H */ |
