diff options
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 380 |
1 files changed, 215 insertions, 165 deletions
diff --git a/mm/rmap.c b/mm/rmap.c index f5d43edad529..c6c4d4ea29a7 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -23,7 +23,7 @@ * inode->i_rwsem (while writing or truncating, not reading or faulting) * mm->mmap_lock * mapping->invalidate_lock (in filemap_fault) - * page->flags PG_locked (lock_page) + * folio_lock * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below) * vma_start_write * mapping->i_mmap_rwsem @@ -32,7 +32,6 @@ * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) * mapping->private_lock (in block_dirty_folio) - * folio_lock_memcg move_lock (in block_dirty_folio) * i_pages lock (widely used) * lruvec->lru_lock (in folio_lruvec_lock_irq) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) @@ -50,7 +49,7 @@ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) * vma_lock (hugetlb specific lock for pmd_sharing) * mapping->i_mmap_rwsem (also used for hugetlb pmd sharing) - * page->flags PG_locked (lock_page) + * folio_lock */ #include <linux/mm.h> @@ -75,6 +74,7 @@ #include <linux/memremap.h> #include <linux/userfaultfd_k.h> #include <linux/mm_inline.h> +#include <linux/oom.h> #include <asm/tlbflush.h> @@ -182,8 +182,6 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, * for the new allocation. At the same time, we do not want * to do any locking for the common case of already having * an anon_vma. - * - * This must be called with the mmap_lock held for reading. */ int __anon_vma_prepare(struct vm_area_struct *vma) { @@ -191,6 +189,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) struct anon_vma *anon_vma, *allocated; struct anon_vma_chain *avc; + mmap_assert_locked(mm); might_sleep(); avc = anon_vma_chain_alloc(GFP_KERNEL); @@ -497,7 +496,7 @@ void __init anon_vma_init(void) * concurrently without folio lock protection). See folio_lock_anon_vma_read() * which has already covered that, and comment above remap_pages(). */ -struct anon_vma *folio_get_anon_vma(struct folio *folio) +struct anon_vma *folio_get_anon_vma(const struct folio *folio) { struct anon_vma *anon_vma = NULL; unsigned long anon_mapping; @@ -541,7 +540,7 @@ out: * reference like with folio_get_anon_vma() and then block on the mutex * on !rwc->try_lock case. */ -struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, +struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma = NULL; @@ -768,13 +767,27 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ -/* - * At what user virtual address is page expected in vma? - * Caller should check the page is actually part of the vma. +/** + * page_address_in_vma - The virtual address of a page in this VMA. + * @folio: The folio containing the page. + * @page: The page within the folio. + * @vma: The VMA we need to know the address in. + * + * Calculates the user virtual address of this page in the specified VMA. + * It is the caller's responsibililty to check the page is actually + * within the VMA. There may not currently be a PTE pointing at this + * page, but if a page fault occurs at this address, this is the page + * which will be accessed. + * + * Context: Caller should hold a reference to the folio. Caller should + * hold a lock (eg the i_mmap_lock or the mmap_lock) which keeps the + * VMA from being altered. + * + * Return: The virtual address corresponding to this page in the VMA. */ -unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) +unsigned long page_address_in_vma(const struct folio *folio, + const struct page *page, const struct vm_area_struct *vma) { - struct folio *folio = page_folio(page); if (folio_test_anon(folio)) { struct anon_vma *page__anon_vma = folio_anon_vma(folio); /* @@ -790,7 +803,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) return -EFAULT; } - return vma_address(page, vma); + /* KSM folios don't reach here because of the !page__anon_vma check */ + return vma_address(vma, page_pgoff(folio, page), 1); } /* @@ -867,13 +881,24 @@ static bool folio_referenced_one(struct folio *folio, continue; } - if (pvmw.pte) { - if (lru_gen_enabled() && - pte_young(ptep_get(pvmw.pte))) { - lru_gen_look_around(&pvmw); - referenced++; - } + /* + * Skip the non-shared swapbacked folio mapped solely by + * the exiting or OOM-reaped process. This avoids redundant + * swap-out followed by an immediate unmap. + */ + if ((!atomic_read(&vma->vm_mm->mm_users) || + check_stable_address_space(vma->vm_mm)) && + folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_likely_mapped_shared(folio)) { + pra->referenced = -1; + page_vma_mapped_walk_done(&pvmw); + return false; + } + if (lru_gen_enabled() && pvmw.pte) { + if (lru_gen_look_around(&pvmw)) + referenced++; + } else if (pvmw.pte) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) referenced++; @@ -961,7 +986,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg) int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags) { - int we_locked = 0; + bool we_locked = false; struct folio_referenced_arg pra = { .mapcount = folio_mapcount(folio), .memcg = memcg, @@ -1128,56 +1153,38 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, if (invalid_mkclean_vma(vma, NULL)) return 0; - pvmw.address = vma_pgoff_address(pgoff, nr_pages, vma); + pvmw.address = vma_address(vma, pgoff, nr_pages); VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma); return page_vma_mkclean_one(&pvmw); } -int folio_total_mapcount(struct folio *folio) -{ - int mapcount = folio_entire_mapcount(folio); - int nr_pages; - int i; - - /* In the common case, avoid the loop when no pages mapped by PTE */ - if (folio_nr_pages_mapped(folio) == 0) - return mapcount; - /* - * Add all the PTE mappings of those pages mapped by PTE. - * Limit the loop to folio_nr_pages_mapped()? - * Perhaps: given all the raciness, that may be a good or a bad idea. - */ - nr_pages = folio_nr_pages(folio); - for (i = 0; i < nr_pages; i++) - mapcount += atomic_read(&folio_page(folio, i)->_mapcount); - - /* But each of those _mapcounts was based on -1 */ - mapcount += nr_pages; - return mapcount; -} - static __always_inline unsigned int __folio_add_rmap(struct folio *folio, struct page *page, int nr_pages, enum rmap_level level, int *nr_pmdmapped) { atomic_t *mapped = &folio->_nr_pages_mapped; - int first, nr = 0; + const int orig_nr_pages = nr_pages; + int first = 0, nr = 0; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { case RMAP_LEVEL_PTE: - do { - first = atomic_inc_and_test(&page->_mapcount); - if (first && folio_test_large(folio)) { - first = atomic_inc_return_relaxed(mapped); - first = (first < ENTIRELY_MAPPED); - } + if (!folio_test_large(folio)) { + nr = atomic_inc_and_test(&folio->_mapcount); + break; + } - if (first) - nr++; + do { + first += atomic_inc_and_test(&page->_mapcount); } while (page++, --nr_pages > 0); + + if (first && + atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED) + nr = first; + + atomic_add(orig_nr_pages, &folio->_large_mapcount); break; case RMAP_LEVEL_PMD: first = atomic_inc_and_test(&folio->_entire_mapcount); @@ -1194,6 +1201,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio, nr = 0; } } + atomic_inc(&folio->_large_mapcount); break; } return nr; @@ -1263,8 +1271,9 @@ static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma, * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped */ -static void __page_check_anon_rmap(struct folio *folio, struct page *page, - struct vm_area_struct *vma, unsigned long address) +static void __page_check_anon_rmap(const struct folio *folio, + const struct page *page, struct vm_area_struct *vma, + unsigned long address) { /* * The page's anon-rmap details (mapping and index) are guaranteed to @@ -1279,37 +1288,46 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page, */ VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root, folio); - VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address), + VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address), page); } +static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped) +{ + int idx; + + if (nr) { + idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED; + __lruvec_stat_mod_folio(folio, idx, nr); + } + if (nr_pmdmapped) { + if (folio_test_anon(folio)) { + idx = NR_ANON_THPS; + __lruvec_stat_mod_folio(folio, idx, nr_pmdmapped); + } else { + /* NR_*_PMDMAPPED are not maintained per-memcg */ + idx = folio_test_swapbacked(folio) ? + NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED; + __mod_node_page_state(folio_pgdat(folio), idx, + nr_pmdmapped); + } + } +} + static __always_inline void __folio_add_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma, unsigned long address, rmap_t flags, enum rmap_level level) { int i, nr, nr_pmdmapped = 0; + VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); - if (nr_pmdmapped) - __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped); - if (nr) - __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr); - if (unlikely(!folio_test_anon(folio))) { - VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); - /* - * For a PTE-mapped large folio, we only know that the single - * PTE is exclusive. Further, __folio_set_anon() might not get - * folio->index right when not given the address of the head - * page. - */ - VM_WARN_ON_FOLIO(folio_test_large(folio) && - level != RMAP_LEVEL_PMD, folio); - __folio_set_anon(folio, vma, address, - !!(flags & RMAP_EXCLUSIVE)); - } else if (likely(!folio_test_ksm(folio))) { + if (likely(!folio_test_ksm(folio))) __page_check_anon_rmap(folio, page, vma, address); - } + + __folio_mod_stat(folio, nr, nr_pmdmapped); if (flags & RMAP_EXCLUSIVE) { switch (level) { @@ -1395,29 +1413,41 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page, * @folio: The folio to add the mapping to. * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped + * @flags: The rmap flags * * Like folio_add_anon_rmap_*() but must only be called on *new* folios. * This means the inc-and-test can be bypassed. - * The folio does not have to be locked. + * The folio doesn't necessarily need to be locked while it's exclusive + * unless two threads map it concurrently. However, the folio must be + * locked if it's shared. * - * If the folio is pmd-mappable, it is accounted as a THP. As the folio - * is new, it's assumed to be mapped exclusively by a single process. + * If the folio is pmd-mappable, it is accounted as a THP. */ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, - unsigned long address) + unsigned long address, rmap_t flags) { - int nr = folio_nr_pages(folio); + const int nr = folio_nr_pages(folio); + const bool exclusive = flags & RMAP_EXCLUSIVE; + int nr_pmdmapped = 0; VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + VM_WARN_ON_FOLIO(!exclusive && !folio_test_locked(folio), folio); VM_BUG_ON_VMA(address < vma->vm_start || address + (nr << PAGE_SHIFT) > vma->vm_end, vma); - __folio_set_swapbacked(folio); - __folio_set_anon(folio, vma, address, true); + + /* + * VM_DROPPABLE mappings don't swap; instead they're just dropped when + * under memory pressure. + */ + if (!folio_test_swapbacked(folio) && !(vma->vm_flags & VM_DROPPABLE)) + __folio_set_swapbacked(folio); + __folio_set_anon(folio, vma, address, exclusive); if (likely(!folio_test_large(folio))) { /* increment count (starts at -1) */ atomic_set(&folio->_mapcount, 0); - SetPageAnonExclusive(&folio->page); + if (exclusive) + SetPageAnonExclusive(&folio->page); } else if (!folio_test_pmd_mappable(folio)) { int i; @@ -1426,19 +1456,26 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, /* increment count (starts at -1) */ atomic_set(&page->_mapcount, 0); - SetPageAnonExclusive(page); + if (exclusive) + SetPageAnonExclusive(page); } + /* increment count (starts at -1) */ + atomic_set(&folio->_large_mapcount, nr - 1); atomic_set(&folio->_nr_pages_mapped, nr); } else { /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); + /* increment count (starts at -1) */ + atomic_set(&folio->_large_mapcount, 0); atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); - SetPageAnonExclusive(&folio->page); - __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr); + if (exclusive) + SetPageAnonExclusive(&folio->page); + nr_pmdmapped = nr; } - __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr); + __folio_mod_stat(folio, nr, nr_pmdmapped); + mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1); } static __always_inline void __folio_add_file_rmap(struct folio *folio, @@ -1450,11 +1487,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio, VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); - if (nr_pmdmapped) - __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ? - NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped); - if (nr) - __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr); + __folio_mod_stat(folio, nr, nr_pmdmapped); /* See comments in folio_add_anon_rmap_*() */ if (!folio_test_large(folio)) @@ -1503,25 +1536,31 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, enum rmap_level level) { atomic_t *mapped = &folio->_nr_pages_mapped; - int last, nr = 0, nr_pmdmapped = 0; - enum node_stat_item idx; + int last = 0, nr = 0, nr_pmdmapped = 0; + bool partially_mapped = false; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { case RMAP_LEVEL_PTE: - do { - last = atomic_add_negative(-1, &page->_mapcount); - if (last && folio_test_large(folio)) { - last = atomic_dec_return_relaxed(mapped); - last = (last < ENTIRELY_MAPPED); - } + if (!folio_test_large(folio)) { + nr = atomic_add_negative(-1, &folio->_mapcount); + break; + } - if (last) - nr++; + atomic_sub(nr_pages, &folio->_large_mapcount); + do { + last += atomic_add_negative(-1, &page->_mapcount); } while (page++, --nr_pages > 0); + + if (last && + atomic_sub_return_relaxed(last, mapped) < ENTIRELY_MAPPED) + nr = last; + + partially_mapped = nr && atomic_read(mapped); break; case RMAP_LEVEL_PMD: + atomic_dec(&folio->_large_mapcount); last = atomic_add_negative(-1, &folio->_entire_mapcount); if (last) { nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped); @@ -1536,31 +1575,22 @@ static __always_inline void __folio_remove_rmap(struct folio *folio, nr = 0; } } + + partially_mapped = nr && nr < nr_pmdmapped; break; } - if (nr_pmdmapped) { - if (folio_test_anon(folio)) - idx = NR_ANON_THPS; - else if (folio_test_swapbacked(folio)) - idx = NR_SHMEM_PMDMAPPED; - else - idx = NR_FILE_PMDMAPPED; - __lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped); - } - if (nr) { - idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED; - __lruvec_stat_mod_folio(folio, idx, -nr); + /* + * Queue anon large folio for deferred split if at least one page of + * the folio is unmapped and at least one page is still mapped. + * + * Check partially_mapped first to ensure it is a large folio. + */ + if (partially_mapped && folio_test_anon(folio) && + !folio_test_partially_mapped(folio)) + deferred_split_folio(folio, true); - /* - * Queue anon large folio for deferred split if at least one - * page of the folio is unmapped and at least one page - * is still mapped. - */ - if (folio_test_large(folio) && folio_test_anon(folio)) - if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped) - deferred_split_folio(folio); - } + __folio_mod_stat(folio, -nr, -nr_pmdmapped); /* * It would be tidy to reset folio_test_anon mapping when fully @@ -1635,9 +1665,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (flags & TTU_SYNC) pvmw.flags = PVMW_SYNC; - if (flags & TTU_SPLIT_HUGE_PMD) - split_huge_pmd_address(vma, address, false, folio); - /* * For THP, we have to assume the worse case ie pmd for invalidation. * For hugetlb, it could be much worse if we need to do pud @@ -1663,9 +1690,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { - /* Unexpected PMD-mapped THP? */ - VM_BUG_ON_FOLIO(!pvmw.pte, folio); - /* * If the folio is in an mlock()d vma, we must not swap it out. */ @@ -1674,11 +1698,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* Restore the mlock which got missed */ if (!folio_test_large(folio)) mlock_vma_folio(folio, vma); - page_vma_mapped_walk_done(&pvmw); - ret = false; - break; + goto walk_abort; } + if (!pvmw.pte) { + if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, + folio)) + goto walk_done; + + if (flags & TTU_SPLIT_HUGE_PMD) { + /* + * We temporarily have to drop the PTL and + * restart so we can process the PTE-mapped THP. + */ + split_huge_pmd_locked(vma, pvmw.address, + pvmw.pmd, false, folio); + flags &= ~TTU_SPLIT_HUGE_PMD; + page_vma_mapped_walk_restart(&pvmw); + continue; + } + } + + /* Unexpected PMD-mapped THP? */ + VM_BUG_ON_FOLIO(!pvmw.pte, folio); + pfn = pte_pfn(ptep_get(pvmw.pte)); subpage = folio_page(folio, pfn - folio_pfn(folio)); address = pvmw.address; @@ -1714,11 +1757,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ if (!anon) { VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); - if (!hugetlb_vma_trylock_write(vma)) { - page_vma_mapped_walk_done(&pvmw); - ret = false; - break; - } + if (!hugetlb_vma_trylock_write(vma)) + goto walk_abort; if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) { hugetlb_vma_unlock_write(vma); flush_tlb_range(vma, @@ -1733,8 +1773,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * actual page and drop map count * to zero. */ - page_vma_mapped_walk_done(&pvmw); - break; + goto walk_done; } hugetlb_vma_unlock_write(vma); } @@ -1780,7 +1819,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); } else { - dec_mm_counter(mm, mm_counter(&folio->page)); + dec_mm_counter(mm, mm_counter(folio)); set_pte_at(mm, address, pvmw.pte, pteval); } @@ -1795,7 +1834,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * migration) will not expect userfaults on already * copied pages. */ - dec_mm_counter(mm, mm_counter(&folio->page)); + dec_mm_counter(mm, mm_counter(folio)); } else if (folio_test_anon(folio)) { swp_entry_t entry = page_swap_entry(subpage); pte_t swp_pte; @@ -1806,9 +1845,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (unlikely(folio_test_swapbacked(folio) != folio_test_swapcache(folio))) { WARN_ON_ONCE(1); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + goto walk_abort; } /* MADV_FREE page check */ @@ -1836,7 +1873,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * plus the rmap(s) (dropped by discard:). */ if (ref_count == 1 + map_count && - !folio_test_dirty(folio)) { + (!folio_test_dirty(folio) || + /* + * Unlike MADV_FREE mappings, VM_DROPPABLE + * ones can be dropped even if they've + * been dirtied. + */ + (vma->vm_flags & VM_DROPPABLE))) { dec_mm_counter(mm, MM_ANONPAGES); goto discard; } @@ -1846,24 +1889,23 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * discarded. Remap the page to page table. */ set_pte_at(mm, address, pvmw.pte, pteval); - folio_set_swapbacked(folio); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + /* + * Unlike MADV_FREE mappings, VM_DROPPABLE ones + * never get swap backed on failure to drop. + */ + if (!(vma->vm_flags & VM_DROPPABLE)) + folio_set_swapbacked(folio); + goto walk_abort; } if (swap_duplicate(entry) < 0) { set_pte_at(mm, address, pvmw.pte, pteval); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + goto walk_abort; } if (arch_unmap_one(mm, vma, address, pteval) < 0) { swap_free(entry); set_pte_at(mm, address, pvmw.pte, pteval); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + goto walk_abort; } /* See folio_try_share_anon_rmap(): clear PTE first. */ @@ -1871,9 +1913,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, folio_try_share_anon_rmap_pte(folio, subpage)) { swap_free(entry); set_pte_at(mm, address, pvmw.pte, pteval); - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; + goto walk_abort; } if (list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); @@ -1903,7 +1943,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * * See Documentation/mm/mmu_notifier.rst */ - dec_mm_counter(mm, mm_counter_file(&folio->page)); + dec_mm_counter(mm, mm_counter_file(folio)); } discard: if (unlikely(folio_test_hugetlb(folio))) @@ -1913,6 +1953,12 @@ discard: if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); + continue; +walk_abort: + ret = false; +walk_done: + page_vma_mapped_walk_done(&pvmw); + break; } mmu_notifier_invalidate_range_end(&range); @@ -2169,7 +2215,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); trace_set_migration_pte(pvmw.address, pte_val(swp_pte), - compound_order(&folio->page)); + folio_order(folio)); /* * No need to invalidate here it will synchronize on * against the special swap migration pte. @@ -2181,7 +2227,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); } else { - dec_mm_counter(mm, mm_counter(&folio->page)); + dec_mm_counter(mm, mm_counter(folio)); set_pte_at(mm, address, pvmw.pte, pteval); } @@ -2196,7 +2242,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, * migration) will not expect userfaults on already * copied pages. */ - dec_mm_counter(mm, mm_counter(&folio->page)); + dec_mm_counter(mm, mm_counter(folio)); } else { swp_entry_t entry; pte_t swp_pte; @@ -2261,7 +2307,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, else set_pte_at(mm, address, pvmw.pte, swp_pte); trace_set_migration_pte(address, pte_val(swp_pte), - compound_order(&folio->page)); + folio_order(folio)); /* * No need to invalidate here it will synchronize on * against the special swap migration pte. @@ -2524,7 +2570,7 @@ void __put_anon_vma(struct anon_vma *anon_vma) anon_vma_free(root); } -static struct anon_vma *rmap_walk_anon_lock(struct folio *folio, +static struct anon_vma *rmap_walk_anon_lock(const struct folio *folio, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma; @@ -2588,7 +2634,8 @@ static void rmap_walk_anon(struct folio *folio, anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff_start, pgoff_end) { struct vm_area_struct *vma = avc->vma; - unsigned long address = vma_address(&folio->page, vma); + unsigned long address = vma_address(vma, pgoff_start, + folio_nr_pages(folio)); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); @@ -2649,7 +2696,8 @@ static void rmap_walk_file(struct folio *folio, lookup: vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff_start, pgoff_end) { - unsigned long address = vma_address(&folio->page, vma); + unsigned long address = vma_address(vma, pgoff_start, + folio_nr_pages(folio)); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); @@ -2702,6 +2750,7 @@ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma, VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); atomic_inc(&folio->_entire_mapcount); + atomic_inc(&folio->_large_mapcount); if (flags & RMAP_EXCLUSIVE) SetPageAnonExclusive(&folio->page); VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 && @@ -2716,6 +2765,7 @@ void hugetlb_add_new_anon_rmap(struct folio *folio, BUG_ON(address < vma->vm_start || address >= vma->vm_end); /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); + atomic_set(&folio->_large_mapcount, 0); folio_clear_hugetlb_restore_reserve(folio); __folio_set_anon(folio, vma, address, true); SetPageAnonExclusive(&folio->page); |