Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 235 |
1 file changed, 124 insertions, 111 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 568198e9efc2..ac4f783d6ec2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -79,7 +79,6 @@
 #include <asm/tlbflush.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/events/tlb.h>
 #include <trace/events/migrate.h>
 
 #include "internal.h"
@@ -285,7 +284,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma;
 
-		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
+		avc = anon_vma_chain_alloc(GFP_NOWAIT);
 		if (unlikely(!avc)) {
 			unlock_anon_vma_root(root);
 			root = NULL;
@@ -851,34 +850,34 @@ static bool folio_referenced_one(struct folio *folio,
 {
 	struct folio_referenced_arg *pra = arg;
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	int referenced = 0;
-	unsigned long start = address, ptes = 0;
+	int ptes = 0, referenced = 0;
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		address = pvmw.address;
 
 		if (vma->vm_flags & VM_LOCKED) {
-			if (!folio_test_large(folio) || !pvmw.pte) {
-				/* Restore the mlock which got missed */
-				mlock_vma_folio(folio, vma);
-				page_vma_mapped_walk_done(&pvmw);
-				pra->vm_flags |= VM_LOCKED;
-				return false; /* To break the loop */
-			}
+			ptes++;
+			pra->mapcount--;
+
+			/* Only mlock fully mapped pages */
+			if (pvmw.pte && ptes != pvmw.nr_pages)
+				continue;
+
 			/*
-			 * For large folio fully mapped to VMA, will
-			 * be handled after the pvmw loop.
+			 * All PTEs must be protected by page table lock in
+			 * order to mlock the page.
 			 *
-			 * For large folio cross VMA boundaries, it's
-			 * expected to be picked by page reclaim. But
-			 * should skip reference of pages which are in
-			 * the range of VM_LOCKED vma. As page reclaim
-			 * should just count the reference of pages out
-			 * the range of VM_LOCKED vma.
+			 * If the page table boundary has been crossed, the
+			 * current ptl only protects part of the ptes.
 			 */
-			ptes++;
-			pra->mapcount--;
-			continue;
+			if (pvmw.flags & PVMW_PGTABLE_CROSSED)
+				continue;
+
+			/* Restore the mlock which got missed */
+			mlock_vma_folio(folio, vma);
+			page_vma_mapped_walk_done(&pvmw);
+			pra->vm_flags |= VM_LOCKED;
+			return false; /* To break the loop */
 		}
 
 		/*
@@ -914,23 +913,6 @@ static bool folio_referenced_one(struct folio *folio,
 		pra->mapcount--;
 	}
 
-	if ((vma->vm_flags & VM_LOCKED) &&
-			folio_test_large(folio) &&
-			folio_within_vma(folio, vma)) {
-		unsigned long s_align, e_align;
-
-		s_align = ALIGN_DOWN(start, PMD_SIZE);
-		e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
-
-		/* folio doesn't cross page table boundary and fully mapped */
-		if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
-			/* Restore the mlock which got missed */
-			mlock_vma_folio(folio, vma);
-			pra->vm_flags |= VM_LOCKED;
-			return false; /* To break the loop */
-		}
-	}
-
 	if (referenced)
 		folio_clear_idle(folio);
 	if (folio_test_clear_young(folio))
@@ -1241,18 +1223,40 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 	return page_vma_mkclean_one(&pvmw);
 }
 
-static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
+static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
+{
+	int idx;
+
+	if (nr) {
+		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
+		__lruvec_stat_mod_folio(folio, idx, nr);
+	}
+	if (nr_pmdmapped) {
+		if (folio_test_anon(folio)) {
+			idx = NR_ANON_THPS;
+			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
+		} else {
+			/* NR_*_PMDMAPPED are not maintained per-memcg */
+			idx = folio_test_swapbacked(folio) ?
+				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
+			__mod_node_page_state(folio_pgdat(folio), idx,
+					      nr_pmdmapped);
+		}
+	}
+}
+
+static __always_inline void __folio_add_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
-		enum rmap_level level, int *nr_pmdmapped)
+		enum pgtable_level level)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
 	const int orig_nr_pages = nr_pages;
-	int first = 0, nr = 0;
+	int first = 0, nr = 0, nr_pmdmapped = 0;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
 	switch (level) {
-	case RMAP_LEVEL_PTE:
+	case PGTABLE_LEVEL_PTE:
 		if (!folio_test_large(folio)) {
 			nr = atomic_inc_and_test(&folio->_mapcount);
 			break;
@@ -1278,12 +1282,12 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 
 		folio_add_large_mapcount(folio, orig_nr_pages, vma);
 		break;
-	case RMAP_LEVEL_PMD:
-	case RMAP_LEVEL_PUD:
+	case PGTABLE_LEVEL_PMD:
+	case PGTABLE_LEVEL_PUD:
 		first = atomic_inc_and_test(&folio->_entire_mapcount);
 		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
-			if (level == RMAP_LEVEL_PMD && first)
-				*nr_pmdmapped = folio_large_nr_pages(folio);
+			if (level == PGTABLE_LEVEL_PMD && first)
+				nr_pmdmapped = folio_large_nr_pages(folio);
 			nr = folio_inc_return_large_mapcount(folio, vma);
 			if (nr == 1)
 				/* Was completely unmapped. */
@@ -1301,8 +1305,8 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 			 * We only track PMD mappings of PMD-sized
 			 * folios separately.
 			 */
-			if (level == RMAP_LEVEL_PMD)
-				*nr_pmdmapped = nr_pages;
+			if (level == PGTABLE_LEVEL_PMD)
+				nr_pmdmapped = nr_pages;
 			nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
 			/* Raced ahead of a remove and another add? */
 			if (unlikely(nr < 0))
@@ -1314,8 +1318,10 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 		}
 		folio_inc_large_mapcount(folio, vma);
 		break;
+	default:
+		BUILD_BUG();
 	}
-	return nr;
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 }
 
 /**
@@ -1403,59 +1409,37 @@ static void __page_check_anon_rmap(const struct folio *folio,
 		       page);
 }
 
-static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
-{
-	int idx;
-
-	if (nr) {
-		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
-		__lruvec_stat_mod_folio(folio, idx, nr);
-	}
-	if (nr_pmdmapped) {
-		if (folio_test_anon(folio)) {
-			idx = NR_ANON_THPS;
-			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
-		} else {
-			/* NR_*_PMDMAPPED are not maintained per-memcg */
-			idx = folio_test_swapbacked(folio) ?
-				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
-			__mod_node_page_state(folio_pgdat(folio), idx,
-					      nr_pmdmapped);
-		}
-	}
-}
-
 static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
-		unsigned long address, rmap_t flags, enum rmap_level level)
+		unsigned long address, rmap_t flags, enum pgtable_level level)
 {
-	int i, nr, nr_pmdmapped = 0;
+	int i;
 
 	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
 
-	nr = __folio_add_rmap(folio, page, nr_pages, vma, level, &nr_pmdmapped);
+	__folio_add_rmap(folio, page, nr_pages, vma, level);
 
 	if (likely(!folio_test_ksm(folio)))
 		__page_check_anon_rmap(folio, page, vma, address);
 
-	__folio_mod_stat(folio, nr, nr_pmdmapped);
-
 	if (flags & RMAP_EXCLUSIVE) {
 		switch (level) {
-		case RMAP_LEVEL_PTE:
+		case PGTABLE_LEVEL_PTE:
 			for (i = 0; i < nr_pages; i++)
 				SetPageAnonExclusive(page + i);
 			break;
-		case RMAP_LEVEL_PMD:
+		case PGTABLE_LEVEL_PMD:
 			SetPageAnonExclusive(page);
 			break;
-		case RMAP_LEVEL_PUD:
+		case PGTABLE_LEVEL_PUD:
 			/*
 			 * Keep the compiler happy, we don't support anonymous
 			 * PUD mappings.
 			 */
			WARN_ON_ONCE(1);
 			break;
+		default:
+			BUILD_BUG();
 		}
 	}
 
@@ -1479,12 +1463,12 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 	}
 
 	/*
-	 * For large folio, only mlock it if it's fully mapped to VMA. It's
-	 * not easy to check whether the large folio is fully mapped to VMA
-	 * here. Only mlock normal 4K folio and leave page reclaim to handle
-	 * large folio.
+	 * Only mlock it if the folio is fully mapped to the VMA.
+	 *
+	 * Partially mapped folios can be split on reclaim and part outside
+	 * of mlocked VMA can be evicted or freed.
 	 */
-	if (!folio_test_large(folio))
+	if (folio_nr_pages(folio) == nr_pages)
 		mlock_vma_folio(folio, vma);
 }
 
@@ -1509,7 +1493,7 @@ void folio_add_anon_rmap_ptes(struct folio *folio, struct page *page,
 		rmap_t flags)
 {
 	__folio_add_anon_rmap(folio, page, nr_pages, vma, address, flags,
-			      RMAP_LEVEL_PTE);
+			      PGTABLE_LEVEL_PTE);
 }
 
 /**
@@ -1530,7 +1514,7 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	__folio_add_anon_rmap(folio, page, HPAGE_PMD_NR, vma, address, flags,
-			      RMAP_LEVEL_PMD);
+			      PGTABLE_LEVEL_PMD);
 #else
 	WARN_ON_ONCE(true);
 #endif
@@ -1611,17 +1595,19 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 
 static __always_inline void __folio_add_file_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
-		enum rmap_level level)
+		enum pgtable_level level)
 {
-	int nr, nr_pmdmapped = 0;
-
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
-	nr = __folio_add_rmap(folio, page, nr_pages, vma, level, &nr_pmdmapped);
-	__folio_mod_stat(folio, nr, nr_pmdmapped);
+	__folio_add_rmap(folio, page, nr_pages, vma, level);
 
-	/* See comments in folio_add_anon_rmap_*() */
-	if (!folio_test_large(folio))
+	/*
+	 * Only mlock it if the folio is fully mapped to the VMA.
+	 *
+	 * Partially mapped folios can be split on reclaim and part outside
+	 * of mlocked VMA can be evicted or freed.
+	 */
+	if (folio_nr_pages(folio) == nr_pages)
 		mlock_vma_folio(folio, vma);
 }
 
@@ -1639,7 +1625,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
 void folio_add_file_rmap_ptes(struct folio *folio, struct page *page,
 		int nr_pages, struct vm_area_struct *vma)
 {
-	__folio_add_file_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE);
+	__folio_add_file_rmap(folio, page, nr_pages, vma, PGTABLE_LEVEL_PTE);
 }
 
 /**
@@ -1656,7 +1642,7 @@ void folio_add_file_rmap_pmd(struct folio *folio, struct page *page,
 		struct vm_area_struct *vma)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	__folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD);
+	__folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, PGTABLE_LEVEL_PMD);
 #else
 	WARN_ON_ONCE(true);
 #endif
@@ -1677,7 +1663,7 @@ void folio_add_file_rmap_pud(struct folio *folio, struct page *page,
 {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
     defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
-	__folio_add_file_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD);
+	__folio_add_file_rmap(folio, page, HPAGE_PUD_NR, vma, PGTABLE_LEVEL_PUD);
 #else
 	WARN_ON_ONCE(true);
 #endif
@@ -1685,7 +1671,7 @@ void folio_add_file_rmap_pud(struct folio *folio, struct page *page,
 
 static __always_inline void __folio_remove_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
-		enum rmap_level level)
+		enum pgtable_level level)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
 	int last = 0, nr = 0, nr_pmdmapped = 0;
@@ -1694,7 +1680,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
 	switch (level) {
-	case RMAP_LEVEL_PTE:
+	case PGTABLE_LEVEL_PTE:
 		if (!folio_test_large(folio)) {
 			nr = atomic_add_negative(-1, &folio->_mapcount);
 			break;
@@ -1704,7 +1690,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 			nr = folio_sub_return_large_mapcount(folio, nr_pages, vma);
 			if (!nr) {
 				/* Now completely unmapped. */
-				nr = folio_nr_pages(folio);
+				nr = folio_large_nr_pages(folio);
 			} else {
 				partially_mapped = nr < folio_large_nr_pages(folio) &&
 						   !folio_entire_mapcount(folio);
@@ -1724,11 +1710,11 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 
 		partially_mapped = nr && atomic_read(mapped);
 		break;
-	case RMAP_LEVEL_PMD:
-	case RMAP_LEVEL_PUD:
+	case PGTABLE_LEVEL_PMD:
+	case PGTABLE_LEVEL_PUD:
 		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
 			last = atomic_add_negative(-1, &folio->_entire_mapcount);
-			if (level == RMAP_LEVEL_PMD && last)
+			if (level == PGTABLE_LEVEL_PMD && last)
 				nr_pmdmapped = folio_large_nr_pages(folio);
 			nr = folio_dec_return_large_mapcount(folio, vma);
 			if (!nr) {
@@ -1748,9 +1734,9 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 			nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
 			if (likely(nr < ENTIRELY_MAPPED)) {
 				nr_pages = folio_large_nr_pages(folio);
-				if (level == RMAP_LEVEL_PMD)
+				if (level == PGTABLE_LEVEL_PMD)
 					nr_pmdmapped = nr_pages;
-				nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
+				nr = nr_pages - nr;
 				/* Raced ahead of another remove and an add? */
 				if (unlikely(nr < 0))
 					nr = 0;
@@ -1762,6 +1748,8 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 
 		partially_mapped = nr && nr < nr_pmdmapped;
 		break;
+	default:
+		BUILD_BUG();
 	}
 
 	/*
@@ -1801,7 +1789,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
 		int nr_pages, struct vm_area_struct *vma)
 {
-	__folio_remove_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE);
+	__folio_remove_rmap(folio, page, nr_pages, vma, PGTABLE_LEVEL_PTE);
 }
 
 /**
@@ -1818,7 +1806,7 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
 		struct vm_area_struct *vma)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	__folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD);
+	__folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, PGTABLE_LEVEL_PMD);
 #else
 	WARN_ON_ONCE(true);
 #endif
@@ -1839,7 +1827,7 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
 {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
     defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
-	__folio_remove_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD);
+	__folio_remove_rmap(folio, page, HPAGE_PUD_NR, vma, PGTABLE_LEVEL_PUD);
 #else
 	WARN_ON_ONCE(true);
 #endif
@@ -1887,6 +1875,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 	unsigned long nr_pages = 1, end_addr;
 	unsigned long pfn;
 	unsigned long hsz = 0;
+	int ptes = 0;
 
 	/*
 	 * When racing against e.g. zap_pte_range() on another cpu,
@@ -1927,10 +1916,34 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		 */
 		if (!(flags & TTU_IGNORE_MLOCK) &&
 		    (vma->vm_flags & VM_LOCKED)) {
+			ptes++;
+
+			/*
+			 * Set 'ret' to indicate the page cannot be unmapped.
+			 *
+			 * Do not jump to walk_abort immediately as additional
+			 * iteration might be required to detect a fully mapped
+			 * folio and mlock it.
+			 */
+			ret = false;
+
+			/* Only mlock fully mapped pages */
+			if (pvmw.pte && ptes != pvmw.nr_pages)
+				continue;
+
+			/*
+			 * All PTEs must be protected by page table lock in
+			 * order to mlock the page.
+			 *
+			 * If the page table boundary has been crossed, the
+			 * current ptl only protects part of the ptes.
+			 */
+			if (pvmw.flags & PVMW_PGTABLE_CROSSED)
+				goto walk_done;
+
 			/* Restore the mlock which got missed */
-			if (!folio_test_large(folio))
-				mlock_vma_folio(folio, vma);
-			goto walk_abort;
+			mlock_vma_folio(folio, vma);
+			goto walk_done;
 		}
 
 		if (!pvmw.pte) {