summaryrefslogtreecommitdiff
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c380
1 files changed, 215 insertions, 165 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index f5d43edad529..c6c4d4ea29a7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
* inode->i_rwsem (while writing or truncating, not reading or faulting)
* mm->mmap_lock
* mapping->invalidate_lock (in filemap_fault)
- * page->flags PG_locked (lock_page)
+ * folio_lock
* hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
* vma_start_write
* mapping->i_mmap_rwsem
@@ -32,7 +32,6 @@
* swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others)
* mapping->private_lock (in block_dirty_folio)
- * folio_lock_memcg move_lock (in block_dirty_folio)
* i_pages lock (widely used)
* lruvec->lru_lock (in folio_lruvec_lock_irq)
* inode->i_lock (in set_page_dirty's __mark_inode_dirty)
@@ -50,7 +49,7 @@
* hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
* vma_lock (hugetlb specific lock for pmd_sharing)
* mapping->i_mmap_rwsem (also used for hugetlb pmd sharing)
- * page->flags PG_locked (lock_page)
+ * folio_lock
*/
#include <linux/mm.h>
@@ -75,6 +74,7 @@
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/mm_inline.h>
+#include <linux/oom.h>
#include <asm/tlbflush.h>
@@ -182,8 +182,6 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
* for the new allocation. At the same time, we do not want
* to do any locking for the common case of already having
* an anon_vma.
- *
- * This must be called with the mmap_lock held for reading.
*/
int __anon_vma_prepare(struct vm_area_struct *vma)
{
@@ -191,6 +189,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
struct anon_vma *anon_vma, *allocated;
struct anon_vma_chain *avc;
+ mmap_assert_locked(mm);
might_sleep();
avc = anon_vma_chain_alloc(GFP_KERNEL);
@@ -497,7 +496,7 @@ void __init anon_vma_init(void)
* concurrently without folio lock protection). See folio_lock_anon_vma_read()
* which has already covered that, and comment above remap_pages().
*/
-struct anon_vma *folio_get_anon_vma(struct folio *folio)
+struct anon_vma *folio_get_anon_vma(const struct folio *folio)
{
struct anon_vma *anon_vma = NULL;
unsigned long anon_mapping;
@@ -541,7 +540,7 @@ out:
* reference like with folio_get_anon_vma() and then block on the mutex
* on !rwc->try_lock case.
*/
-struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
+struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma = NULL;
@@ -768,13 +767,27 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-/*
- * At what user virtual address is page expected in vma?
- * Caller should check the page is actually part of the vma.
+/**
+ * page_address_in_vma - The virtual address of a page in this VMA.
+ * @folio: The folio containing the page.
+ * @page: The page within the folio.
+ * @vma: The VMA we need to know the address in.
+ *
+ * Calculates the user virtual address of this page in the specified VMA.
+ * It is the caller's responsibililty to check the page is actually
+ * within the VMA. There may not currently be a PTE pointing at this
+ * page, but if a page fault occurs at this address, this is the page
+ * which will be accessed.
+ *
+ * Context: Caller should hold a reference to the folio. Caller should
+ * hold a lock (eg the i_mmap_lock or the mmap_lock) which keeps the
+ * VMA from being altered.
+ *
+ * Return: The virtual address corresponding to this page in the VMA.
*/
-unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+unsigned long page_address_in_vma(const struct folio *folio,
+ const struct page *page, const struct vm_area_struct *vma)
{
- struct folio *folio = page_folio(page);
if (folio_test_anon(folio)) {
struct anon_vma *page__anon_vma = folio_anon_vma(folio);
/*
@@ -790,7 +803,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
return -EFAULT;
}
- return vma_address(page, vma);
+ /* KSM folios don't reach here because of the !page__anon_vma check */
+ return vma_address(vma, page_pgoff(folio, page), 1);
}
/*
@@ -867,13 +881,24 @@ static bool folio_referenced_one(struct folio *folio,
continue;
}
- if (pvmw.pte) {
- if (lru_gen_enabled() &&
- pte_young(ptep_get(pvmw.pte))) {
- lru_gen_look_around(&pvmw);
- referenced++;
- }
+ /*
+ * Skip the non-shared swapbacked folio mapped solely by
+ * the exiting or OOM-reaped process. This avoids redundant
+ * swap-out followed by an immediate unmap.
+ */
+ if ((!atomic_read(&vma->vm_mm->mm_users) ||
+ check_stable_address_space(vma->vm_mm)) &&
+ folio_test_anon(folio) && folio_test_swapbacked(folio) &&
+ !folio_likely_mapped_shared(folio)) {
+ pra->referenced = -1;
+ page_vma_mapped_walk_done(&pvmw);
+ return false;
+ }
+ if (lru_gen_enabled() && pvmw.pte) {
+ if (lru_gen_look_around(&pvmw))
+ referenced++;
+ } else if (pvmw.pte) {
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte))
referenced++;
@@ -961,7 +986,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags)
{
- int we_locked = 0;
+ bool we_locked = false;
struct folio_referenced_arg pra = {
.mapcount = folio_mapcount(folio),
.memcg = memcg,
@@ -1128,56 +1153,38 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
if (invalid_mkclean_vma(vma, NULL))
return 0;
- pvmw.address = vma_pgoff_address(pgoff, nr_pages, vma);
+ pvmw.address = vma_address(vma, pgoff, nr_pages);
VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma);
return page_vma_mkclean_one(&pvmw);
}
-int folio_total_mapcount(struct folio *folio)
-{
- int mapcount = folio_entire_mapcount(folio);
- int nr_pages;
- int i;
-
- /* In the common case, avoid the loop when no pages mapped by PTE */
- if (folio_nr_pages_mapped(folio) == 0)
- return mapcount;
- /*
- * Add all the PTE mappings of those pages mapped by PTE.
- * Limit the loop to folio_nr_pages_mapped()?
- * Perhaps: given all the raciness, that may be a good or a bad idea.
- */
- nr_pages = folio_nr_pages(folio);
- for (i = 0; i < nr_pages; i++)
- mapcount += atomic_read(&folio_page(folio, i)->_mapcount);
-
- /* But each of those _mapcounts was based on -1 */
- mapcount += nr_pages;
- return mapcount;
-}
-
static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
struct page *page, int nr_pages, enum rmap_level level,
int *nr_pmdmapped)
{
atomic_t *mapped = &folio->_nr_pages_mapped;
- int first, nr = 0;
+ const int orig_nr_pages = nr_pages;
+ int first = 0, nr = 0;
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
switch (level) {
case RMAP_LEVEL_PTE:
- do {
- first = atomic_inc_and_test(&page->_mapcount);
- if (first && folio_test_large(folio)) {
- first = atomic_inc_return_relaxed(mapped);
- first = (first < ENTIRELY_MAPPED);
- }
+ if (!folio_test_large(folio)) {
+ nr = atomic_inc_and_test(&folio->_mapcount);
+ break;
+ }
- if (first)
- nr++;
+ do {
+ first += atomic_inc_and_test(&page->_mapcount);
} while (page++, --nr_pages > 0);
+
+ if (first &&
+ atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED)
+ nr = first;
+
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
first = atomic_inc_and_test(&folio->_entire_mapcount);
@@ -1194,6 +1201,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
nr = 0;
}
}
+ atomic_inc(&folio->_large_mapcount);
break;
}
return nr;
@@ -1263,8 +1271,9 @@ static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma,
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
*/
-static void __page_check_anon_rmap(struct folio *folio, struct page *page,
- struct vm_area_struct *vma, unsigned long address)
+static void __page_check_anon_rmap(const struct folio *folio,
+ const struct page *page, struct vm_area_struct *vma,
+ unsigned long address)
{
/*
* The page's anon-rmap details (mapping and index) are guaranteed to
@@ -1279,37 +1288,46 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
*/
VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root,
folio);
- VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
+ VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address),
page);
}
+static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
+{
+ int idx;
+
+ if (nr) {
+ idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
+ __lruvec_stat_mod_folio(folio, idx, nr);
+ }
+ if (nr_pmdmapped) {
+ if (folio_test_anon(folio)) {
+ idx = NR_ANON_THPS;
+ __lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
+ } else {
+ /* NR_*_PMDMAPPED are not maintained per-memcg */
+ idx = folio_test_swapbacked(folio) ?
+ NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
+ __mod_node_page_state(folio_pgdat(folio), idx,
+ nr_pmdmapped);
+ }
+ }
+}
+
static __always_inline void __folio_add_anon_rmap(struct folio *folio,
struct page *page, int nr_pages, struct vm_area_struct *vma,
unsigned long address, rmap_t flags, enum rmap_level level)
{
int i, nr, nr_pmdmapped = 0;
+ VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+
nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
- if (nr_pmdmapped)
- __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
- if (nr)
- __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
- if (unlikely(!folio_test_anon(folio))) {
- VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
- /*
- * For a PTE-mapped large folio, we only know that the single
- * PTE is exclusive. Further, __folio_set_anon() might not get
- * folio->index right when not given the address of the head
- * page.
- */
- VM_WARN_ON_FOLIO(folio_test_large(folio) &&
- level != RMAP_LEVEL_PMD, folio);
- __folio_set_anon(folio, vma, address,
- !!(flags & RMAP_EXCLUSIVE));
- } else if (likely(!folio_test_ksm(folio))) {
+ if (likely(!folio_test_ksm(folio)))
__page_check_anon_rmap(folio, page, vma, address);
- }
+
+ __folio_mod_stat(folio, nr, nr_pmdmapped);
if (flags & RMAP_EXCLUSIVE) {
switch (level) {
@@ -1395,29 +1413,41 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
* @folio: The folio to add the mapping to.
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
+ * @flags: The rmap flags
*
* Like folio_add_anon_rmap_*() but must only be called on *new* folios.
* This means the inc-and-test can be bypassed.
- * The folio does not have to be locked.
+ * The folio doesn't necessarily need to be locked while it's exclusive
+ * unless two threads map it concurrently. However, the folio must be
+ * locked if it's shared.
*
- * If the folio is pmd-mappable, it is accounted as a THP. As the folio
- * is new, it's assumed to be mapped exclusively by a single process.
+ * If the folio is pmd-mappable, it is accounted as a THP.
*/
void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
- unsigned long address)
+ unsigned long address, rmap_t flags)
{
- int nr = folio_nr_pages(folio);
+ const int nr = folio_nr_pages(folio);
+ const bool exclusive = flags & RMAP_EXCLUSIVE;
+ int nr_pmdmapped = 0;
VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_FOLIO(!exclusive && !folio_test_locked(folio), folio);
VM_BUG_ON_VMA(address < vma->vm_start ||
address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
- __folio_set_swapbacked(folio);
- __folio_set_anon(folio, vma, address, true);
+
+ /*
+ * VM_DROPPABLE mappings don't swap; instead they're just dropped when
+ * under memory pressure.
+ */
+ if (!folio_test_swapbacked(folio) && !(vma->vm_flags & VM_DROPPABLE))
+ __folio_set_swapbacked(folio);
+ __folio_set_anon(folio, vma, address, exclusive);
if (likely(!folio_test_large(folio))) {
/* increment count (starts at -1) */
atomic_set(&folio->_mapcount, 0);
- SetPageAnonExclusive(&folio->page);
+ if (exclusive)
+ SetPageAnonExclusive(&folio->page);
} else if (!folio_test_pmd_mappable(folio)) {
int i;
@@ -1426,19 +1456,26 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
/* increment count (starts at -1) */
atomic_set(&page->_mapcount, 0);
- SetPageAnonExclusive(page);
+ if (exclusive)
+ SetPageAnonExclusive(page);
}
+ /* increment count (starts at -1) */
+ atomic_set(&folio->_large_mapcount, nr - 1);
atomic_set(&folio->_nr_pages_mapped, nr);
} else {
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
+ /* increment count (starts at -1) */
+ atomic_set(&folio->_large_mapcount, 0);
atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
- SetPageAnonExclusive(&folio->page);
- __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
+ if (exclusive)
+ SetPageAnonExclusive(&folio->page);
+ nr_pmdmapped = nr;
}
- __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
+ __folio_mod_stat(folio, nr, nr_pmdmapped);
+ mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
}
static __always_inline void __folio_add_file_rmap(struct folio *folio,
@@ -1450,11 +1487,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
- if (nr_pmdmapped)
- __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
- NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
- if (nr)
- __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
+ __folio_mod_stat(folio, nr, nr_pmdmapped);
/* See comments in folio_add_anon_rmap_*() */
if (!folio_test_large(folio))
@@ -1503,25 +1536,31 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
enum rmap_level level)
{
atomic_t *mapped = &folio->_nr_pages_mapped;
- int last, nr = 0, nr_pmdmapped = 0;
- enum node_stat_item idx;
+ int last = 0, nr = 0, nr_pmdmapped = 0;
+ bool partially_mapped = false;
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
switch (level) {
case RMAP_LEVEL_PTE:
- do {
- last = atomic_add_negative(-1, &page->_mapcount);
- if (last && folio_test_large(folio)) {
- last = atomic_dec_return_relaxed(mapped);
- last = (last < ENTIRELY_MAPPED);
- }
+ if (!folio_test_large(folio)) {
+ nr = atomic_add_negative(-1, &folio->_mapcount);
+ break;
+ }
- if (last)
- nr++;
+ atomic_sub(nr_pages, &folio->_large_mapcount);
+ do {
+ last += atomic_add_negative(-1, &page->_mapcount);
} while (page++, --nr_pages > 0);
+
+ if (last &&
+ atomic_sub_return_relaxed(last, mapped) < ENTIRELY_MAPPED)
+ nr = last;
+
+ partially_mapped = nr && atomic_read(mapped);
break;
case RMAP_LEVEL_PMD:
+ atomic_dec(&folio->_large_mapcount);
last = atomic_add_negative(-1, &folio->_entire_mapcount);
if (last) {
nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
@@ -1536,31 +1575,22 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
nr = 0;
}
}
+
+ partially_mapped = nr && nr < nr_pmdmapped;
break;
}
- if (nr_pmdmapped) {
- if (folio_test_anon(folio))
- idx = NR_ANON_THPS;
- else if (folio_test_swapbacked(folio))
- idx = NR_SHMEM_PMDMAPPED;
- else
- idx = NR_FILE_PMDMAPPED;
- __lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
- }
- if (nr) {
- idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
- __lruvec_stat_mod_folio(folio, idx, -nr);
+ /*
+ * Queue anon large folio for deferred split if at least one page of
+ * the folio is unmapped and at least one page is still mapped.
+ *
+ * Check partially_mapped first to ensure it is a large folio.
+ */
+ if (partially_mapped && folio_test_anon(folio) &&
+ !folio_test_partially_mapped(folio))
+ deferred_split_folio(folio, true);
- /*
- * Queue anon large folio for deferred split if at least one
- * page of the folio is unmapped and at least one page
- * is still mapped.
- */
- if (folio_test_large(folio) && folio_test_anon(folio))
- if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped)
- deferred_split_folio(folio);
- }
+ __folio_mod_stat(folio, -nr, -nr_pmdmapped);
/*
* It would be tidy to reset folio_test_anon mapping when fully
@@ -1635,9 +1665,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (flags & TTU_SYNC)
pvmw.flags = PVMW_SYNC;
- if (flags & TTU_SPLIT_HUGE_PMD)
- split_huge_pmd_address(vma, address, false, folio);
-
/*
* For THP, we have to assume the worse case ie pmd for invalidation.
* For hugetlb, it could be much worse if we need to do pud
@@ -1663,9 +1690,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
- /* Unexpected PMD-mapped THP? */
- VM_BUG_ON_FOLIO(!pvmw.pte, folio);
-
/*
* If the folio is in an mlock()d vma, we must not swap it out.
*/
@@ -1674,11 +1698,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* Restore the mlock which got missed */
if (!folio_test_large(folio))
mlock_vma_folio(folio, vma);
- page_vma_mapped_walk_done(&pvmw);
- ret = false;
- break;
+ goto walk_abort;
}
+ if (!pvmw.pte) {
+ if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
+ folio))
+ goto walk_done;
+
+ if (flags & TTU_SPLIT_HUGE_PMD) {
+ /*
+ * We temporarily have to drop the PTL and
+ * restart so we can process the PTE-mapped THP.
+ */
+ split_huge_pmd_locked(vma, pvmw.address,
+ pvmw.pmd, false, folio);
+ flags &= ~TTU_SPLIT_HUGE_PMD;
+ page_vma_mapped_walk_restart(&pvmw);
+ continue;
+ }
+ }
+
+ /* Unexpected PMD-mapped THP? */
+ VM_BUG_ON_FOLIO(!pvmw.pte, folio);
+
pfn = pte_pfn(ptep_get(pvmw.pte));
subpage = folio_page(folio, pfn - folio_pfn(folio));
address = pvmw.address;
@@ -1714,11 +1757,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*/
if (!anon) {
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
- if (!hugetlb_vma_trylock_write(vma)) {
- page_vma_mapped_walk_done(&pvmw);
- ret = false;
- break;
- }
+ if (!hugetlb_vma_trylock_write(vma))
+ goto walk_abort;
if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
hugetlb_vma_unlock_write(vma);
flush_tlb_range(vma,
@@ -1733,8 +1773,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* actual page and drop map count
* to zero.
*/
- page_vma_mapped_walk_done(&pvmw);
- break;
+ goto walk_done;
}
hugetlb_vma_unlock_write(vma);
}
@@ -1780,7 +1819,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
set_huge_pte_at(mm, address, pvmw.pte, pteval,
hsz);
} else {
- dec_mm_counter(mm, mm_counter(&folio->page));
+ dec_mm_counter(mm, mm_counter(folio));
set_pte_at(mm, address, pvmw.pte, pteval);
}
@@ -1795,7 +1834,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* migration) will not expect userfaults on already
* copied pages.
*/
- dec_mm_counter(mm, mm_counter(&folio->page));
+ dec_mm_counter(mm, mm_counter(folio));
} else if (folio_test_anon(folio)) {
swp_entry_t entry = page_swap_entry(subpage);
pte_t swp_pte;
@@ -1806,9 +1845,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (unlikely(folio_test_swapbacked(folio) !=
folio_test_swapcache(folio))) {
WARN_ON_ONCE(1);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
+ goto walk_abort;
}
/* MADV_FREE page check */
@@ -1836,7 +1873,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* plus the rmap(s) (dropped by discard:).
*/
if (ref_count == 1 + map_count &&
- !folio_test_dirty(folio)) {
+ (!folio_test_dirty(folio) ||
+ /*
+ * Unlike MADV_FREE mappings, VM_DROPPABLE
+ * ones can be dropped even if they've
+ * been dirtied.
+ */
+ (vma->vm_flags & VM_DROPPABLE))) {
dec_mm_counter(mm, MM_ANONPAGES);
goto discard;
}
@@ -1846,24 +1889,23 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* discarded. Remap the page to page table.
*/
set_pte_at(mm, address, pvmw.pte, pteval);
- folio_set_swapbacked(folio);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
+ /*
+ * Unlike MADV_FREE mappings, VM_DROPPABLE ones
+ * never get swap backed on failure to drop.
+ */
+ if (!(vma->vm_flags & VM_DROPPABLE))
+ folio_set_swapbacked(folio);
+ goto walk_abort;
}
if (swap_duplicate(entry) < 0) {
set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
+ goto walk_abort;
}
if (arch_unmap_one(mm, vma, address, pteval) < 0) {
swap_free(entry);
set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
+ goto walk_abort;
}
/* See folio_try_share_anon_rmap(): clear PTE first. */
@@ -1871,9 +1913,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
folio_try_share_anon_rmap_pte(folio, subpage)) {
swap_free(entry);
set_pte_at(mm, address, pvmw.pte, pteval);
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
+ goto walk_abort;
}
if (list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
@@ -1903,7 +1943,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*
* See Documentation/mm/mmu_notifier.rst
*/
- dec_mm_counter(mm, mm_counter_file(&folio->page));
+ dec_mm_counter(mm, mm_counter_file(folio));
}
discard:
if (unlikely(folio_test_hugetlb(folio)))
@@ -1913,6 +1953,12 @@ discard:
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
folio_put(folio);
+ continue;
+walk_abort:
+ ret = false;
+walk_done:
+ page_vma_mapped_walk_done(&pvmw);
+ break;
}
mmu_notifier_invalidate_range_end(&range);
@@ -2169,7 +2215,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
trace_set_migration_pte(pvmw.address, pte_val(swp_pte),
- compound_order(&folio->page));
+ folio_order(folio));
/*
* No need to invalidate here it will synchronize on
* against the special swap migration pte.
@@ -2181,7 +2227,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
set_huge_pte_at(mm, address, pvmw.pte, pteval,
hsz);
} else {
- dec_mm_counter(mm, mm_counter(&folio->page));
+ dec_mm_counter(mm, mm_counter(folio));
set_pte_at(mm, address, pvmw.pte, pteval);
}
@@ -2196,7 +2242,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
* migration) will not expect userfaults on already
* copied pages.
*/
- dec_mm_counter(mm, mm_counter(&folio->page));
+ dec_mm_counter(mm, mm_counter(folio));
} else {
swp_entry_t entry;
pte_t swp_pte;
@@ -2261,7 +2307,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
else
set_pte_at(mm, address, pvmw.pte, swp_pte);
trace_set_migration_pte(address, pte_val(swp_pte),
- compound_order(&folio->page));
+ folio_order(folio));
/*
* No need to invalidate here it will synchronize on
* against the special swap migration pte.
@@ -2524,7 +2570,7 @@ void __put_anon_vma(struct anon_vma *anon_vma)
anon_vma_free(root);
}
-static struct anon_vma *rmap_walk_anon_lock(struct folio *folio,
+static struct anon_vma *rmap_walk_anon_lock(const struct folio *folio,
struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
@@ -2588,7 +2634,8 @@ static void rmap_walk_anon(struct folio *folio,
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
- unsigned long address = vma_address(&folio->page, vma);
+ unsigned long address = vma_address(vma, pgoff_start,
+ folio_nr_pages(folio));
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
@@ -2649,7 +2696,8 @@ static void rmap_walk_file(struct folio *folio,
lookup:
vma_interval_tree_foreach(vma, &mapping->i_mmap,
pgoff_start, pgoff_end) {
- unsigned long address = vma_address(&folio->page, vma);
+ unsigned long address = vma_address(vma, pgoff_start,
+ folio_nr_pages(folio));
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
@@ -2702,6 +2750,7 @@ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
if (flags & RMAP_EXCLUSIVE)
SetPageAnonExclusive(&folio->page);
VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
@@ -2716,6 +2765,7 @@ void hugetlb_add_new_anon_rmap(struct folio *folio,
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
+ atomic_set(&folio->_large_mapcount, 0);
folio_clear_hugetlb_restore_reserve(folio);
__folio_set_anon(folio, vma, address, true);
SetPageAnonExclusive(&folio->page);