Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 127
1 file changed, 79 insertions, 48 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6114a1fc6c68..27f90896f789 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1606,9 +1606,10 @@ static void folio_check_dirty_writeback(struct folio *folio,
 		mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
 }
 
-static struct page *alloc_demote_page(struct page *page, unsigned long private)
+static struct folio *alloc_demote_folio(struct folio *src,
+		unsigned long private)
 {
-	struct page *target_page;
+	struct folio *dst;
 	nodemask_t *allowed_mask;
 	struct migration_target_control *mtc;
 
@@ -1626,14 +1627,14 @@ static struct page *alloc_demote_page(struct page *page, unsigned long private)
 	 */
 	mtc->nmask = NULL;
 	mtc->gfp_mask |= __GFP_THISNODE;
-	target_page = alloc_migration_target(page, (unsigned long)mtc);
-	if (target_page)
-		return target_page;
+	dst = alloc_migration_target(src, (unsigned long)mtc);
+	if (dst)
+		return dst;
 
 	mtc->gfp_mask &= ~__GFP_THISNODE;
 	mtc->nmask = allowed_mask;
 
-	return alloc_migration_target(page, (unsigned long)mtc);
+	return alloc_migration_target(src, (unsigned long)mtc);
 }
 
 /*
@@ -1668,7 +1669,7 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
 	node_get_allowed_targets(pgdat, &allowed_mask);
 
 	/* Demotion ignores all cpuset and mempolicy settings */
-	migrate_pages(demote_folios, alloc_demote_page, NULL,
+	migrate_pages(demote_folios, alloc_demote_folio, NULL,
 		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
 		      &nr_succeeded);
 
@@ -2255,6 +2256,25 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 
 }
 
+#ifdef CONFIG_CMA
+/*
+ * It is waste of effort to scan and reclaim CMA pages if it is not available
+ * for current allocation context. Kswapd can not be enrolled as it can not
+ * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
+ */
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+	return !current_is_kswapd() &&
+			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
+			get_pageblock_migratetype(&folio->page) == MIGRATE_CMA;
+}
+#else
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+	return false;
+}
+#endif
+
 /*
  * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
  *
@@ -2301,7 +2321,8 @@
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx) {
+		if (folio_zonenum(folio) > sc->reclaim_idx ||
+				skip_cma(folio, sc)) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
 			goto move;
@@ -2443,7 +2464,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 	 * won't get blocked by normal direct-reclaimers, forming a circular
 	 * deadlock.
 	 */
-	if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
+	if (gfp_has_io_fs(sc->gfp_mask))
 		inactive >>= 3;
 
 	too_many = isolated > inactive;
@@ -3218,6 +3239,16 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
 #define get_cap(cap)	static_branch_unlikely(&lru_gen_caps[cap])
 #endif
 
+static bool should_walk_mmu(void)
+{
+	return arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK);
+}
+
+static bool should_clear_pmd_young(void)
+{
+	return arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG);
+}
+
 /******************************************************************************
  *                          shorthand helpers
 ******************************************************************************/
@@ -3978,28 +4009,29 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
 	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
 	int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
 
-	VM_WARN_ON_ONCE(pmd_leaf(*pmd));
-
-	ptl = pte_lockptr(args->mm, pmd);
-	if (!spin_trylock(ptl))
+	pte = pte_offset_map_nolock(args->mm, pmd, start & PMD_MASK, &ptl);
+	if (!pte)
+		return false;
+	if (!spin_trylock(ptl)) {
+		pte_unmap(pte);
 		return false;
+	}
 
 	arch_enter_lazy_mmu_mode();
-
-	pte = pte_offset_map(pmd, start & PMD_MASK);
 restart:
 	for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) {
 		unsigned long pfn;
 		struct folio *folio;
+		pte_t ptent = ptep_get(pte + i);
 
 		total++;
 		walk->mm_stats[MM_LEAF_TOTAL]++;
 
-		pfn = get_pte_pfn(pte[i], args->vma, addr);
+		pfn = get_pte_pfn(ptent, args->vma, addr);
 		if (pfn == -1)
 			continue;
 
-		if (!pte_young(pte[i])) {
+		if (!pte_young(ptent)) {
 			walk->mm_stats[MM_LEAF_OLD]++;
 			continue;
 		}
@@ -4014,7 +4046,7 @@ restart:
 		young++;
 		walk->mm_stats[MM_LEAF_YOUNG]++;
 
-		if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
+		if (pte_dirty(ptent) && !folio_test_dirty(folio) &&
 		    !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
 		      !folio_test_swapcache(folio)))
 			folio_mark_dirty(folio);
@@ -4027,10 +4059,8 @@ restart:
 	if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end))
 		goto restart;
 
-	pte_unmap(pte);
-
 	arch_leave_lazy_mmu_mode();
-	spin_unlock(ptl);
+	pte_unmap_unlock(pte, ptl);
 
 	return suitable_to_scan(total, young);
 }
@@ -4082,7 +4112,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
 			goto next;
 
 		if (!pmd_trans_huge(pmd[i])) {
-			if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
+			if (should_clear_pmd_young())
 				pmdp_test_and_clear_young(vma, addr, pmd + i);
 			goto next;
 		}
@@ -4128,7 +4158,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
 	unsigned long next;
 	unsigned long addr;
 	struct vm_area_struct *vma;
-	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)];
+	DECLARE_BITMAP(bitmap, MIN_LRU_BATCH);
 	unsigned long first = -1;
 	struct lru_gen_mm_walk *walk = args->private;
 
@@ -4175,7 +4205,7 @@ restart:
 #endif
 		walk->mm_stats[MM_NONLEAF_TOTAL]++;
 
-		if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+		if (should_clear_pmd_young()) {
 			if (!pmd_young(val))
 				continue;
 
@@ -4477,7 +4507,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 	 * handful of PTEs. Spreading the work out over a period of time usually
 	 * is less efficient, but it avoids bursty page faults.
 	 */
-	if (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK)) {
+	if (!should_walk_mmu()) {
 		success = iterate_mm_list_nowalk(lruvec, max_seq);
 		goto done;
 	}
@@ -4659,12 +4689,13 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 
 	for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
 		unsigned long pfn;
+		pte_t ptent = ptep_get(pte + i);
 
-		pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
+		pfn = get_pte_pfn(ptent, pvmw->vma, addr);
 		if (pfn == -1)
 			continue;
 
-		if (!pte_young(pte[i]))
+		if (!pte_young(ptent))
 			continue;
 
 		folio = get_pfn_folio(pfn, memcg, pgdat, !walk || walk->can_swap);
@@ -4676,7 +4707,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 
 		young++;
 
-		if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
+		if (pte_dirty(ptent) && !folio_test_dirty(folio) &&
 		    !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
 		      !folio_test_swapcache(folio)))
 			folio_mark_dirty(folio);
@@ -4831,8 +4862,10 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 	}
 }
 
-void lru_gen_soft_reclaim(struct lruvec *lruvec)
+void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
 {
+	struct lruvec *lruvec = get_lruvec(memcg, nid);
+
 	/* see the comment on MEMCG_NR_GENS */
 	if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
 		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
@@ -4898,7 +4931,6 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
 
 		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
 			   lrugen->protected[hist][type][tier - 1] + delta);
-		__mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
 		return true;
 	}
 
@@ -5713,10 +5745,10 @@ static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, c
 	if (get_cap(LRU_GEN_CORE))
 		caps |= BIT(LRU_GEN_CORE);
 
-	if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
+	if (should_walk_mmu())
 		caps |= BIT(LRU_GEN_MM_WALK);
 
-	if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
+	if (should_clear_pmd_young())
 		caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
 
 	return sysfs_emit(buf, "0x%04x\n", caps);
@@ -6384,14 +6416,13 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 		if (!managed_zone(zone))
 			continue;
 
-		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
-		case COMPACT_SUCCESS:
-		case COMPACT_CONTINUE:
+		/* Allocation can already succeed, nothing to do */
+		if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
+				      sc->reclaim_idx, 0))
+			return false;
+
+		if (compaction_suitable(zone, sc->order, sc->reclaim_idx))
 			return false;
-		default:
-			/* check next zone */
-			;
-		}
 	}
 
 	/*
@@ -6579,14 +6610,14 @@ again:
 static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long watermark;
-	enum compact_result suitable;
 
-	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
-	if (suitable == COMPACT_SUCCESS)
-		/* Allocation should succeed already. Don't reclaim. */
+	/* Allocation can already succeed, nothing to do */
+	if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
+			      sc->reclaim_idx, 0))
 		return true;
-	if (suitable == COMPACT_SKIPPED)
-		/* Compaction cannot yet proceed. Do reclaim. */
+
+	/* Compaction cannot yet proceed. Do reclaim. */
+	if (!compaction_suitable(zone, sc->order, sc->reclaim_idx))
 		return false;
 
 	/*
@@ -6873,7 +6904,7 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)
 			continue;
 
 		pfmemalloc_reserve += min_wmark_pages(zone);
-		free_pages += zone_page_state(zone, NR_FREE_PAGES);
+		free_pages += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 	}
 
 	/* If there are no reserves (unexpected config) then do not throttle */
@@ -7826,7 +7857,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 /*
  * This kswapd start function will be called by init and node-hot-add.
  */
-void kswapd_run(int nid)
+void __meminit kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 
@@ -7847,7 +7878,7 @@ void kswapd_run(int nid)
  * Called by memory hotplug when all memory in a node is offlined. Caller must
  * be holding mem_hotplug_begin/done().
  */
-void kswapd_stop(int nid)
+void __meminit kswapd_stop(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 	struct task_struct *kswapd;