Diffstat (limited to 'mm/swap.c')
| -rw-r--r-- | mm/swap.c | 190 |
1 file changed, 122 insertions, 68 deletions
diff --git a/mm/swap.c b/mm/swap.c
index 10decd9dffa1..2260dcd2775e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -45,7 +45,7 @@
 
 /* How many pages do we try to swap or page in/out together? As a power of 2 */
 int page_cluster;
-const int page_cluster_max = 31;
+static const int page_cluster_max = 31;
 
 struct cpu_fbatches {
 	/*
@@ -109,7 +109,7 @@ void __folio_put(struct folio *folio)
 	page_cache_release(folio);
 	folio_unqueue_deferred_split(folio);
 	mem_cgroup_uncharge(folio);
-	free_unref_page(&folio->page, folio_order(folio));
+	free_frozen_pages(&folio->page, folio_order(folio));
 }
 EXPORT_SYMBOL(__folio_put);
 
@@ -164,6 +164,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 	for (i = 0; i < folio_batch_count(fbatch); i++) {
 		struct folio *folio = fbatch->folios[i];
 
+		/* block memcg migration while the folio moves between lru */
+		if (move_fn != lru_add && !folio_test_clear_lru(folio))
+			continue;
+
 		folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
 		move_fn(lruvec, folio);
 
@@ -176,14 +180,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 }
 
 static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
-		struct folio *folio, move_fn_t move_fn,
-		bool on_lru, bool disable_irq)
+		struct folio *folio, move_fn_t move_fn, bool disable_irq)
 {
 	unsigned long flags;
 
-	if (on_lru && !folio_test_clear_lru(folio))
-		return;
-
 	folio_get(folio);
 
 	if (disable_irq)
@@ -191,8 +191,8 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
 	else
 		local_lock(&cpu_fbatches.lock);
 
-	if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) ||
-	    lru_cache_disabled())
+	if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
+	    !folio_may_be_lru_cached(folio) || lru_cache_disabled())
 		folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
 
 	if (disable_irq)
@@ -201,13 +201,13 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
 		local_unlock(&cpu_fbatches.lock);
 }
 
-#define folio_batch_add_and_move(folio, op, on_lru)					\
-	__folio_batch_add_and_move(							\
-		&cpu_fbatches.op,							\
-		folio,									\
-		op,									\
-		on_lru,									\
-		offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq) \
+#define folio_batch_add_and_move(folio, op)			\
+	__folio_batch_add_and_move(				\
+		&cpu_fbatches.op,				\
+		folio,						\
+		op,						\
+		offsetof(struct cpu_fbatches, op) >=		\
+			offsetof(struct cpu_fbatches, lock_irq)	\
 	)
 
 static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
@@ -231,14 +231,15 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
 void folio_rotate_reclaimable(struct folio *folio)
 {
 	if (folio_test_locked(folio) || folio_test_dirty(folio) ||
-	    folio_test_unevictable(folio))
+	    folio_test_unevictable(folio) || !folio_test_lru(folio))
 		return;
 
-	folio_batch_add_and_move(folio, lru_move_tail, true);
+	folio_batch_add_and_move(folio, lru_move_tail);
 }
 
-void lru_note_cost(struct lruvec *lruvec, bool file,
-		   unsigned int nr_io, unsigned int nr_rotated)
+void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
-		   unsigned int nr_io, unsigned int nr_rotated)
+		   unsigned int nr_io, unsigned int nr_rotated)
+	__releases(lruvec->lru_lock)
 {
 	unsigned long cost;
 
@@ -250,18 +251,14 @@ void lru_note_cost(struct lruvec *lruvec, bool file,
 	 * different between them, adjust scan balance for CPU work.
 	 */
 	cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
+	if (!cost) {
+		spin_unlock_irq(&lruvec->lru_lock);
+		return;
+	}
 
-	do {
+	for (;;) {
 		unsigned long lrusize;
 
-		/*
-		 * Hold lruvec->lru_lock is safe here, since
-		 * 1) The pinned lruvec in reclaim, or
-		 * 2) From a pre-LRU page during refault (which also holds the
-		 *    rcu lock, so would be safe even if the page was on the LRU
-		 *    and could move simultaneously to a new lruvec).
-		 */
-		spin_lock_irq(&lruvec->lru_lock);
 		/* Record cost event */
 		if (file)
 			lruvec->file_cost += cost;
@@ -285,14 +282,22 @@ void lru_note_cost(struct lruvec *lruvec, bool file,
 			lruvec->file_cost /= 2;
 			lruvec->anon_cost /= 2;
 		}
+		spin_unlock_irq(&lruvec->lru_lock);
 
-	} while ((lruvec = parent_lruvec(lruvec)));
+		lruvec = parent_lruvec(lruvec);
+		if (!lruvec)
+			break;
+		spin_lock_irq(&lruvec->lru_lock);
+	}
 }
 
 void lru_note_cost_refault(struct folio *folio)
 {
-	lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio),
-		      folio_nr_pages(folio), 0);
+	struct lruvec *lruvec;
+
+	lruvec = folio_lruvec_lock_irq(folio);
+	lru_note_cost_unlock_irq(lruvec, folio_is_file_lru(folio),
+				 folio_nr_pages(folio), 0);
 }
 
 static void lru_activate(struct lruvec *lruvec, struct folio *folio)
@@ -309,7 +314,7 @@ static void lru_activate(struct lruvec *lruvec, struct folio *folio)
 	trace_mm_lru_activate(folio);
 
 	__count_vm_events(PGACTIVATE, nr_pages);
-	__count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages);
+	count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages);
 }
 
 #ifdef CONFIG_SMP
@@ -323,10 +328,11 @@ static void folio_activate_drain(int cpu)
 
 void folio_activate(struct folio *folio)
 {
-	if (folio_test_active(folio) || folio_test_unevictable(folio))
+	if (folio_test_active(folio) || folio_test_unevictable(folio) ||
+	    !folio_test_lru(folio))
 		return;
 
-	folio_batch_add_and_move(folio, lru_activate, true);
+	folio_batch_add_and_move(folio, lru_activate);
 }
 
 #else
@@ -379,37 +385,58 @@ static void __lru_cache_activate_folio(struct folio *folio)
 }
 
 #ifdef CONFIG_LRU_GEN
-static void folio_inc_refs(struct folio *folio)
+
+static void lru_gen_inc_refs(struct folio *folio)
 {
-	unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
+	unsigned long new_flags, old_flags = READ_ONCE(folio->flags.f);
 
 	if (folio_test_unevictable(folio))
 		return;
 
+	/* see the comment on LRU_REFS_FLAGS */
 	if (!folio_test_referenced(folio)) {
-		folio_set_referenced(folio);
+		set_mask_bits(&folio->flags.f, LRU_REFS_MASK, BIT(PG_referenced));
 		return;
 	}
 
-	if (!folio_test_workingset(folio)) {
-		folio_set_workingset(folio);
-		return;
-	}
-
-	/* see the comment on MAX_NR_TIERS */
 	do {
-		new_flags = old_flags & LRU_REFS_MASK;
-		if (new_flags == LRU_REFS_MASK)
-			break;
+		if ((old_flags & LRU_REFS_MASK) == LRU_REFS_MASK) {
+			if (!folio_test_workingset(folio))
+				folio_set_workingset(folio);
+			return;
+		}
 
-		new_flags += BIT(LRU_REFS_PGOFF);
-		new_flags |= old_flags & ~LRU_REFS_MASK;
-	} while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
+		new_flags = old_flags + BIT(LRU_REFS_PGOFF);
+	} while (!try_cmpxchg(&folio->flags.f, &old_flags, new_flags));
 }
-#else
-static void folio_inc_refs(struct folio *folio)
+
+static bool lru_gen_clear_refs(struct folio *folio)
+{
+	struct lru_gen_folio *lrugen;
+	int gen = folio_lru_gen(folio);
+	int type = folio_is_file_lru(folio);
+
+	if (gen < 0)
+		return true;
+
+	set_mask_bits(&folio->flags.f, LRU_REFS_FLAGS | BIT(PG_workingset), 0);
+
+	lrugen = &folio_lruvec(folio)->lrugen;
+	/* whether can do without shuffling under the LRU lock */
+	return gen == lru_gen_from_seq(READ_ONCE(lrugen->min_seq[type]));
+}
+
+#else /* !CONFIG_LRU_GEN */
+
+static void lru_gen_inc_refs(struct folio *folio)
 {
 }
+
+static bool lru_gen_clear_refs(struct folio *folio)
+{
+	return false;
+}
+
 #endif /* CONFIG_LRU_GEN */
 
 /**
@@ -427,8 +454,10 @@ static void folio_inc_refs(struct folio *folio)
  */
 void folio_mark_accessed(struct folio *folio)
 {
+	if (folio_test_dropbehind(folio))
+		return;
 	if (lru_gen_enabled()) {
-		folio_inc_refs(folio);
+		lru_gen_inc_refs(folio);
 		return;
 	}
 
@@ -474,12 +503,12 @@ void folio_add_lru(struct folio *folio)
 			folio_test_unevictable(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 
-	/* see the comment in lru_gen_add_folio() */
+	/* see the comment in lru_gen_folio_seq() */
 	if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
 	    lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
 		folio_set_active(folio);
 
-	folio_batch_add_and_move(folio, lru_add, false);
+	folio_batch_add_and_move(folio, lru_add);
 }
 EXPORT_SYMBOL(folio_add_lru);
 
@@ -524,7 +553,7 @@ void folio_add_lru_vma(struct folio *folio, struct vm_area_struct *vma)
  */
 static void lru_deactivate_file(struct lruvec *lruvec, struct folio *folio)
 {
-	bool active = folio_test_active(folio);
+	bool active = folio_test_active(folio) || lru_gen_enabled();
 	long nr_pages = folio_nr_pages(folio);
 
 	if (folio_test_unevictable(folio))
@@ -558,7 +587,7 @@ static void lru_deactivate_file(struct lruvec *lruvec, struct folio *folio)
 
 	if (active) {
 		__count_vm_events(PGDEACTIVATE, nr_pages);
-		__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
+		count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
 				     nr_pages);
 	}
 }
@@ -576,7 +605,7 @@ static void lru_deactivate(struct lruvec *lruvec, struct folio *folio)
 	lruvec_add_folio(lruvec, folio);
 
 	__count_vm_events(PGDEACTIVATE, nr_pages);
-	__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages);
+	count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages);
 }
 
 static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio)
@@ -589,7 +618,10 @@ static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio)
 
 	lruvec_del_folio(lruvec, folio);
 	folio_clear_active(folio);
-	folio_clear_referenced(folio);
+	if (lru_gen_enabled())
+		lru_gen_clear_refs(folio);
+	else
+		folio_clear_referenced(folio);
 	/*
 	 * Lazyfree folios are clean anonymous folios. They have
 	 * the swapbacked flag cleared, to distinguish them from normal
@@ -599,7 +631,7 @@ static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio)
 	lruvec_add_folio(lruvec, folio);
 
 	__count_vm_events(PGLAZYFREE, nr_pages);
-	__count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages);
+	count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages);
 }
 
 /*
@@ -654,10 +686,13 @@ void lru_add_drain_cpu(int cpu)
 void deactivate_file_folio(struct folio *folio)
 {
 	/* Deactivating an unevictable folio will not accelerate reclaim */
-	if (folio_test_unevictable(folio))
+	if (folio_test_unevictable(folio) || !folio_test_lru(folio))
+		return;
+
+	if (lru_gen_enabled() && lru_gen_clear_refs(folio))
 		return;
 
-	folio_batch_add_and_move(folio, lru_deactivate_file, true);
+	folio_batch_add_and_move(folio, lru_deactivate_file);
 }
 
 /*
@@ -670,10 +705,13 @@ void deactivate_file_folio(struct folio *folio)
  */
 void folio_deactivate(struct folio *folio)
 {
-	if (folio_test_unevictable(folio) || !(folio_test_active(folio) || lru_gen_enabled()))
+	if (folio_test_unevictable(folio) || !folio_test_lru(folio))
+		return;
+
+	if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio))
 		return;
 
-	folio_batch_add_and_move(folio, lru_deactivate, true);
+	folio_batch_add_and_move(folio, lru_deactivate);
 }
 
 /**
@@ -686,10 +724,11 @@ void folio_deactivate(struct folio *folio)
 void folio_mark_lazyfree(struct folio *folio)
 {
 	if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
+	    !folio_test_lru(folio) ||
 	    folio_test_swapcache(folio) || folio_test_unevictable(folio))
 		return;
 
-	folio_batch_add_and_move(folio, lru_lazyfree, true);
+	folio_batch_add_and_move(folio, lru_lazyfree);
 }
 
 void lru_add_drain(void)
@@ -795,6 +834,9 @@ static inline void __lru_add_drain_all(bool force_all_cpus)
 	 */
 	this_gen = smp_load_acquire(&lru_drain_gen);
 
+	/* It helps everyone if we do our own local drain immediately. */
+	lru_add_drain();
+
 	mutex_lock(&lock);
 
 	/*
@@ -924,8 +966,6 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 				unlock_page_lruvec_irqrestore(lruvec, flags);
 				lruvec = NULL;
 			}
-			if (put_devmap_managed_folio_refs(folio, nr_refs))
-				continue;
 			if (folio_ref_sub_and_test(folio, nr_refs))
 				free_zone_device_folio(folio);
 			continue;
@@ -1044,12 +1084,24 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
 	fbatch->nr = j;
 }
 
+static const struct ctl_table swap_sysctl_table[] = {
+	{
+		.procname	= "page-cluster",
+		.data		= &page_cluster,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= (void *)&page_cluster_max,
+	}
+};
+
 /*
  * Perform any setup for the swap system
 */
 void __init swap_setup(void)
 {
-	unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
+	unsigned long megs = PAGES_TO_MB(totalram_pages());
 
 	/* Use a smaller cluster for small-memory machines */
 	if (megs < 16)
@@ -1060,4 +1112,6 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+
+	register_sysctl_init("vm", swap_sysctl_table);
 }
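For reference, the offsetof() comparison that the reworked folio_batch_add_and_move() macro above passes as disable_irq can be illustrated with a small standalone C sketch (userspace only; the struct and member names below are hypothetical stand-ins for struct cpu_fbatches, not taken from this patch): any batch member declared at or after the lock_irq member is treated as needing interrupts disabled.

#include <stddef.h>
#include <stdio.h>

struct fbatches_demo {
	int lock;		/* plain per-CPU lock                   */
	int lru_add;		/* declared before lock_irq: no IRQ off */
	int lock_irq;		/* IRQ-disabling lock                   */
	int lru_move_tail;	/* declared after lock_irq: IRQ off     */
};

/* Same idea as the macro above: compare member offsets at compile time. */
#define NEEDS_IRQ_OFF(member) \
	(offsetof(struct fbatches_demo, member) >= \
	 offsetof(struct fbatches_demo, lock_irq))

int main(void)
{
	printf("lru_add       -> disable_irq = %d\n", NEEDS_IRQ_OFF(lru_add));
	printf("lru_move_tail -> disable_irq = %d\n", NEEDS_IRQ_OFF(lru_move_tail));
	return 0;
}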
