Diffstat (limited to 'mm')
-rw-r--r--	mm/damon/tests/core-kunit.h	  9
-rw-r--r--	mm/huge_memory.c		165
-rw-r--r--	mm/hugetlb.c			 25
-rw-r--r--	mm/shmem.c			 11
-rw-r--r--	mm/slab.h			  1
-rw-r--r--	mm/slab_common.c		 52
-rw-r--r--	mm/slub.c			 55
-rw-r--r--	mm/vmscan.c			  8
8 files changed, 197 insertions, 129 deletions
diff --git a/mm/damon/tests/core-kunit.h b/mm/damon/tests/core-kunit.h
index a1eff023e928..8cb369b63e08 100644
--- a/mm/damon/tests/core-kunit.h
+++ b/mm/damon/tests/core-kunit.h
@@ -924,7 +924,7 @@ static void damos_test_commit_for(struct kunit *test, struct damos *dst,
 	}
 }
 
-static void damos_test_commit(struct kunit *test)
+static void damos_test_commit_pageout(struct kunit *test)
 {
 	damos_test_commit_for(test,
 			&(struct damos){
@@ -945,6 +945,10 @@ static void damos_test_commit(struct kunit *test)
 					DAMOS_WMARK_FREE_MEM_RATE, 800, 50, 30},
 			});
+}
+
+static void damos_test_commit_migrate_hot(struct kunit *test)
+{
 	damos_test_commit_for(test,
 			&(struct damos){
 				.pattern = (struct damos_access_pattern){
@@ -1230,7 +1234,8 @@ static struct kunit_case damon_test_cases[] = {
 	KUNIT_CASE(damos_test_commit_quota),
 	KUNIT_CASE(damos_test_commit_dests),
 	KUNIT_CASE(damos_test_commit_filter),
-	KUNIT_CASE(damos_test_commit),
+	KUNIT_CASE(damos_test_commit_pageout),
+	KUNIT_CASE(damos_test_commit_migrate_hot),
 	KUNIT_CASE(damon_test_commit_target_regions),
 	KUNIT_CASE(damos_test_filter_out),
 	KUNIT_CASE(damon_test_feed_loop_next_input),
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7c565f11a98..40cf59301c21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3464,23 +3464,6 @@ static void lru_add_split_folio(struct folio *folio, struct folio *new_folio,
 	}
 }
 
-/* Racy check whether the huge page can be split */
-bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
-{
-	int extra_pins;
-
-	/* Additional pins from page cache */
-	if (folio_test_anon(folio))
-		extra_pins = folio_test_swapcache(folio) ?
-				folio_nr_pages(folio) : 0;
-	else
-		extra_pins = folio_nr_pages(folio);
-	if (pextra_pins)
-		*pextra_pins = extra_pins;
-	return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins -
-					caller_pins;
-}
-
 static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
 {
 	for (; nr_pages; page++, nr_pages--)
@@ -3697,15 +3680,40 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 	return 0;
 }
 
-bool folio_split_supported(struct folio *folio, unsigned int new_order,
-		enum split_type split_type, bool warns)
+/**
+ * folio_check_splittable() - check if a folio can be split to a given order
+ * @folio: folio to be split
+ * @new_order: the smallest order of the after split folios (since buddy
+ *             allocator like split generates folios with orders from @folio's
+ *             order - 1 to new_order).
+ * @split_type: uniform or non-uniform split
+ *
+ * folio_check_splittable() checks if @folio can be split to @new_order using
+ * @split_type method. The truncated folio check must come first.
+ *
+ * Context: folio must be locked.
+ *
+ * Return: 0 - @folio can be split to @new_order, otherwise an error number is
+ * returned.
+ */
+int folio_check_splittable(struct folio *folio, unsigned int new_order,
+		enum split_type split_type)
 {
+	VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
+	/*
+	 * Folios that just got truncated cannot get split. Signal to the
+	 * caller that there was a race.
+	 *
+	 * TODO: this will also currently refuse folios without a mapping in the
+	 * swapcache (shmem or to-be-anon folios).
+	 */
+	if (!folio->mapping && !folio_test_anon(folio))
+		return -EBUSY;
+
 	if (folio_test_anon(folio)) {
 		/* order-1 is not supported for anonymous THP. */
-		VM_WARN_ONCE(warns && new_order == 1,
-				"Cannot split to order-1 folio");
 		if (new_order == 1)
-			return false;
+			return -EINVAL;
 	} else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) {
 		if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
 		    !mapping_large_folio_support(folio->mapping)) {
@@ -3726,9 +3734,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 			 * case, the mapping does not actually support large
 			 * folios properly.
 			 */
-			VM_WARN_ONCE(warns,
-				"Cannot split file folio to non-0 order");
-			return false;
+			return -EINVAL;
 		}
 	}
 
@@ -3741,19 +3747,31 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 	 * here.
 	 */
 	if ((split_type == SPLIT_TYPE_NON_UNIFORM || new_order) && folio_test_swapcache(folio)) {
-		VM_WARN_ONCE(warns,
-			"Cannot split swapcache folio to non-0 order");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	if (is_huge_zero_folio(folio))
+		return -EINVAL;
+
+	if (folio_test_writeback(folio))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Number of folio references from the pagecache or the swapcache. */
+static unsigned int folio_cache_ref_count(const struct folio *folio)
+{
+	if (folio_test_anon(folio) && !folio_test_swapcache(folio))
+		return 0;
+	return folio_nr_pages(folio);
 }
 
 static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct xa_state *xas,
 		struct address_space *mapping, bool do_lru,
 		struct list_head *list, enum split_type split_type,
-		pgoff_t end, int *nr_shmem_dropped, int extra_pins)
+		pgoff_t end, int *nr_shmem_dropped)
 {
 	struct folio *end_folio = folio_next(folio);
 	struct folio *new_folio, *next;
@@ -3764,10 +3782,9 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 	VM_WARN_ON_ONCE(!mapping && end);
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	ds_queue = folio_split_queue_lock(folio);
-	if (folio_ref_freeze(folio, 1 + extra_pins)) {
+	if (folio_ref_freeze(folio, folio_cache_ref_count(folio) + 1)) {
 		struct swap_cluster_info *ci = NULL;
 		struct lruvec *lruvec;
-		int expected_refs;
 
 		if (old_order > 1) {
 			if (!list_empty(&folio->_deferred_list)) {
@@ -3835,8 +3852,8 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 
 			zone_device_private_split_cb(folio, new_folio);
 
-			expected_refs = folio_expected_ref_count(new_folio) + 1;
-			folio_ref_unfreeze(new_folio, expected_refs);
+			folio_ref_unfreeze(new_folio,
+					folio_cache_ref_count(new_folio) + 1);
 
 			if (do_lru)
 				lru_add_split_folio(folio, new_folio, lruvec, list);
@@ -3879,8 +3896,7 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 		 * Otherwise, a parallel folio_try_get() can grab @folio
 		 * and its caller can see stale page cache entries.
 		 */
-		expected_refs = folio_expected_ref_count(folio) + 1;
-		folio_ref_unfreeze(folio, expected_refs);
+		folio_ref_unfreeze(folio, folio_cache_ref_count(folio) + 1);
 
 		if (do_lru)
 			unlock_page_lruvec(lruvec);
@@ -3929,40 +3945,27 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	struct folio *new_folio, *next;
 	int nr_shmem_dropped = 0;
 	int remap_flags = 0;
-	int extra_pins, ret;
+	int ret;
 	pgoff_t end = 0;
-	bool is_hzp;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
 
-	if (folio != page_folio(split_at) || folio != page_folio(lock_at))
-		return -EINVAL;
-
-	/*
-	 * Folios that just got truncated cannot get split. Signal to the
-	 * caller that there was a race.
-	 *
-	 * TODO: this will also currently refuse shmem folios that are in the
-	 * swapcache.
-	 */
-	if (!is_anon && !folio->mapping)
-		return -EBUSY;
-
-	if (new_order >= old_order)
-		return -EINVAL;
-
-	if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
-		return -EINVAL;
+	if (folio != page_folio(split_at) || folio != page_folio(lock_at)) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	is_hzp = is_huge_zero_folio(folio);
-	if (is_hzp) {
-		pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
-		return -EBUSY;
+	if (new_order >= old_order) {
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (folio_test_writeback(folio))
-		return -EBUSY;
+	ret = folio_check_splittable(folio, new_order, split_type);
+	if (ret) {
+		VM_WARN_ONCE(ret == -EINVAL, "Tried to split an unsplittable folio");
+		goto out;
+	}
 
 	if (is_anon) {
 		/*
@@ -4027,7 +4030,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	 * Racy check if we can split the page, before unmap_folio() will
 	 * split PMDs
 	 */
-	if (!can_split_folio(folio, 1, &extra_pins)) {
+	if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1) {
 		ret = -EAGAIN;
 		goto out_unlock;
 	}
@@ -4050,8 +4053,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	}
 
 	ret = __folio_freeze_and_split_unmapped(folio, new_order, split_at, &xas, mapping,
-						true, list, split_type, end, &nr_shmem_dropped,
-						extra_pins);
+						true, list, split_type, end, &nr_shmem_dropped);
 fail:
 	if (mapping)
 		xas_unlock(&xas);
@@ -4125,20 +4127,20 @@ out:
  */
 int folio_split_unmapped(struct folio *folio, unsigned int new_order)
 {
-	int extra_pins, ret = 0;
+	int ret = 0;
 
 	VM_WARN_ON_ONCE_FOLIO(folio_mapped(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_anon(folio), folio);
 
-	if (!can_split_folio(folio, 1, &extra_pins))
+	if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1)
 		return -EAGAIN;
 
 	local_irq_disable();
 	ret = __folio_freeze_and_split_unmapped(folio, new_order, &folio->page,
 						NULL, NULL, false, NULL, SPLIT_TYPE_UNIFORM,
-						0, NULL, extra_pins);
+						0, NULL);
 	local_irq_enable();
 	return ret;
 }
@@ -4230,16 +4232,29 @@ int folio_split(struct folio *folio, unsigned int new_order,
 			SPLIT_TYPE_NON_UNIFORM);
 }
 
-int min_order_for_split(struct folio *folio)
+/**
+ * min_order_for_split() - get the minimum order @folio can be split to
+ * @folio: folio to split
+ *
+ * min_order_for_split() tells the minimum order @folio can be split to.
+ * If a file-backed folio is truncated, 0 will be returned. Any subsequent
+ * split attempt should get -EBUSY from split checking code.
+ *
+ * Return: @folio's minimum order for split
+ */
+unsigned int min_order_for_split(struct folio *folio)
 {
 	if (folio_test_anon(folio))
 		return 0;
 
-	if (!folio->mapping) {
-		if (folio_test_pmd_mappable(folio))
-			count_vm_event(THP_SPLIT_PAGE_FAILED);
-		return -EBUSY;
-	}
+	/*
+	 * If the folio got truncated, we don't know the previous mapping and
+	 * consequently the old min order. But it doesn't matter, as any split
+	 * attempt will immediately fail with -EBUSY as the folio cannot get
+	 * split until freed.
+	 */
+	if (!folio->mapping)
+		return 0;
 
 	return mapping_min_folio_order(folio->mapping);
 }
@@ -4631,7 +4646,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		 * can be split or not. So skip the check here.
 		 */
 		if (!folio_test_private(folio) &&
-		    !can_split_folio(folio, 0, NULL))
+		    folio_expected_ref_count(folio) != folio_ref_count(folio))
 			goto next;
 
 		if (!folio_trylock(folio))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9e7815b4f058..51273baec9e5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6579,6 +6579,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	struct resv_map *resv_map;
 	struct hugetlb_cgroup *h_cg = NULL;
 	long gbl_reserve, regions_needed = 0;
+	int err;
 
 	/* This should never happen */
 	if (from > to) {
@@ -6612,8 +6613,10 @@ long hugetlb_reserve_pages(struct inode *inode,
 	} else {
 		/* Private mapping. */
 		resv_map = resv_map_alloc();
-		if (!resv_map)
+		if (!resv_map) {
+			err = -ENOMEM;
 			goto out_err;
+		}
 
 		chg = to - from;
 
@@ -6621,11 +6624,15 @@ long hugetlb_reserve_pages(struct inode *inode,
 		set_vma_desc_resv_flags(desc, HPAGE_RESV_OWNER);
 	}
 
-	if (chg < 0)
+	if (chg < 0) {
+		/* region_chg() above can return -ENOMEM */
+		err = (chg == -ENOMEM) ? -ENOMEM : -EINVAL;
 		goto out_err;
+	}
 
-	if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
-				chg * pages_per_huge_page(h), &h_cg) < 0)
+	err = hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
+				chg * pages_per_huge_page(h), &h_cg);
+	if (err < 0)
 		goto out_err;
 
 	if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) {
@@ -6641,14 +6648,17 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * reservations already in place (gbl_reserve).
 	 */
 	gbl_reserve = hugepage_subpool_get_pages(spool, chg);
-	if (gbl_reserve < 0)
+	if (gbl_reserve < 0) {
+		err = gbl_reserve;
 		goto out_uncharge_cgroup;
+	}
 
 	/*
 	 * Check enough hugepages are available for the reservation.
 	 * Hand the pages back to the subpool if there are not
 	 */
-	if (hugetlb_acct_memory(h, gbl_reserve) < 0)
+	err = hugetlb_acct_memory(h, gbl_reserve);
+	if (err < 0)
 		goto out_put_pages;
 
 	/*
@@ -6667,6 +6677,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 
 		if (unlikely(add < 0)) {
 			hugetlb_acct_memory(h, -gbl_reserve);
+			err = add;
 			goto out_put_pages;
 		} else if (unlikely(chg > add)) {
 			/*
@@ -6726,7 +6737,7 @@ out_err:
 			kref_put(&resv_map->refs, resv_map_release);
 		set_vma_desc_resv_map(desc, NULL);
 	}
-	return chg < 0 ? chg : add < 0 ? add : -EINVAL;
+	return err;
 }
 
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
diff --git a/mm/shmem.c b/mm/shmem.c
index 3f194c9842a8..b329b5302c48 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5794,8 +5794,15 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 #define shmem_vm_ops				generic_file_vm_ops
 #define shmem_anon_vm_ops			generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
-#define shmem_acct_size(flags, size)		0
-#define shmem_unacct_size(flags, size)		do {} while (0)
+
+static inline int shmem_acct_size(unsigned long flags, loff_t size)
+{
+	return 0;
+}
+
+static inline void shmem_unacct_size(unsigned long flags, loff_t size)
+{
+}
 
 static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
 			struct super_block *sb, struct inode *dir,
diff --git a/mm/slab.h b/mm/slab.h
index f730e012553c..e767aa7e91b0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -422,6 +422,7 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s)
 
 bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
 void flush_all_rcu_sheaves(void);
+void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
 
 #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
 			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
diff --git a/mm/slab_common.c b/mm/slab_common.c
index b613533b29e7..eed7ea556cb1 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -492,7 +492,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		return;
 
 	/* in-flight kfree_rcu()'s may include objects from our cache */
-	kvfree_rcu_barrier();
+	kvfree_rcu_barrier_on_cache(s);
 
 	if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
 	    (s->flags & SLAB_TYPESAFE_BY_RCU)) {
@@ -2038,25 +2038,13 @@ unlock_return:
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
-/**
- * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
- *
- * Note that a single argument of kvfree_rcu() call has a slow path that
- * triggers synchronize_rcu() following by freeing a pointer. It is done
- * before the return from the function. Therefore for any single-argument
- * call that will result in a kfree() to a cache that is to be destroyed
- * during module exit, it is developer's responsibility to ensure that all
- * such calls have returned before the call to kmem_cache_destroy().
- */
-void kvfree_rcu_barrier(void)
+static inline void __kvfree_rcu_barrier(void)
 {
 	struct kfree_rcu_cpu_work *krwp;
 	struct kfree_rcu_cpu *krcp;
 	bool queued;
 	int i, cpu;
 
-	flush_all_rcu_sheaves();
-
 	/*
 	 * Firstly we detach objects and queue them over an RCU-batch
 	 * for all CPUs. Finally queued works are flushed for each CPU.
@@ -2118,8 +2106,43 @@ void kvfree_rcu_barrier(void)
 		}
 	}
 }
+
+/**
+ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
+ *
+ * Note that a single argument of kvfree_rcu() call has a slow path that
+ * triggers synchronize_rcu() following by freeing a pointer. It is done
+ * before the return from the function. Therefore for any single-argument
+ * call that will result in a kfree() to a cache that is to be destroyed
+ * during module exit, it is developer's responsibility to ensure that all
+ * such calls have returned before the call to kmem_cache_destroy().
+ */
+void kvfree_rcu_barrier(void)
+{
+	flush_all_rcu_sheaves();
+	__kvfree_rcu_barrier();
+}
 EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
 
+/**
+ * kvfree_rcu_barrier_on_cache - Wait for in-flight kvfree_rcu() calls on a
+ * specific slab cache.
+ * @s: slab cache to wait for
+ *
+ * See the description of kvfree_rcu_barrier() for details.
+ */
+void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
+{
+	if (s->cpu_sheaves)
+		flush_rcu_sheaves_on_cache(s);
+	/*
+	 * TODO: Introduce a version of __kvfree_rcu_barrier() that works
+	 * on a specific slab cache.
+	 */
+	__kvfree_rcu_barrier();
+}
+EXPORT_SYMBOL_GPL(kvfree_rcu_barrier_on_cache);
+
 static unsigned long
 kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
@@ -2215,4 +2238,3 @@ void __init kvfree_rcu_init(void)
 }
 
 #endif /* CONFIG_KVFREE_RCU_BATCHED */
-
diff --git a/mm/slub.c b/mm/slub.c
index e6a330e24145..f21b2f0c6f5a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4122,42 +4122,47 @@ static void flush_rcu_sheaf(struct work_struct *w)
 
 /* needed for kvfree_rcu_barrier() */
-void flush_all_rcu_sheaves(void)
+void flush_rcu_sheaves_on_cache(struct kmem_cache *s)
 {
 	struct slub_flush_work *sfw;
-	struct kmem_cache *s;
 	unsigned int cpu;
 
-	cpus_read_lock();
-	mutex_lock(&slab_mutex);
+	mutex_lock(&flush_lock);
 
-	list_for_each_entry(s, &slab_caches, list) {
-		if (!s->cpu_sheaves)
-			continue;
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
 
-		mutex_lock(&flush_lock);
+		/*
+		 * we don't check if rcu_free sheaf exists - racing
+		 * __kfree_rcu_sheaf() might have just removed it.
+		 * by executing flush_rcu_sheaf() on the cpu we make
+		 * sure the __kfree_rcu_sheaf() finished its call_rcu()
+		 */
 
-		for_each_online_cpu(cpu) {
-			sfw = &per_cpu(slub_flush, cpu);
+		INIT_WORK(&sfw->work, flush_rcu_sheaf);
+		sfw->s = s;
+		queue_work_on(cpu, flushwq, &sfw->work);
+	}
 
-			/*
-			 * we don't check if rcu_free sheaf exists - racing
-			 * __kfree_rcu_sheaf() might have just removed it.
-			 * by executing flush_rcu_sheaf() on the cpu we make
-			 * sure the __kfree_rcu_sheaf() finished its call_rcu()
-			 */
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
+		flush_work(&sfw->work);
+	}
 
-			INIT_WORK(&sfw->work, flush_rcu_sheaf);
-			sfw->s = s;
-			queue_work_on(cpu, flushwq, &sfw->work);
-		}
+	mutex_unlock(&flush_lock);
+}
 
-		for_each_online_cpu(cpu) {
-			sfw = &per_cpu(slub_flush, cpu);
-			flush_work(&sfw->work);
-		}
+void flush_all_rcu_sheaves(void)
+{
+	struct kmem_cache *s;
+
+	cpus_read_lock();
+	mutex_lock(&slab_mutex);
 
-		mutex_unlock(&flush_lock);
+	list_for_each_entry(s, &slab_caches, list) {
+		if (!s->cpu_sheaves)
+			continue;
+		flush_rcu_sheaves_on_cache(s);
 	}
 
 	mutex_unlock(&slab_mutex);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 900c74b6aa62..670fe9fae5ba 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1284,7 +1284,8 @@ retry:
 			goto keep_locked;
 		if (folio_test_large(folio)) {
 			/* cannot split folio, skip it */
-			if (!can_split_folio(folio, 1, NULL))
+			if (folio_expected_ref_count(folio) !=
+			    folio_ref_count(folio) - 1)
 				goto activate_locked;
 			/*
 			 * Split partially mapped folios right away.
@@ -4540,7 +4541,8 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	int scanned = 0;
 	int isolated = 0;
 	int skipped = 0;
-	int remaining = min(nr_to_scan, MAX_LRU_BATCH);
+	int scan_batch = min(nr_to_scan, MAX_LRU_BATCH);
+	int remaining = scan_batch;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 
@@ -4600,7 +4602,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 	count_memcg_events(memcg, item, isolated);
 	count_memcg_events(memcg, PGREFILL, sorted);
 	__count_vm_events(PGSCAN_ANON + type, isolated);
-	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
+	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch,
 				scanned, skipped, isolated, type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
 
 	if (type == LRU_GEN_FILE)
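
The huge_memory.c changes replace the boolean can_split_folio()/folio_split_supported() pair with folio_check_splittable() plus an open-coded reference check. The sketch below is illustrative only and not part of the diff: can_try_split() is a hypothetical wrapper, folio_check_splittable() and SPLIT_TYPE_UNIFORM come from this patch, and folio_expected_ref_count()/folio_ref_count() are existing mm helpers.

	/*
	 * Illustrative sketch, not from the patch: combined split check for a
	 * folio the caller has locked and holds exactly one reference on.
	 * Returns 0 when a split attempt looks worthwhile, or an errno.
	 */
	static int can_try_split(struct folio *folio, unsigned int new_order)
	{
		int ret;

		/* State/policy checks: 0 on success, -EINVAL or -EBUSY otherwise. */
		ret = folio_check_splittable(folio, new_order, SPLIT_TYPE_UNIFORM);
		if (ret)
			return ret;

		/*
		 * Racy reference check that replaced can_split_folio(): subtract
		 * the caller's single reference and compare against the
		 * references the rest of the mm is expected to hold (mappings,
		 * pagecache/swapcache).
		 */
		if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1)
			return -EAGAIN;

		return 0;
	}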
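
The slab changes narrow the barrier in kmem_cache_destroy() from the global kvfree_rcu_barrier() to the new per-cache kvfree_rcu_barrier_on_cache(). The module sketch below is illustrative only (struct foo and foo_cache are made up); it shows the kfree_rcu() pattern whose in-flight callbacks the per-cache barrier now waits for before the cache is torn down.

	#include <linux/module.h>
	#include <linux/slab.h>
	#include <linux/rcupdate.h>

	struct foo {
		int payload;
		struct rcu_head rcu;
	};

	static struct kmem_cache *foo_cache;	/* hypothetical cache */

	static int __init foo_init(void)
	{
		struct foo *f;

		foo_cache = KMEM_CACHE(foo, 0);
		if (!foo_cache)
			return -ENOMEM;

		f = kmem_cache_zalloc(foo_cache, GFP_KERNEL);
		if (f)
			kfree_rcu(f, rcu);	/* may still be in flight at exit time */
		return 0;
	}

	static void __exit foo_exit(void)
	{
		/*
		 * With this series, kmem_cache_destroy() waits only for pending
		 * kvfree_rcu()/kfree_rcu() work targeting foo_cache, via
		 * kvfree_rcu_barrier_on_cache(), instead of draining every cache
		 * in the system.
		 */
		kmem_cache_destroy(foo_cache);
	}

	module_init(foo_init);
	module_exit(foo_exit);
	MODULE_LICENSE("GPL");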
