Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	165
1 file changed, 90 insertions, 75 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7c565f11a98..40cf59301c21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3464,23 +3464,6 @@ static void lru_add_split_folio(struct folio *folio, struct folio *new_folio,
 	}
 }
 
-/* Racy check whether the huge page can be split */
-bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
-{
-	int extra_pins;
-
-	/* Additional pins from page cache */
-	if (folio_test_anon(folio))
-		extra_pins = folio_test_swapcache(folio) ?
-				folio_nr_pages(folio) : 0;
-	else
-		extra_pins = folio_nr_pages(folio);
-	if (pextra_pins)
-		*pextra_pins = extra_pins;
-	return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins -
-					caller_pins;
-}
-
 static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
 {
 	for (; nr_pages; page++, nr_pages--)
@@ -3697,15 +3680,40 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 	return 0;
 }
 
-bool folio_split_supported(struct folio *folio, unsigned int new_order,
-		enum split_type split_type, bool warns)
+/**
+ * folio_check_splittable() - check if a folio can be split to a given order
+ * @folio: folio to be split
+ * @new_order: the smallest order of the after split folios (since buddy
+ *             allocator like split generates folios with orders from @folio's
+ *             order - 1 to new_order).
+ * @split_type: uniform or non-uniform split
+ *
+ * folio_check_splittable() checks if @folio can be split to @new_order using
+ * @split_type method. The truncated folio check must come first.
+ *
+ * Context: folio must be locked.
+ *
+ * Return: 0 - @folio can be split to @new_order, otherwise an error number is
+ * returned.
+ */
+int folio_check_splittable(struct folio *folio, unsigned int new_order,
+		enum split_type split_type)
 {
+	VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
+	/*
+	 * Folios that just got truncated cannot get split. Signal to the
+	 * caller that there was a race.
+	 *
+	 * TODO: this will also currently refuse folios without a mapping in the
+	 * swapcache (shmem or to-be-anon folios).
+	 */
+	if (!folio->mapping && !folio_test_anon(folio))
+		return -EBUSY;
+
 	if (folio_test_anon(folio)) {
 		/* order-1 is not supported for anonymous THP. */
-		VM_WARN_ONCE(warns && new_order == 1,
-			     "Cannot split to order-1 folio");
 		if (new_order == 1)
-			return false;
+			return -EINVAL;
 	} else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) {
 		if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
 		    !mapping_large_folio_support(folio->mapping)) {
@@ -3726,9 +3734,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 			 * case, the mapping does not actually support large
 			 * folios properly.
 			 */
-			VM_WARN_ONCE(warns,
-				     "Cannot split file folio to non-0 order");
-			return false;
+			return -EINVAL;
 		}
 	}
 
@@ -3741,19 +3747,31 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 	 * here.
 	 */
 	if ((split_type == SPLIT_TYPE_NON_UNIFORM || new_order) && folio_test_swapcache(folio)) {
-		VM_WARN_ONCE(warns,
-			     "Cannot split swapcache folio to non-0 order");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	if (is_huge_zero_folio(folio))
+		return -EINVAL;
+
+	if (folio_test_writeback(folio))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Number of folio references from the pagecache or the swapcache. */
+static unsigned int folio_cache_ref_count(const struct folio *folio)
+{
+	if (folio_test_anon(folio) && !folio_test_swapcache(folio))
+		return 0;
+	return folio_nr_pages(folio);
 }
 
 static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct xa_state *xas,
 		struct address_space *mapping, bool do_lru,
 		struct list_head *list, enum split_type split_type,
-		pgoff_t end, int *nr_shmem_dropped, int extra_pins)
+		pgoff_t end, int *nr_shmem_dropped)
 {
 	struct folio *end_folio = folio_next(folio);
 	struct folio *new_folio, *next;
@@ -3764,10 +3782,9 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 	VM_WARN_ON_ONCE(!mapping && end);
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	ds_queue = folio_split_queue_lock(folio);
-	if (folio_ref_freeze(folio, 1 + extra_pins)) {
+	if (folio_ref_freeze(folio, folio_cache_ref_count(folio) + 1)) {
 		struct swap_cluster_info *ci = NULL;
 		struct lruvec *lruvec;
-		int expected_refs;
 
 		if (old_order > 1) {
 			if (!list_empty(&folio->_deferred_list)) {
@@ -3835,8 +3852,8 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 
 			zone_device_private_split_cb(folio, new_folio);
 
-			expected_refs = folio_expected_ref_count(new_folio) + 1;
-			folio_ref_unfreeze(new_folio, expected_refs);
+			folio_ref_unfreeze(new_folio,
+					   folio_cache_ref_count(new_folio) + 1);
 
 			if (do_lru)
 				lru_add_split_folio(folio, new_folio, lruvec, list);
@@ -3879,8 +3896,7 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n
 		 * Otherwise, a parallel folio_try_get() can grab @folio
 		 * and its caller can see stale page cache entries.
 		 */
-		expected_refs = folio_expected_ref_count(folio) + 1;
-		folio_ref_unfreeze(folio, expected_refs);
+		folio_ref_unfreeze(folio, folio_cache_ref_count(folio) + 1);
 
 		if (do_lru)
 			unlock_page_lruvec(lruvec);
@@ -3929,40 +3945,27 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	struct folio *new_folio, *next;
 	int nr_shmem_dropped = 0;
 	int remap_flags = 0;
-	int extra_pins, ret;
+	int ret;
 	pgoff_t end = 0;
-	bool is_hzp;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
 
-	if (folio != page_folio(split_at) || folio != page_folio(lock_at))
-		return -EINVAL;
-
-	/*
-	 * Folios that just got truncated cannot get split. Signal to the
-	 * caller that there was a race.
-	 *
-	 * TODO: this will also currently refuse shmem folios that are in the
-	 * swapcache.
-	 */
-	if (!is_anon && !folio->mapping)
-		return -EBUSY;
-
-	if (new_order >= old_order)
-		return -EINVAL;
-
-	if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
-		return -EINVAL;
+	if (folio != page_folio(split_at) || folio != page_folio(lock_at)) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	is_hzp = is_huge_zero_folio(folio);
-	if (is_hzp) {
-		pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
-		return -EBUSY;
+	if (new_order >= old_order) {
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (folio_test_writeback(folio))
-		return -EBUSY;
+	ret = folio_check_splittable(folio, new_order, split_type);
+	if (ret) {
+		VM_WARN_ONCE(ret == -EINVAL, "Tried to split an unsplittable folio");
+		goto out;
+	}
 
 	if (is_anon) {
 		/*
@@ -4027,7 +4030,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	 * Racy check if we can split the page, before unmap_folio() will
 	 * split PMDs
 	 */
-	if (!can_split_folio(folio, 1, &extra_pins)) {
+	if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1) {
 		ret = -EAGAIN;
 		goto out_unlock;
 	}
@@ -4050,8 +4053,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	}
 
 	ret = __folio_freeze_and_split_unmapped(folio, new_order, split_at, &xas, mapping,
-			true, list, split_type, end, &nr_shmem_dropped,
-			extra_pins);
+			true, list, split_type, end, &nr_shmem_dropped);
 fail:
 	if (mapping)
 		xas_unlock(&xas);
@@ -4125,20 +4127,20 @@ out:
  */
 int folio_split_unmapped(struct folio *folio, unsigned int new_order)
 {
-	int extra_pins, ret = 0;
+	int ret = 0;
 
 	VM_WARN_ON_ONCE_FOLIO(folio_mapped(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_anon(folio), folio);
 
-	if (!can_split_folio(folio, 1, &extra_pins))
+	if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1)
 		return -EAGAIN;
 
 	local_irq_disable();
 	ret = __folio_freeze_and_split_unmapped(folio, new_order, &folio->page, NULL,
 			NULL, false, NULL, SPLIT_TYPE_UNIFORM,
-			0, NULL, extra_pins);
+			0, NULL);
 	local_irq_enable();
 	return ret;
 }
@@ -4230,16 +4232,29 @@ int folio_split(struct folio *folio, unsigned int new_order,
 			SPLIT_TYPE_NON_UNIFORM);
 }
 
-int min_order_for_split(struct folio *folio)
+/**
+ * min_order_for_split() - get the minimum order @folio can be split to
+ * @folio: folio to split
+ *
+ * min_order_for_split() tells the minimum order @folio can be split to.
+ * If a file-backed folio is truncated, 0 will be returned. Any subsequent
+ * split attempt should get -EBUSY from split checking code.
+ *
+ * Return: @folio's minimum order for split
+ */
+unsigned int min_order_for_split(struct folio *folio)
 {
 	if (folio_test_anon(folio))
 		return 0;
 
-	if (!folio->mapping) {
-		if (folio_test_pmd_mappable(folio))
-			count_vm_event(THP_SPLIT_PAGE_FAILED);
-		return -EBUSY;
-	}
+	/*
+	 * If the folio got truncated, we don't know the previous mapping and
+	 * consequently the old min order. But it doesn't matter, as any split
+	 * attempt will immediately fail with -EBUSY as the folio cannot get
+	 * split until freed.
+	 */
+	if (!folio->mapping)
+		return 0;
 
 	return mapping_min_folio_order(folio->mapping);
 }
@@ -4631,7 +4646,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 	 * can be split or not. So skip the check here.
 	 */
 	if (!folio_test_private(folio) &&
-	    !can_split_folio(folio, 0, NULL))
+	    folio_expected_ref_count(folio) != folio_ref_count(folio))
 		goto next;
 
 	if (!folio_trylock(folio))
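The functional core of this patch is a calling-convention switch: the old can_split_folio()/folio_split_supported() helpers answered with a bool and printed warnings themselves, while the new folio_check_splittable() returns 0 or a negative errno and lets the caller decide whether a failure deserves a warning (-EBUSY for transient states such as a truncation race or writeback, -EINVAL for requests that can never succeed). The following is a minimal userspace sketch of that pattern only; struct folio_stub, its fields and check_splittable() are simplified stand-ins invented for illustration, not the kernel implementation.

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified stand-in for the state folio_check_splittable() inspects. */
	struct folio_stub {
		bool anon;		/* anonymous memory? */
		bool has_mapping;	/* still attached to an address_space? */
		bool writeback;		/* under writeback? */
	};

	/*
	 * Sketch of the new convention: 0 means "can split", a negative errno
	 * says why not. -EBUSY marks transient conditions, -EINVAL marks
	 * requests that can never succeed.
	 */
	static int check_splittable(const struct folio_stub *f, unsigned int new_order)
	{
		if (!f->has_mapping && !f->anon)
			return -EBUSY;		/* truncated under us: caller may retry */
		if (f->anon && new_order == 1)
			return -EINVAL;		/* order-1 anon split is unsupported */
		if (f->writeback)
			return -EBUSY;		/* wait for writeback to finish */
		return 0;
	}

	int main(void)
	{
		struct folio_stub truncated = { .anon = false, .has_mapping = false };
		int ret = check_splittable(&truncated, 0);

		/* The caller, not the helper, decides whether to warn. */
		if (ret == -EINVAL)
			fprintf(stderr, "tried to split an unsplittable folio\n");
		else if (ret == -EBUSY)
			fprintf(stderr, "split raced with truncation/writeback, try again\n");
		return 0;
	}

As in the patched __folio_split(), the caller warns only for -EINVAL (an unsplittable folio), treats -EBUSY as retryable, and funnels every failure through a single exit path.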
