diff options
Diffstat (limited to 'mm/truncate.c')
| -rw-r--r-- | mm/truncate.c | 155 |
1 files changed, 117 insertions, 38 deletions
diff --git a/mm/truncate.c b/mm/truncate.c index 7c304d2f0052..12467c1bd711 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -46,7 +46,7 @@ static void clear_shadow_entries(struct address_space *mapping, xas_unlock_irq(&xas); if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); + inode_lru_list_add(mapping->host); spin_unlock(&mapping->host->i_lock); } @@ -78,8 +78,22 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping, if (dax_mapping(mapping)) { for (i = j; i < nr; i++) { - if (xa_is_value(fbatch->folios[i])) + if (xa_is_value(fbatch->folios[i])) { + /* + * File systems should already have called + * dax_break_layout_entry() to remove all DAX + * entries while holding a lock to prevent + * establishing new entries. Therefore we + * shouldn't find any here. + */ + WARN_ON_ONCE(1); + + /* + * Delete the mapping so truncate_pagecache() + * doesn't loop forever. + */ dax_delete_mapping_entry(mapping, indices[i]); + } } goto out; } @@ -97,7 +111,7 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping, xas_unlock_irq(&xas); if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); + inode_lru_list_add(mapping->host); spin_unlock(&mapping->host->i_lock); out: folio_batch_remove_exceptionals(fbatch); @@ -163,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) return 0; } +static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, + unsigned long min_order) +{ + enum ttu_flags ttu_flags = + TTU_SYNC | + TTU_SPLIT_HUGE_PMD | + TTU_IGNORE_MLOCK; + int ret; + + ret = try_folio_split_to_order(folio, split_at, min_order); + + /* + * If the split fails, unmap the folio, so it will be refaulted + * with PTEs to respect SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + if (ret && !shmem_mapping(folio->mapping)) { + try_to_unmap(folio, ttu_flags); + WARN_ON(folio_mapped(folio)); + } + + return ret; +} + /* * Handle partial folios. The folio may be entirely within the * range if a split has raced with us. If not, we zero the part of the @@ -177,20 +217,22 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); + size_t size = folio_size(folio); unsigned int offset, length; + struct page *split_at, *split_at2; + unsigned int min_order; if (pos < start) offset = start - pos; else offset = 0; - length = folio_size(folio); - if (pos + length <= (u64)end) - length = length - offset; + if (pos + size <= (u64)end) + length = size - offset; else length = end + 1 - pos - offset; folio_wait_writeback(folio); - if (length == folio_size(folio)) { + if (length == size) { truncate_inode_folio(folio->mapping, folio); return true; } @@ -207,8 +249,44 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; - if (split_folio(folio) == 0) + + min_order = mapping_min_folio_order(folio->mapping); + split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); + if (!try_folio_split_or_unmap(folio, split_at, min_order)) { + /* + * try to split at offset + length to make sure folios within + * the range can be dropped, especially to avoid memory waste + * for shmem truncate + */ + struct folio *folio2; + + if (offset + length == size) + goto no_split; + + split_at2 = folio_page(folio, + PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); + folio2 = page_folio(split_at2); + + if (!folio_try_get(folio2)) + goto no_split; + + if (!folio_test_large(folio2)) + goto out; + + if (!folio_trylock(folio2)) + goto out; + + /* make sure folio2 is large and does not change its mapping */ + if (folio_test_large(folio2) && + folio2->mapping == folio->mapping) + try_folio_split_or_unmap(folio2, split_at2, min_order); + + folio_unlock(folio2); +out: + folio_put(folio2); +no_split: return true; + } if (folio_test_dirty(folio)) return false; truncate_inode_folio(folio->mapping, folio); @@ -286,7 +364,7 @@ long mapping_evict_folio(struct address_space *mapping, struct folio *folio) * page aligned properly. */ void truncate_inode_pages_range(struct address_space *mapping, - loff_t lstart, loff_t lend) + loff_t lstart, uoff_t lend) { pgoff_t start; /* inclusive */ pgoff_t end; /* exclusive */ @@ -334,7 +412,7 @@ void truncate_inode_pages_range(struct address_space *mapping, same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { - same_folio = lend < folio_pos(folio) + folio_size(folio); + same_folio = lend < folio_next_pos(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) { start = folio_next_index(folio); if (same_folio) @@ -372,7 +450,7 @@ void truncate_inode_pages_range(struct address_space *mapping, for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - /* We rely upon deletion not changing page->index */ + /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) continue; @@ -525,6 +603,15 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, } EXPORT_SYMBOL(invalidate_mapping_pages); +static int folio_launder(struct address_space *mapping, struct folio *folio) +{ + if (!folio_test_dirty(folio)) + return 0; + if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL) + return 0; + return mapping->a_ops->launder_folio(folio); +} + /* * This is like mapping_evict_folio(), except it ignores the folio's * refcount. We do this because invalidate_inode_pages2() needs stronger @@ -532,14 +619,24 @@ EXPORT_SYMBOL(invalidate_mapping_pages); * shrink_folio_list() has a temp ref on them, or because they're transiently * sitting in the folio_add_lru() caches. */ -static int invalidate_complete_folio2(struct address_space *mapping, - struct folio *folio) +int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, + gfp_t gfp) { - if (folio->mapping != mapping) - return 0; + int ret; - if (!filemap_release_folio(folio, GFP_KERNEL)) - return 0; + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (folio_mapped(folio)) + unmap_mapping_folio(folio); + BUG_ON(folio_mapped(folio)); + + ret = folio_launder(mapping, folio); + if (ret) + return ret; + if (folio->mapping != mapping) + return -EBUSY; + if (!filemap_release_folio(folio, gfp)) + return -EBUSY; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); @@ -550,7 +647,7 @@ static int invalidate_complete_folio2(struct address_space *mapping, __filemap_remove_folio(folio, NULL); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) - inode_add_lru(mapping->host); + inode_lru_list_add(mapping->host); spin_unlock(&mapping->host->i_lock); filemap_free_folio(mapping, folio); @@ -558,16 +655,7 @@ static int invalidate_complete_folio2(struct address_space *mapping, failed: xa_unlock_irq(&mapping->i_pages); spin_unlock(&mapping->host->i_lock); - return 0; -} - -static int folio_launder(struct address_space *mapping, struct folio *folio) -{ - if (!folio_test_dirty(folio)) - return 0; - if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL) - return 0; - return mapping->a_ops->launder_folio(folio); + return -EBUSY; } /** @@ -631,16 +719,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, } VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); - - if (folio_mapped(folio)) - unmap_mapping_folio(folio); - BUG_ON(folio_mapped(folio)); - - ret2 = folio_launder(mapping, folio); - if (ret2 == 0) { - if (!invalidate_complete_folio2(mapping, folio)) - ret2 = -EBUSY; - } + ret2 = folio_unmap_invalidate(mapping, folio, GFP_KERNEL); if (ret2 < 0) ret = ret2; folio_unlock(folio); |
