summaryrefslogtreecommitdiff
path: root/fs/iomap
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2023-07-24 16:12:29 -0700
committerDarrick J. Wong <djwong@kernel.org>2023-07-24 16:12:29 -0700
commitd42bd17c6a20638ddf96862bfc0c47e481c28392 (patch)
tree97191965a56afcb9f875fcc8dfd44a1bec80136f /fs/iomap
parent6eaae198076080886b9e7d57f4ae06fa782f90ef (diff)
parent5d8edfb900d55db7dbf2dbf325bfb9fdb863ec72 (diff)
Merge tag 'large-folio-writes' of git://git.infradead.org/users/willy/pagecache into iomap-6.6-merge
Create large folios in iomap buffered write path Commit ebb7fb1557b1 limited the length of ioend chains to 4096 entries to improve worst-case latency. Unfortunately, this had the effect of limiting the performance of: fio -name write-bandwidth -rw=write -bs=1024Ki -size=32Gi -runtime=30 \ -iodepth 1 -ioengine sync -zero_buffers=1 -direct=0 -end_fsync=1 \ -numjobs=4 -directory=/mnt/test https://lore.kernel.org/linux-xfs/20230508172406.1CF3.409509F4@e16-tech.com/ The problem ends up being lock contention on the i_pages spinlock as we clear the writeback bit on each folio (and propagate that up through the tree). By using larger folios, we decrease the number of folios to be processed by a factor of 256 for this benchmark, eliminating the lock contention. Creating large folios in the buffered write path is also the right thing to do. It's a project that has been on the back burner for years, it just hasn't been important enough to do before now. * tag 'large-folio-writes' of git://git.infradead.org/users/willy/pagecache: iomap: Copy larger chunks from userspace iomap: Create large folios in the buffered write path filemap: Allow __filemap_get_folio to allocate large folios filemap: Add fgf_t typedef iomap: Remove unnecessary test from iomap_release_folio() doc: Correct the description of ->release_folio iomap: Remove large folio handling in iomap_invalidate_folio() iov_iter: Add copy_folio_from_iter_atomic() iov_iter: Handle compound highmem pages in copy_page_from_iter_atomic() iov_iter: Map the page later in copy_page_from_iter_atomic() [djwong: yay amortizations!] Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/iomap')
-rw-r--r--fs/iomap/buffered-io.c54
1 files changed, 26 insertions, 28 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index aa8967cca1a3..608cd2e5a7c9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -461,16 +461,18 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
* iomap_get_folio - get a folio reference for writing
* @iter: iteration structure
* @pos: start offset of write
+ * @len: Suggested size of folio to create.
*
* Returns a locked reference to the folio at @pos, or an error pointer if the
* folio could not be obtained.
*/
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
{
- unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS;
+ fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
if (iter->flags & IOMAP_NOWAIT)
fgp |= FGP_NOWAIT;
+ fgp |= fgf_set_order(len);
return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
fgp, mapping_gfp_mask(iter->inode->i_mapping));
@@ -483,12 +485,11 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
folio_size(folio));
/*
- * mm accommodates an old ext3 case where clean folios might
- * not have had the dirty bit cleared. Thus, it can send actual
- * dirty folios to ->release_folio() via shrink_active_list();
- * skip those here.
+ * If the folio is dirty, we refuse to release our metadata because
+ * it may be partially dirty. Once we track per-block dirty state,
+ * we can release the metadata if every block is dirty.
*/
- if (folio_test_dirty(folio) || folio_test_writeback(folio))
+ if (folio_test_dirty(folio))
return false;
iomap_page_release(folio);
return true;
@@ -508,11 +509,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
WARN_ON_ONCE(folio_test_writeback(folio));
folio_cancel_dirty(folio);
iomap_page_release(folio);
- } else if (folio_test_large(folio)) {
- /* Must release the iop so the page can be split */
- WARN_ON_ONCE(!folio_test_uptodate(folio) &&
- folio_test_dirty(folio));
- iomap_page_release(folio);
}
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
@@ -603,7 +599,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
if (folio_ops && folio_ops->get_folio)
return folio_ops->get_folio(iter, pos, len);
else
- return iomap_get_folio(iter, pos);
+ return iomap_get_folio(iter, pos, len);
}
static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
@@ -773,6 +769,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
loff_t length = iomap_length(iter);
+ size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
loff_t pos = iter->pos;
ssize_t written = 0;
long status = 0;
@@ -781,15 +778,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
do {
struct folio *folio;
- struct page *page;
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
+ size_t offset; /* Offset into folio */
+ size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
- offset = offset_in_page(pos);
- bytes = min_t(unsigned long, PAGE_SIZE - offset,
- iov_iter_count(i));
-again:
+ offset = pos & (chunk - 1);
+ bytes = min(chunk - offset, iov_iter_count(i));
status = balance_dirty_pages_ratelimited_flags(mapping,
bdp_flags);
if (unlikely(status))
@@ -819,12 +813,14 @@ again:
if (iter->iomap.flags & IOMAP_F_STALE)
break;
- page = folio_file_page(folio, pos >> PAGE_SHIFT);
- if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ offset = offset_in_folio(folio, pos);
+ if (bytes > folio_size(folio) - offset)
+ bytes = folio_size(folio) - offset;
- copied = copy_page_from_iter_atomic(page, offset, bytes, i);
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_folio(folio);
+ copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
status = iomap_write_end(iter, pos, bytes, copied, folio);
if (unlikely(copied != status))
@@ -840,11 +836,13 @@ again:
*/
if (copied)
bytes = copied;
- goto again;
+ if (chunk > PAGE_SIZE)
+ chunk /= 2;
+ } else {
+ pos += status;
+ written += status;
+ length -= status;
}
- pos += status;
- written += status;
- length -= status;
} while (iov_iter_count(i) && length);
if (status == -EAGAIN) {