From a5fd8390d2b2db15fd043b8bd571b536101222c2 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 15 Dec 2022 21:44:02 +0000
Subject: mpage: use b_folio in do_mpage_readpage()

Remove this conversion of a folio back to a page.

Link: https://lkml.kernel.org/r/20221215214402.3522366-13-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Jan Kara
Signed-off-by: Andrew Morton
---
 fs/mpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/mpage.c')

diff --git a/fs/mpage.c b/fs/mpage.c
index 0f8ae954a579..db59cbf6affc 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -198,7 +198,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	/*
 	 * Then do more get_blocks calls until we are done with this folio.
 	 */
-	map_bh->b_page = &folio->page;
+	map_bh->b_folio = folio;
 	while (page_block < blocks_per_page) {
 		map_bh->b_state = 0;
 		map_bh->b_size = 0;
--
cgit

From 5b68de67037168f826d6fe434d03b5876aec4cb6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 29 Dec 2022 06:10:26 -1000
Subject: fs: remove an outdated comment on mpage_writepages

Patch series "remove generic_writepages".

This series removes generic_writepages by open coding the current
functionality in the three remaining callers.  Besides removing some
code, the main benefit is that one of the few remaining ->writepage
callers from outside the core page cache code goes away.

This patch (of 6):

mpage_writepages doesn't do any of the page locking itself, so remove
an outdated comment on the locking pattern there.

Link: https://lkml.kernel.org/r/20221229161031.391878-1-hch@lst.de
Link: https://lkml.kernel.org/r/20221229161031.391878-2-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Jan Kara
Cc: Joel Becker
Cc: Joseph Qi
Cc: Konstantin Komarov
Cc: Mark Fasheh
Cc: Matthew Wilcox
Cc: Theodore Ts'o
Signed-off-by: Andrew Morton
---
 fs/mpage.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs/mpage.c')

diff --git a/fs/mpage.c b/fs/mpage.c
index db59cbf6affc..d36a95473f77 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -641,14 +641,6 @@ out:
  *
  * This is a library function, which implements the writepages()
  * address_space_operation.
- *
- * If a page is already under I/O, generic_writepages() skips it, even
- * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them. If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
  */
 int mpage_writepages(struct address_space *mapping,
--
cgit
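The comment that survives the deletion above calls mpage_writepages() a
library function implementing the writepages() address_space_operation.
As a reminder of what that wiring looks like, here is a minimal sketch
modelled on the existing in-tree callers; the "myfs" names and
myfs_get_block() are hypothetical placeholders, while mpage_writepages()
and the aops hookup are real kernel API:

  /*
   * Minimal sketch: exposing mpage_writepages() as a filesystem's
   * writepages() address_space_operation.  "myfs" and myfs_get_block()
   * are hypothetical.
   */
  #include <linux/fs.h>
  #include <linux/mpage.h>
  #include <linux/writeback.h>

  /* The filesystem's usual get_block_t block-mapping routine. */
  static int myfs_get_block(struct inode *inode, sector_t iblock,
  			    struct buffer_head *bh_result, int create);

  static int myfs_writepages(struct address_space *mapping,
  			     struct writeback_control *wbc)
  {
  	/* Batches dirty pages into large bios, mapping blocks as it goes. */
  	return mpage_writepages(mapping, wbc, myfs_get_block);
  }

  static const struct address_space_operations myfs_aops = {
  	.writepages	= myfs_writepages,
  	/* ...read and write_begin/write_end hooks elided... */
  };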
From 4b89a37d54a0b5ed6b2e5a9afc44a15a22e563f5 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Tue, 3 Jan 2023 11:44:30 +0100
Subject: fs: don't allocate blocks beyond EOF from __mpage_writepage

When __mpage_writepage() is called for a page beyond EOF, it will go and
allocate all blocks underlying the page.  This is not only unnecessary,
but blocks can also get leaked this way (e.g. if a page beyond EOF is
marked dirty but in the end the write fails and i_size is not extended).

Link: https://lkml.kernel.org/r/20230103104430.27749-1-jack@suse.cz
Signed-off-by: Jan Kara
Reviewed-by: Christoph Hellwig
Cc: Matthew Wilcox
Cc: Al Viro
Signed-off-by: Andrew Morton
---
 fs/mpage.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs/mpage.c')

diff --git a/fs/mpage.c b/fs/mpage.c
index d36a95473f77..b8e7975159bc 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -524,6 +524,12 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 	 */
 	BUG_ON(!PageUptodate(page));
 	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+	/*
+	 * Whole page beyond EOF? Skip allocating blocks to avoid leaking
+	 * space.
+	 */
+	if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits)
+		goto page_is_mapped;
 	last_block = (i_size - 1) >> blkbits;
 	map_bh.b_page = page;
 	for (page_block = 0; page_block < blocks_per_page; ) {
--
cgit

From 7d28631786b2333c5d48ad25172eb159aaa2945f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 25 Jan 2023 14:34:30 +0100
Subject: mpage: stop using bdev_{read,write}_page

Patch series "remove ->rw_page".

This series removes the ->rw_page block_device_operation, which is an old
and clumsy attempt at a simple read/write fast path for the block layer.
It isn't actually used by the fastest block layer operations that we
support (polled I/O through io_uring), but only by the mpage buffered I/O
helpers, which are some of the slowest I/O paths we have, where it makes
no difference at all, and by zram, which is a block device abused to
duplicate the zram functionality.  Given that zram is heavily used, we
need to make sure there is a good replacement for synchronous I/O, so this
series adds a new flag for drivers that complete I/O synchronously and
uses that flag to use on-stack bios and synchronous submission for them in
the swap code.

This patch (of 7):

These are micro-optimizations for synchronous I/O, which do not matter
compared to all the other inefficiencies in the legacy buffer_head based
mpage code.

Link: https://lkml.kernel.org/r/20230125133436.447864-1-hch@lst.de
Link: https://lkml.kernel.org/r/20230125133436.447864-2-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Dan Williams
Cc: Keith Busch
Cc: Dave Jiang
Cc: Ira Weiny
Cc: Jens Axboe
Cc: Minchan Kim
Cc: Sergey Senozhatsky
Cc: Vishal Verma
Signed-off-by: Andrew Morton
---
 fs/mpage.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'fs/mpage.c')

diff --git a/fs/mpage.c b/fs/mpage.c
index b8e7975159bc..55988ea994ee 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -269,11 +269,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 
 alloc_new:
 	if (args->bio == NULL) {
-		if (first_hole == blocks_per_page) {
-			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
-						&folio->page))
-				goto out;
-		}
 		args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages),
 				      opf, gfp);
 		if (args->bio == NULL)
@@ -585,11 +580,6 @@ page_is_mapped:
 
 alloc_new:
 	if (bio == NULL) {
-		if (first_unmapped == blocks_per_page) {
-			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
-					page, wbc))
-				goto out;
-		}
 		bio = bio_alloc(bdev, BIO_MAX_VECS,
 				REQ_OP_WRITE | wbc_to_write_flags(wbc),
 				GFP_NOFS);
--
cgit
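To make the round-up arithmetic in Jan Kara's new EOF check concrete,
here is a small self-contained userspace sketch with hypothetical sizes
(1 KiB blocks, 4 KiB pages, a 5000-byte file); it is not kernel code, it
only mirrors the expression added above:

  #include <stdio.h>

  int main(void)
  {
  	unsigned int blkbits = 10;	/* hypothetical 1 KiB blocks */
  	unsigned int page_shift = 12;	/* 4 KiB pages */
  	long long i_size = 5000;	/* file needs 5 blocks, rounded up */
  	long long index = 2;		/* third page of the file */

  	/* First block of this page: 2 << (12 - 10) = 8 */
  	long long block_in_file = index << (page_shift - blkbits);
  	/* Blocks needed to cover i_size: (5000 + 1023) >> 10 = 5 */
  	long long blocks_covering_eof =
  		(i_size + (1 << blkbits) - 1) >> blkbits;

  	/* 8 >= 5: the whole page lies beyond EOF, skip block allocation */
  	printf("%s\n", block_in_file >= blocks_covering_eof ?
  	       "skip allocation" : "map blocks");
  	return 0;
  }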
From d585bdbeb79aa13b8a9bbe952d90f5252f7fe909 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 26 Jan 2023 20:12:54 +0000
Subject: fs: convert writepage_t callback to pass a folio

Patch series "Convert writepage_t to use a folio".

More folioisation.  I split out the mpage work from everything else
because it completely dominated the patch, but some implementations I
just converted outright.

This patch (of 2):

We always write back an entire folio, but that's currently passed as the
head page.  Convert all filesystems that use write_cache_pages() to
expect a folio instead of a page.

Link: https://lkml.kernel.org/r/20230126201255.1681189-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20230126201255.1681189-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Christoph Hellwig
Signed-off-by: Andrew Morton
---
 fs/mpage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/mpage.c')

diff --git a/fs/mpage.c b/fs/mpage.c
index 55988ea994ee..4890369bb10a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -440,9 +440,10 @@ void clean_page_buffers(struct page *page)
 	clean_buffers(page, ~0U);
 }
 
-static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 		void *data)
 {
+	struct page *page = &folio->page;
 	struct mpage_data *mpd = data;
 	struct bio *bio = mpd->bio;
 	struct address_space *mapping = page->mapping;
--
cgit
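For callers outside mpage.c, the visible effect of this conversion is
that a writepage_t callback now receives the folio itself rather than its
head page.  A minimal sketch of such a callback; the myfs_* names and the
elided I/O are hypothetical, while write_cache_pages() and the folio
helpers are the interfaces this series converts:

  /* Sketch of a post-conversion writepage_t callback. */
  static int myfs_write_folio(struct folio *folio,
  		struct writeback_control *wbc, void *data)
  {
  	/* The callback gets the folio itself, so folio_size() covers
  	 * multi-page folios too. */

  	/* ...map blocks and submit a bio for folio_size(folio) bytes... */

  	folio_start_writeback(folio);
  	folio_unlock(folio);
  	return 0;
  }

  static int myfs_writepages(struct address_space *mapping,
  		struct writeback_control *wbc)
  {
  	return write_cache_pages(mapping, wbc, myfs_write_folio, NULL);
  }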
From 9160cffd45ee93bc20de134e4f127dac9af0cc18 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 26 Jan 2023 20:12:55 +0000
Subject: mpage: convert __mpage_writepage() to use a folio more fully

This is just a conversion to the folio API.  While there are some nods
towards supporting multi-page folios in here, the blocks array is still
sized for one page's worth of blocks, and there are other assumptions
such as the blocks_per_page variable.

[willy@infradead.org: fix accidentally-triggering WARN_ON_ONCE]
Link: https://lkml.kernel.org/r/Y9kuaBgXf9lKJ8b0@casper.infradead.org
Link: https://lkml.kernel.org/r/20230126201255.1681189-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Christoph Hellwig
Cc: Jan Kara
Signed-off-by: Andrew Morton
---
 fs/mpage.c | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

(limited to 'fs/mpage.c')

diff --git a/fs/mpage.c b/fs/mpage.c
index 4890369bb10a..b9b7f6dc9c37 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -443,13 +443,11 @@ void clean_page_buffers(struct page *page)
 static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 		void *data)
 {
-	struct page *page = &folio->page;
 	struct mpage_data *mpd = data;
 	struct bio *bio = mpd->bio;
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = page->mapping->host;
+	struct address_space *mapping = folio->mapping;
+	struct inode *inode = mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
-	unsigned long end_index;
 	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
 	sector_t last_block;
 	sector_t block_in_file;
@@ -460,13 +458,13 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 	int boundary = 0;
 	sector_t boundary_block = 0;
 	struct block_device *boundary_bdev = NULL;
-	int length;
+	size_t length;
 	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
 	int ret = 0;
+	struct buffer_head *head = folio_buffers(folio);
 
-	if (page_has_buffers(page)) {
-		struct buffer_head *head = page_buffers(page);
+	if (head) {
 		struct buffer_head *bh = head;
 
 		/* If they're all mapped and dirty, do it */
@@ -518,8 +516,8 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 	/*
 	 * The page has no buffers: map it to disk
 	 */
-	BUG_ON(!PageUptodate(page));
-	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+	BUG_ON(!folio_test_uptodate(folio));
+	block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
 	/*
 	 * Whole page beyond EOF? Skip allocating blocks to avoid leaking
 	 * space.
@@ -527,7 +525,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 	if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits)
 		goto page_is_mapped;
 	last_block = (i_size - 1) >> blkbits;
-	map_bh.b_page = page;
+	map_bh.b_folio = folio;
 
 	for (page_block = 0; page_block < blocks_per_page; ) {
 		map_bh.b_state = 0;
@@ -556,8 +554,11 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 	first_unmapped = page_block;
 
 page_is_mapped:
-	end_index = i_size >> PAGE_SHIFT;
-	if (page->index >= end_index) {
+	/* Don't bother writing beyond EOF, truncate will discard the folio */
+	if (folio_pos(folio) >= i_size)
+		goto confused;
+	length = folio_size(folio);
+	if (folio_pos(folio) + length > i_size) {
 		/*
 		 * The page straddles i_size.  It must be zeroed out on each
 		 * and every writepage invocation because it may be mmapped.
@@ -566,11 +567,8 @@ page_is_mapped:
 		 * is zeroed when mapped, and writes to that region are not
 		 * written out to the file."
 		 */
-		unsigned offset = i_size & (PAGE_SIZE - 1);
-
-		if (page->index > end_index || !offset)
-			goto confused;
-		zero_user_segment(page, offset, PAGE_SIZE);
+		length = i_size - folio_pos(folio);
+		folio_zero_segment(folio, length, folio_size(folio));
 	}
 
 	/*
@@ -593,18 +591,18 @@ alloc_new:
 	 * the confused fail path above (OOM) will be very confused when
 	 * it finds all bh marked clean (i.e. it will not write anything)
 	 */
-	wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
+	wbc_account_cgroup_owner(wbc, &folio->page, folio_size(folio));
 	length = first_unmapped << blkbits;
-	if (bio_add_page(bio, page, length, 0) < length) {
+	if (!bio_add_folio(bio, folio, length, 0)) {
 		bio = mpage_bio_submit(bio);
 		goto alloc_new;
 	}
 
-	clean_buffers(page, first_unmapped);
+	clean_buffers(&folio->page, first_unmapped);
 
-	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
-	unlock_page(page);
+	BUG_ON(folio_test_writeback(folio));
+	folio_start_writeback(folio);
+	folio_unlock(folio);
 	if (boundary || (first_unmapped != blocks_per_page)) {
 		bio = mpage_bio_submit(bio);
 		if (boundary_block) {
@@ -623,7 +621,7 @@ confused:
 	/*
 	 * The caller has a ref on the inode, so *mapping is stable
 	 */
-	ret = block_write_full_page(page, mpd->get_block, wbc);
+	ret = block_write_full_page(&folio->page, mpd->get_block, wbc);
 	mapping_set_error(mapping, ret);
 out:
 	mpd->bio = bio;
--
cgit
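As a sanity check on the straddling-EOF branch above, a small
self-contained userspace sketch with hypothetical numbers (a 4 KiB folio
at index 4 and i_size = 18000); it mirrors the folio_pos()/folio_size()
arithmetic but is not kernel code:

  #include <stdio.h>

  int main(void)
  {
  	long long folio_pos = 16384;	/* 4 KiB folio at index 4 */
  	long long folio_size = 4096;
  	long long i_size = 18000;	/* EOF falls inside this folio */

  	if (folio_pos >= i_size) {
  		/* wholly beyond EOF: the new code takes the confused path */
  		puts("goto confused");
  	} else if (folio_pos + folio_size > i_size) {
  		/* 18000 - 16384 = 1616 valid bytes in this folio */
  		long long length = i_size - folio_pos;
  		/* folio_zero_segment(folio, 1616, 4096) zeroes the tail so
  		 * stale mmapped data past EOF never reaches disk */
  		printf("zero bytes %lld..%lld\n", length, folio_size);
  	}
  	return 0;
  }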