summaryrefslogtreecommitdiff
path: root/fs/mpage.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/mpage.c')
-rw-r--r--fs/mpage.c264
1 files changed, 135 insertions, 129 deletions
diff --git a/fs/mpage.c b/fs/mpage.c
index 0f8ae954a579..7dae5afc2b9e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -43,23 +43,42 @@
* status of that page is hard. See end_buffer_async_read() for the details.
* There is no point in duplicating all that complexity.
*/
-static void mpage_end_io(struct bio *bio)
+static void mpage_read_end_io(struct bio *bio)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
+ int err = blk_status_to_errno(bio->bi_status);
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- page_endio(page, bio_op(bio),
- blk_status_to_errno(bio->bi_status));
+ bio_for_each_folio_all(fi, bio)
+ folio_end_read(fi.folio, err == 0);
+
+ bio_put(bio);
+}
+
+static void mpage_write_end_io(struct bio *bio)
+{
+ struct folio_iter fi;
+ int err = blk_status_to_errno(bio->bi_status);
+
+ bio_for_each_folio_all(fi, bio) {
+ if (err)
+ mapping_set_error(fi.folio->mapping, err);
+ folio_end_writeback(fi.folio);
}
bio_put(bio);
}
-static struct bio *mpage_bio_submit(struct bio *bio)
+static struct bio *mpage_bio_submit_read(struct bio *bio)
{
- bio->bi_end_io = mpage_end_io;
+ bio->bi_end_io = mpage_read_end_io;
+ guard_bio_eod(bio);
+ submit_bio(bio);
+ return NULL;
+}
+
+static struct bio *mpage_bio_submit_write(struct bio *bio)
+{
+ bio->bi_end_io = mpage_write_end_io;
guard_bio_eod(bio);
submit_bio(bio);
return NULL;
@@ -88,13 +107,12 @@ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh,
* don't make any buffers if there is only one buffer on
* the folio and the folio just needs to be set up to date
*/
- if (inode->i_blkbits == PAGE_SHIFT &&
+ if (inode->i_blkbits == folio_shift(folio) &&
buffer_uptodate(bh)) {
folio_mark_uptodate(folio);
return;
}
- create_empty_buffers(&folio->page, i_blocksize(inode), 0);
- head = folio_buffers(folio);
+ head = create_empty_buffers(folio, i_blocksize(inode), 0);
}
page_bh = head;
@@ -130,20 +148,20 @@ struct mpage_readpage_args {
* represent the validity of its disk mapping and to decide when to do the next
* get_block() call.
*/
-static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
+static void do_mpage_readpage(struct mpage_readpage_args *args)
{
struct folio *folio = args->folio;
struct inode *inode = folio->mapping->host;
const unsigned blkbits = inode->i_blkbits;
- const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
+ const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
const unsigned blocksize = 1 << blkbits;
struct buffer_head *map_bh = &args->map_bh;
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
- sector_t blocks[MAX_BUF_PER_PAGE];
+ sector_t first_block;
unsigned page_block;
- unsigned first_hole = blocks_per_page;
+ unsigned first_hole = blocks_per_folio;
struct block_device *bdev = NULL;
int length;
int fully_mapped = 1;
@@ -152,9 +170,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
unsigned relative_block;
gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
- /* MAX_BUF_PER_PAGE, for example */
- VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
-
if (args->is_readahead) {
opf |= REQ_RAHEAD;
gfp |= __GFP_NORETRY | __GFP_NOWARN;
@@ -163,8 +178,8 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
if (folio_buffers(folio))
goto confused;
- block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
- last_block = block_in_file + args->nr_pages * blocks_per_page;
+ block_in_file = folio_pos(folio) >> blkbits;
+ last_block = block_in_file + ((args->nr_pages * PAGE_SIZE) >> blkbits);
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
@@ -180,15 +195,14 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
unsigned map_offset = block_in_file - args->first_logical_block;
unsigned last = nblocks - map_offset;
+ first_block = map_bh->b_blocknr + map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
clear_buffer_mapped(map_bh);
break;
}
- if (page_block == blocks_per_page)
+ if (page_block == blocks_per_folio)
break;
- blocks[page_block] = map_bh->b_blocknr + map_offset +
- relative_block;
page_block++;
block_in_file++;
}
@@ -198,8 +212,8 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
/*
* Then do more get_blocks calls until we are done with this folio.
*/
- map_bh->b_page = &folio->page;
- while (page_block < blocks_per_page) {
+ map_bh->b_folio = folio;
+ while (page_block < blocks_per_folio) {
map_bh->b_state = 0;
map_bh->b_size = 0;
@@ -212,7 +226,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
if (!buffer_mapped(map_bh)) {
fully_mapped = 0;
- if (first_hole == blocks_per_page)
+ if (first_hole == blocks_per_folio)
first_hole = page_block;
page_block++;
block_in_file++;
@@ -230,28 +244,29 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
goto confused;
}
- if (first_hole != blocks_per_page)
+ if (first_hole != blocks_per_folio)
goto confused; /* hole -> non-hole */
/* Contiguous blocks? */
- if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
+ if (!page_block)
+ first_block = map_bh->b_blocknr;
+ else if (first_block + page_block != map_bh->b_blocknr)
goto confused;
nblocks = map_bh->b_size >> blkbits;
for (relative_block = 0; ; relative_block++) {
if (relative_block == nblocks) {
clear_buffer_mapped(map_bh);
break;
- } else if (page_block == blocks_per_page)
+ } else if (page_block == blocks_per_folio)
break;
- blocks[page_block] = map_bh->b_blocknr+relative_block;
page_block++;
block_in_file++;
}
bdev = map_bh->b_bdev;
}
- if (first_hole != blocks_per_page) {
- folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
+ if (first_hole != blocks_per_folio) {
+ folio_zero_segment(folio, first_hole << blkbits, folio_size(folio));
if (first_hole == 0) {
folio_mark_uptodate(folio);
folio_unlock(folio);
@@ -264,42 +279,37 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
/*
* This folio will go to BIO. Do we need to send this BIO off first?
*/
- if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
- args->bio = mpage_bio_submit(args->bio);
+ if (args->bio && (args->last_block_in_bio != first_block - 1))
+ args->bio = mpage_bio_submit_read(args->bio);
alloc_new:
if (args->bio == NULL) {
- if (first_hole == blocks_per_page) {
- if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
- &folio->page))
- goto out;
- }
args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf,
gfp);
if (args->bio == NULL)
goto confused;
- args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
+ args->bio->bi_iter.bi_sector = first_block << (blkbits - 9);
}
length = first_hole << blkbits;
if (!bio_add_folio(args->bio, folio, length, 0)) {
- args->bio = mpage_bio_submit(args->bio);
+ args->bio = mpage_bio_submit_read(args->bio);
goto alloc_new;
}
relative_block = block_in_file - args->first_logical_block;
nblocks = map_bh->b_size >> blkbits;
if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
- (first_hole != blocks_per_page))
- args->bio = mpage_bio_submit(args->bio);
+ (first_hole != blocks_per_folio))
+ args->bio = mpage_bio_submit_read(args->bio);
else
- args->last_block_in_bio = blocks[blocks_per_page - 1];
+ args->last_block_in_bio = first_block + blocks_per_folio - 1;
out:
- return args->bio;
+ return;
confused:
if (args->bio)
- args->bio = mpage_bio_submit(args->bio);
+ args->bio = mpage_bio_submit_read(args->bio);
if (!folio_test_uptodate(folio))
block_read_full_folio(folio, args->get_block);
else
@@ -358,10 +368,16 @@ void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
prefetchw(&folio->flags);
args.folio = folio;
args.nr_pages = readahead_count(rac);
- args.bio = do_mpage_readpage(&args);
+ do_mpage_readpage(&args);
+ /*
+ * If read ahead failed synchronously, it may cause by removed
+ * device, or some filesystem metadata error.
+ */
+ if (!folio_test_locked(folio) && !folio_test_uptodate(folio))
+ break;
}
if (args.bio)
- mpage_bio_submit(args.bio);
+ mpage_bio_submit_read(args.bio);
}
EXPORT_SYMBOL(mpage_readahead);
@@ -372,13 +388,13 @@ int mpage_read_folio(struct folio *folio, get_block_t get_block)
{
struct mpage_readpage_args args = {
.folio = folio,
- .nr_pages = 1,
+ .nr_pages = folio_nr_pages(folio),
.get_block = get_block,
};
- args.bio = do_mpage_readpage(&args);
+ do_mpage_readpage(&args);
if (args.bio)
- mpage_bio_submit(args.bio);
+ mpage_bio_submit_read(args.bio);
return 0;
}
EXPORT_SYMBOL(mpage_read_folio);
@@ -410,13 +426,13 @@ struct mpage_data {
* We have our BIO, so we can now mark the buffers clean. Make
* sure to only clean buffers which we know we'll be writing.
*/
-static void clean_buffers(struct page *page, unsigned first_unmapped)
+static void clean_buffers(struct folio *folio, unsigned first_unmapped)
{
unsigned buffer_counter = 0;
- struct buffer_head *bh, *head;
- if (!page_has_buffers(page))
+ struct buffer_head *bh, *head = folio_buffers(folio);
+
+ if (!head)
return;
- head = page_buffers(page);
bh = head;
do {
@@ -431,46 +447,34 @@ static void clean_buffers(struct page *page, unsigned first_unmapped)
* read_folio would fail to serialize with the bh and it would read from
* disk before we reach the platter.
*/
- if (buffer_heads_over_limit && PageUptodate(page))
- try_to_free_buffers(page_folio(page));
+ if (buffer_heads_over_limit && folio_test_uptodate(folio))
+ try_to_free_buffers(folio);
}
-/*
- * For situations where we want to clean all buffers attached to a page.
- * We don't need to calculate how many buffers are attached to the page,
- * we just need to specify a number larger than the maximum number of buffers.
- */
-void clean_page_buffers(struct page *page)
+static int mpage_write_folio(struct writeback_control *wbc, struct folio *folio,
+ struct mpage_data *mpd)
{
- clean_buffers(page, ~0U);
-}
-
-static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
- void *data)
-{
- struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
- struct address_space *mapping = page->mapping;
- struct inode *inode = page->mapping->host;
+ struct address_space *mapping = folio->mapping;
+ struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
- unsigned long end_index;
- const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
+ const unsigned blocks_per_folio = folio_size(folio) >> blkbits;
sector_t last_block;
sector_t block_in_file;
- sector_t blocks[MAX_BUF_PER_PAGE];
+ sector_t first_block;
unsigned page_block;
- unsigned first_unmapped = blocks_per_page;
+ unsigned first_unmapped = blocks_per_folio;
struct block_device *bdev = NULL;
int boundary = 0;
sector_t boundary_block = 0;
struct block_device *boundary_bdev = NULL;
- int length;
+ size_t length;
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ struct buffer_head *head = folio_buffers(folio);
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
+ if (head) {
struct buffer_head *bh = head;
/* If they're all mapped and dirty, do it */
@@ -484,21 +488,23 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
*/
if (buffer_dirty(bh))
goto confused;
- if (first_unmapped == blocks_per_page)
+ if (first_unmapped == blocks_per_folio)
first_unmapped = page_block;
continue;
}
- if (first_unmapped != blocks_per_page)
+ if (first_unmapped != blocks_per_folio)
goto confused; /* hole -> non-hole */
if (!buffer_dirty(bh) || !buffer_uptodate(bh))
goto confused;
if (page_block) {
- if (bh->b_blocknr != blocks[page_block-1] + 1)
+ if (bh->b_blocknr != first_block + page_block)
goto confused;
+ } else {
+ first_block = bh->b_blocknr;
}
- blocks[page_block++] = bh->b_blocknr;
+ page_block++;
boundary = buffer_boundary(bh);
if (boundary) {
boundary_block = bh->b_blocknr;
@@ -522,16 +528,24 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
/*
* The page has no buffers: map it to disk
*/
- BUG_ON(!PageUptodate(page));
- block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
+ BUG_ON(!folio_test_uptodate(folio));
+ block_in_file = folio_pos(folio) >> blkbits;
+ /*
+ * Whole page beyond EOF? Skip allocating blocks to avoid leaking
+ * space.
+ */
+ if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits)
+ goto page_is_mapped;
last_block = (i_size - 1) >> blkbits;
- map_bh.b_page = page;
- for (page_block = 0; page_block < blocks_per_page; ) {
+ map_bh.b_folio = folio;
+ for (page_block = 0; page_block < blocks_per_folio; ) {
map_bh.b_state = 0;
map_bh.b_size = 1 << blkbits;
if (mpd->get_block(inode, block_in_file, &map_bh, 1))
goto confused;
+ if (!buffer_mapped(&map_bh))
+ goto confused;
if (buffer_new(&map_bh))
clean_bdev_bh_alias(&map_bh);
if (buffer_boundary(&map_bh)) {
@@ -539,10 +553,12 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
boundary_bdev = map_bh.b_bdev;
}
if (page_block) {
- if (map_bh.b_blocknr != blocks[page_block-1] + 1)
+ if (map_bh.b_blocknr != first_block + page_block)
goto confused;
+ } else {
+ first_block = map_bh.b_blocknr;
}
- blocks[page_block++] = map_bh.b_blocknr;
+ page_block++;
boundary = buffer_boundary(&map_bh);
bdev = map_bh.b_bdev;
if (block_in_file == last_block)
@@ -554,8 +570,11 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
first_unmapped = page_block;
page_is_mapped:
- end_index = i_size >> PAGE_SHIFT;
- if (page->index >= end_index) {
+ /* Don't bother writing beyond EOF, truncate will discard the folio */
+ if (folio_pos(folio) >= i_size)
+ goto confused;
+ length = folio_size(folio);
+ if (folio_pos(folio) + length > i_size) {
/*
* The page straddles i_size. It must be zeroed out on each
* and every writepage invocation because it may be mmapped.
@@ -564,31 +583,24 @@ page_is_mapped:
* is zeroed when mapped, and writes to that region are not
* written out to the file."
*/
- unsigned offset = i_size & (PAGE_SIZE - 1);
-
- if (page->index > end_index || !offset)
- goto confused;
- zero_user_segment(page, offset, PAGE_SIZE);
+ length = i_size - folio_pos(folio);
+ folio_zero_segment(folio, length, folio_size(folio));
}
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
- if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(bio);
+ if (bio && mpd->last_block_in_bio != first_block - 1)
+ bio = mpage_bio_submit_write(bio);
alloc_new:
if (bio == NULL) {
- if (first_unmapped == blocks_per_page) {
- if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
- page, wbc))
- goto out;
- }
bio = bio_alloc(bdev, BIO_MAX_VECS,
REQ_OP_WRITE | wbc_to_write_flags(wbc),
GFP_NOFS);
- bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
+ bio->bi_iter.bi_sector = first_block << (blkbits - 9);
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
@@ -596,37 +608,37 @@ alloc_new:
* the confused fail path above (OOM) will be very confused when
* it finds all bh marked clean (i.e. it will not write anything)
*/
- wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
+ wbc_account_cgroup_owner(wbc, folio, folio_size(folio));
length = first_unmapped << blkbits;
- if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(bio);
+ if (!bio_add_folio(bio, folio, length, 0)) {
+ bio = mpage_bio_submit_write(bio);
goto alloc_new;
}
- clean_buffers(page, first_unmapped);
+ clean_buffers(folio, first_unmapped);
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- unlock_page(page);
- if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(bio);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+ if (boundary || (first_unmapped != blocks_per_folio)) {
+ bio = mpage_bio_submit_write(bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
}
} else {
- mpd->last_block_in_bio = blocks[blocks_per_page - 1];
+ mpd->last_block_in_bio = first_block + blocks_per_folio - 1;
}
goto out;
confused:
if (bio)
- bio = mpage_bio_submit(bio);
+ bio = mpage_bio_submit_write(bio);
/*
* The caller has a ref on the inode, so *mapping is stable
*/
- ret = block_write_full_page(page, mpd->get_block, wbc);
+ ret = block_write_full_folio(folio, wbc, mpd->get_block);
mapping_set_error(mapping, ret);
out:
mpd->bio = bio;
@@ -641,14 +653,6 @@ out:
*
* This is a library function, which implements the writepages()
* address_space_operation.
- *
- * If a page is already under I/O, generic_writepages() skips it, even
- * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them. If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
*/
int
mpage_writepages(struct address_space *mapping,
@@ -657,14 +661,16 @@ mpage_writepages(struct address_space *mapping,
struct mpage_data mpd = {
.get_block = get_block,
};
+ struct folio *folio = NULL;
struct blk_plug plug;
- int ret;
+ int error;
blk_start_plug(&plug);
- ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = mpage_write_folio(wbc, folio, &mpd);
if (mpd.bio)
- mpage_bio_submit(mpd.bio);
+ mpage_bio_submit_write(mpd.bio);
blk_finish_plug(&plug);
- return ret;
+ return error;
}
EXPORT_SYMBOL(mpage_writepages);