diff options
Diffstat (limited to 'fs/bcachefs/fs-io-buffered.c')
-rw-r--r-- | fs/bcachefs/fs-io-buffered.c | 167 |
1 files changed, 82 insertions, 85 deletions
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 27710cdd5710..ab1d5db2fa56 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -30,15 +30,8 @@ static void bch2_readpages_end_io(struct bio *bio) { struct folio_iter fi; - bio_for_each_folio_all(fi, bio) { - if (!bio->bi_status) { - folio_mark_uptodate(fi.folio); - } else { - folio_clear_uptodate(fi.folio); - folio_set_error(fi.folio); - } - folio_unlock(fi.folio); - } + bio_for_each_folio_all(fi, bio) + folio_end_read(fi.folio, bio->bi_status == BLK_STS_OK); bio_put(bio); } @@ -158,7 +151,6 @@ static void bchfs_read(struct btree_trans *trans, struct bkey_buf sk; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; - u32 snapshot; int ret = 0; rbio->c = c; @@ -166,29 +158,24 @@ static void bchfs_read(struct btree_trans *trans, rbio->subvol = inum.subvol; bch2_bkey_buf_init(&sk); -retry: bch2_trans_begin(trans); - iter = (struct btree_iter) { NULL }; - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); - if (ret) - goto err; - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), - BTREE_ITER_SLOTS); + POS(inum.inum, rbio->bio.bi_iter.bi_sector), + BTREE_ITER_slots); while (1) { struct bkey_s_c k; - unsigned bytes, sectors, offset_into_extent; + unsigned bytes, sectors; + s64 offset_into_extent; enum btree_id data_btree = BTREE_ID_extents; - /* - * read_extent -> io_time_reset may cause a transaction restart - * without returning an error, we need to check for that here: - */ - ret = bch2_trans_relock(trans); + bch2_trans_begin(trans); + + u32 snapshot; + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) - break; + goto err; + + bch2_btree_iter_set_snapshot(&iter, snapshot); bch2_btree_iter_set_pos(&iter, POS(inum.inum, rbio->bio.bi_iter.bi_sector)); @@ -196,7 +183,7 @@ retry: k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) - break; + goto err; offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); @@ -207,17 +194,17 @@ retry: ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, &sk); if (ret) - break; + goto err; k = bkey_i_to_s_c(sk.k); - sectors = min(sectors, k.k->size - offset_into_extent); + sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); if (readpages_iter) { ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, extent_partial_reads_expensive(k)); if (ret) - break; + goto err; } bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; @@ -236,22 +223,20 @@ retry: swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); - - ret = btree_trans_too_many_iters(trans); - if (ret) +err: + if (ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } -err: bch2_trans_iter_exit(trans, &iter); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - if (ret) { - bch_err_inum_offset_ratelimited(c, - iter.pos.inode, - iter.pos.offset << 9, - "read error %i from btree lookup", ret); + struct printbuf buf = PRINTBUF; + bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9); + prt_printf(&buf, "read error %i from btree lookup", ret); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + rbio->bio.bi_status = BLK_STS_IOERR; bio_endio(&rbio->bio); } @@ -264,9 +249,9 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts; - struct btree_trans *trans = bch2_trans_get(c); struct folio *folio; struct readpages_iter readpages_iter; + struct blk_plug plug; bch2_inode_opts_get(&opts, c, &inode->ei_inode); @@ -274,8 +259,19 @@ void bch2_readahead(struct readahead_control *ractl) if (ret) return; + /* + * Besides being a general performance optimization, plugging helps with + * avoiding btree transaction srcu warnings - submitting a bio can + * block, and we don't want todo that with the transaction locked. + * + * However, plugged bios are submitted when we schedule; we ideally + * would have our own scheduler hook to call unlock_long() before + * scheduling. + */ + blk_start_plug(&plug); bch2_pagecache_add_get(inode); + struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { unsigned n = min_t(unsigned, readpages_iter.folios.nr - @@ -296,10 +292,10 @@ void bch2_readahead(struct readahead_control *ractl) &readpages_iter); bch2_trans_unlock(trans); } + bch2_trans_put(trans); bch2_pagecache_add_put(inode); - - bch2_trans_put(trans); + blk_finish_plug(&plug); darray_exit(&readpages_iter.folios); } @@ -314,9 +310,13 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_read_bio *rbio; struct bch_io_opts opts; + struct blk_plug plug; int ret; DECLARE_COMPLETION_ONSTACK(done); + BUG_ON(folio_test_uptodate(folio)); + BUG_ON(folio_test_dirty(folio)); + if (!bch2_folio_create(folio, GFP_KERNEL)) return -ENOMEM; @@ -331,7 +331,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) rbio->bio.bi_iter.bi_sector = folio_sector(folio); BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + blk_start_plug(&plug); bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); + blk_finish_plug(&plug); wait_for_completion(&done); ret = blk_status_to_errno(rbio->bio.bi_status); @@ -408,7 +410,6 @@ static void bch2_writepage_io_done(struct bch_write_op *op) bio_for_each_folio_all(fi, bio) { struct bch_folio *s; - folio_set_error(fi.folio); mapping_set_error(fi.folio->mapping, -EIO); s = __bch2_folio(fi.folio); @@ -445,8 +446,8 @@ static void bch2_writepage_io_done(struct bch_write_op *op) */ /* - * PageWriteback is effectively our ref on the inode - fixup i_blocks - * before calling end_page_writeback: + * The writeback flag is effectively our ref on the inode - + * fixup i_blocks before calling folio_end_writeback: */ bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); @@ -494,7 +495,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, op->nr_replicas = nr_replicas; op->res.nr_replicas = nr_replicas; op->write_point = writepoint_hashed(inode->ei_last_dirtied); - op->subvol = inode->ei_subvol; + op->subvol = inode->ei_inum.subvol; op->pos = POS(inode->v.i_ino, sector); op->end_io = bch2_writepage_io_done; op->devs_need_flush = &inode->ei_devs_need_flush; @@ -542,7 +543,7 @@ do_io: if (f_sectors > w->tmp_sectors) { kfree(w->tmp); - w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); + w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), GFP_NOFS|__GFP_NOFAIL); w->tmp_sectors = f_sectors; } @@ -624,15 +625,6 @@ do_io: BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, sectors << 9, offset << 9)); - /* Check for writing past i_size: */ - WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > - round_up(i_size, block_bytes(c)) && - !test_bit(BCH_FS_emergency_ro, &c->flags), - "writing past i_size: %llu > %llu (unrounded %llu)\n", - bio_end_sector(&w->io->op.wbio.bio) << 9, - round_up(i_size, block_bytes(c)), - i_size); - w->io->op.res.sectors += reserved_sectors; w->io->op.i_sectors_delta -= dirty_sectors; w->io->op.new_i_size = i_size; @@ -667,7 +659,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc int bch2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; @@ -686,9 +678,9 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, bch2_pagecache_add_get(inode); folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, - FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, - mapping_gfp_mask(mapping)); - if (IS_ERR_OR_NULL(folio)) + FGP_WRITEBEGIN | fgf_set_order(len), + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) goto err_unlock; offset = pos - folio_pos(folio); @@ -736,12 +728,11 @@ out: goto err; } - *pagep = &folio->page; + *foliop = folio; return 0; err: folio_unlock(folio); folio_put(folio); - *pagep = NULL; err_unlock: bch2_pagecache_add_put(inode); kfree(res); @@ -751,12 +742,11 @@ err_unlock: int bch2_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation *res = fsdata; - struct folio *folio = page_folio(page); unsigned offset = pos - folio_pos(folio); lockdep_assert_held(&inode->v.i_rwsem); @@ -827,9 +817,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, darray_init(&fs); ret = bch2_filemap_get_contig_folios_d(mapping, pos, end, - FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, - mapping_gfp_mask(mapping), - &fs); + FGP_WRITEBEGIN | fgf_set_order(len), + mapping_gfp_mask(mapping), &fs); if (ret) goto out; @@ -862,24 +851,32 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, f_pos = pos; f_offset = pos - folio_pos(darray_first(fs)); darray_for_each(fs, fi) { + ssize_t f_reserved; + f = *fi; f_len = min(end, folio_end_pos(f)) - f_pos; + f_reserved = bch2_folio_reservation_get_partial(c, inode, f, &res, f_offset, f_len); + + if (unlikely(f_reserved != f_len)) { + if (f_reserved < 0) { + if (f == darray_first(fs)) { + ret = f_reserved; + goto out; + } + + folios_trunc(&fs, fi); + end = min(end, folio_end_pos(darray_last(fs))); + } else { + if (!folio_test_uptodate(f)) { + ret = bch2_read_single_folio(f, mapping); + if (ret) + goto out; + } + + folios_trunc(&fs, fi + 1); + end = f_pos + f_reserved; + } - /* - * XXX: per POSIX and fstests generic/275, on -ENOSPC we're - * supposed to write as much as we have disk space for. - * - * On failure here we should still write out a partial page if - * we aren't completely out of disk space - we don't do that - * yet: - */ - ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); - if (unlikely(ret)) { - folios_trunc(&fs, fi); - if (!fs.nr) - goto out; - - end = min(end, folio_end_pos(darray_last(fs))); break; } @@ -896,7 +893,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, darray_for_each(fs, fi) { f = *fi; f_len = min(end, folio_end_pos(f)) - f_pos; - f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); + f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter); if (!f_copied) { folios_trunc(&fs, fi); break; |