summary | refs | log | tree | commit | diff
path: root/fs/btrfs/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r-- fs/btrfs/bio.c 196
1 files changed, 117 insertions, 79 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 928f512cdb4a..bc2555c44a12 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -11,7 +11,6 @@
#include "raid56.h"
#include "async-thread.h"
#include "dev-replace.h"
-#include "rcu-string.h"
#include "zoned.h"
#include "file-item.h"
#include "raid-stripe-tree.h"
@@ -30,7 +29,7 @@ struct btrfs_failed_bio {
/* Is this a data path I/O that needs storage layer checksum and repair? */
static inline bool is_data_bbio(struct btrfs_bio *bbio)
{
- return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
+ return bbio->inode && is_data_inode(bbio->inode);
}
static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
@@ -50,11 +49,12 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
bbio->end_io = end_io;
bbio->private = private;
atomic_set(&bbio->pending_ios, 1);
+ WRITE_ONCE(bbio->status, BLK_STS_OK);
}
/*
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
- * btrfs, and is used for all I/O submitted through btrfs_submit_bio.
+ * btrfs, and is used for all I/O submitted through btrfs_submit_bbio().
*
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
@@ -74,20 +74,16 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *orig_bbio,
- u64 map_length, bool use_append)
+ u64 map_length)
{
struct btrfs_bio *bbio;
struct bio *bio;
- if (use_append) {
- unsigned int nr_segs;
+ bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, GFP_NOFS,
+ &btrfs_clone_bioset);
+ if (IS_ERR(bio))
+ return ERR_CAST(bio);
- bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs,
- &btrfs_clone_bioset, map_length);
- } else {
- bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT,
- GFP_NOFS, &btrfs_clone_bioset);
- }
bbio = btrfs_bio(bio);
btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
bbio->inode = orig_bbio->inode;
@@ -124,43 +120,26 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
- __btrfs_bio_end_io(bbio);
-}
-
-static void btrfs_orig_write_end_io(struct bio *bio);
-
-static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
- struct btrfs_bio *orig_bbio)
-{
- /*
- * For writes we tolerate nr_mirrors - 1 write failures, so we can't
- * just blindly propagate a write failure here. Instead increment the
- * error count in the original I/O context so that it is guaranteed to
- * be larger than the error tolerance.
- */
- if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
- struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
- struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
-
- atomic_add(orig_bioc->max_errors, &orig_bioc->error);
- } else {
- orig_bbio->bio.bi_status = bbio->bio.bi_status;
- }
-}
-
-static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
-{
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
- if (bbio->bio.bi_status)
- btrfs_bbio_propagate_error(bbio, orig_bbio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
- if (atomic_dec_and_test(&bbio->pending_ios))
+ /*
+ * At this point, bbio always points to the original btrfs_bio. Save
+ * the first error in it.
+ */
+ if (status != BLK_STS_OK)
+ cmpxchg(&bbio->status, BLK_STS_OK, status);
+
+ if (atomic_dec_and_test(&bbio->pending_ios)) {
+ /* Load split bio's error which might be set above. */
+ if (status == BLK_STS_OK)
+ bbio->bio.bi_status = READ_ONCE(bbio->status);
__btrfs_bio_end_io(bbio);
+ }
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
@@ -180,7 +159,7 @@ static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
{
if (atomic_dec_and_test(&fbio->repair_count)) {
- btrfs_orig_bbio_end_io(fbio->bbio);
+ btrfs_bio_end_io(fbio->bbio, fbio->bbio->bio.bi_status);
mempool_free(fbio, &btrfs_failed_bio_pool);
}
}
@@ -212,7 +191,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
goto done;
}
- btrfs_submit_bio(repair_bbio, mirror);
+ btrfs_submit_bbio(repair_bbio, mirror);
return;
}
@@ -281,7 +260,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
- btrfs_submit_bio(repair_bbio, mirror);
+ btrfs_submit_bbio(repair_bbio, mirror);
return fbio;
}
@@ -327,7 +306,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
if (fbio)
btrfs_repair_done(fbio);
else
- btrfs_orig_bbio_end_io(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
@@ -361,7 +340,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
if (is_data_bbio(bbio))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
- btrfs_orig_bbio_end_io(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
static void btrfs_simple_end_io(struct bio *bio)
@@ -379,9 +358,9 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+ if (bio_is_zone_append(bio) && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
- btrfs_orig_bbio_end_io(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
}
@@ -395,7 +374,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
btrfs_check_read_bio(bbio, NULL);
else
- btrfs_orig_bbio_end_io(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
btrfs_put_bioc(bioc);
}
@@ -422,10 +401,10 @@ static void btrfs_orig_write_end_io(struct bio *bio)
else
bio->bi_status = BLK_STS_OK;
- if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+ if (bio_is_zone_append(bio) && !bio->bi_status)
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
- btrfs_orig_bbio_end_io(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
btrfs_put_bioc(bioc);
}
@@ -436,7 +415,7 @@ static void btrfs_clone_write_end_io(struct bio *bio)
if (bio->bi_status) {
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
- } else if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ } else if (bio_is_zone_append(bio)) {
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
}
@@ -474,6 +453,14 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
dev->devid, bio->bi_iter.bi_size);
+ /*
+ * Track reads if tracking is enabled; ignore I/O operations before the
+ * filesystem is fully initialized.
+ */
+ if (dev->fs_devices->collect_fs_stats && bio_op(bio) == REQ_OP_READ && dev->fs_info)
+ percpu_counter_add(&dev->fs_info->stats_read_blocks,
+ bio->bi_iter.bi_size >> dev->fs_info->sectorsize_bits);
+
if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
blkcg_punt_bio_submit(bio);
else
@@ -503,14 +490,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
-static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
- struct btrfs_io_stripe *smap, int mirror_num)
+static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
+ struct btrfs_io_stripe *smap, int mirror_num)
{
if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
- if (bio_op(bio) != REQ_OP_READ)
- btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
if (bio_op(bio) != REQ_OP_READ)
btrfs_bio(bio)->orig_physical = smap->physical;
@@ -596,7 +581,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
/* If an error occurred we just want to clean up the bio and move on. */
if (bio->bi_status) {
- btrfs_orig_bbio_end_io(async->bbio);
+ btrfs_bio_end_io(async->bbio, async->bbio->bio.bi_status);
return;
}
@@ -606,13 +591,25 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
* context. This changes nothing when cgroups aren't in use.
*/
bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
- __btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
+ btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
}
static bool should_async_write(struct btrfs_bio *bbio)
{
+ bool auto_csum_mode = true;
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+ enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
+
+ if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
+ return false;
+
+ auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+#endif
+
/* Submit synchronously if the checksum implementation is fast. */
- if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*
@@ -655,11 +652,29 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
return true;
}
+static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
+{
+ unsigned int nr_segs;
+ int sector_offset;
+
+ map_length = min(map_length, bbio->fs_info->max_zone_append_size);
+ sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
+ &nr_segs, map_length);
+ if (sector_offset) {
+ /*
+ * bio_split_rw_at() could split at a size smaller than our
+ * sectorsize and thus cause unaligned I/Os. Fix that by
+ * always rounding down to the nearest boundary.
+ */
+ return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize);
+ }
+ return map_length;
+}
+
static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = bbio->fs_info;
- struct btrfs_bio *orig_bbio = bbio;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
@@ -670,22 +685,34 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
blk_status_t ret;
int error;
- smap.is_scrub = !bbio->inode;
+ if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
+ smap.rst_search_commit_root = true;
+ else
+ smap.rst_search_commit_root = false;
btrfs_bio_counter_inc_blocked(fs_info);
error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num);
if (error) {
ret = errno_to_blk_status(error);
- goto fail;
+ btrfs_bio_counter_dec(fs_info);
+ goto end_bbio;
}
map_length = min(map_length, length);
if (use_append)
- map_length = min(map_length, fs_info->max_zone_append_size);
+ map_length = btrfs_append_map_length(bbio, map_length);
if (map_length < length) {
- bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append);
+ struct btrfs_bio *split;
+
+ split = btrfs_split_bio(fs_info, bbio, map_length);
+ if (IS_ERR(split)) {
+ ret = errno_to_blk_status(PTR_ERR(split));
+ btrfs_bio_counter_dec(fs_info);
+ goto end_bbio;
+ }
+ bbio = split;
bio = &bbio->bio;
}
@@ -697,7 +724,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
- goto fail_put_bio;
+ goto fail;
}
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
@@ -706,8 +733,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
bio->bi_opf |= REQ_OP_ZONE_APPEND;
}
- if (is_data_bbio(bbio) && bioc &&
- btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) {
+ if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
/*
* No locking for the list update, as we only add to
* the list in the I/O submission path, and list
@@ -723,7 +749,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* point, so they are handled as part of the no-checksum case.
*/
if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
- !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
+ !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
!btrfs_is_data_reloc_root(inode->root)) {
if (should_async_write(bbio) &&
btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
@@ -731,29 +757,41 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
- goto fail_put_bio;
- } else if (use_append) {
+ goto fail;
+ } else if (use_append ||
+ (btrfs_is_zoned(fs_info) && inode &&
+ inode->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
- goto fail_put_bio;
+ goto fail;
}
}
- __btrfs_submit_bio(bio, bioc, &smap, mirror_num);
+ btrfs_submit_bio(bio, bioc, &smap, mirror_num);
done:
return map_length == length;
-fail_put_bio:
- if (map_length < length)
- btrfs_cleanup_bio(bbio);
fail:
btrfs_bio_counter_dec(fs_info);
- btrfs_bio_end_io(orig_bbio, ret);
+ /*
+ * We have split the original bbio, now we have to end both the current
+ * @bbio and remaining one, as the remaining one will never be submitted.
+ */
+ if (map_length < length) {
+ struct btrfs_bio *remaining = bbio->private;
+
+ ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
+ ASSERT(remaining);
+
+ btrfs_bio_end_io(remaining, ret);
+ }
+end_bbio:
+ btrfs_bio_end_io(bbio, ret);
/* Do not submit another chunk */
return true;
}
-void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
+void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
{
/* If bbio->inode is not populated, its file_offset must be 0. */
ASSERT(bbio->inode || bbio->file_offset == 0);
@@ -765,7 +803,7 @@ void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
/*
* Submit a repair write.
*
- * This bypasses btrfs_submit_bio deliberately, as that writes all copies in a
+ * This bypasses btrfs_submit_bbio() deliberately, as that writes all copies in a
* RAID setup. Here we only want to write the one bad copy, so we do the
* mapping ourselves and submit the bio directly.
*
@@ -854,7 +892,7 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
smap.dev = fs_info->dev_replace.tgtdev;
}
- __btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
+ btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
return;
fail: