summaryrefslogtreecommitdiff
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c241
1 files changed, 149 insertions, 92 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index e3f6c49e5c4d..1a2066ac6fe7 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -231,7 +231,7 @@ struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
const char *errstr;
- sector_t sector;
+ u64 physical;
u64 logical;
struct btrfs_device *dev;
};
@@ -301,6 +301,11 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);
+static inline int scrub_is_page_on_raid56(struct scrub_page *page)
+{
+ return page->recover &&
+ (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
+}
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
@@ -366,7 +371,7 @@ static struct full_stripe_lock *insert_full_stripe_lock(
struct full_stripe_lock *entry;
struct full_stripe_lock *ret;
- WARN_ON(!mutex_is_locked(&locks_root->lock));
+ lockdep_assert_held(&locks_root->lock);
p = &locks_root->root.rb_node;
while (*p) {
@@ -408,7 +413,7 @@ static struct full_stripe_lock *search_full_stripe_lock(
struct rb_node *node;
struct full_stripe_lock *entry;
- WARN_ON(!mutex_is_locked(&locks_root->lock));
+ lockdep_assert_held(&locks_root->lock);
node = locks_root->root.rb_node;
while (node) {
@@ -797,10 +802,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
*/
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
btrfs_warn_in_rcu(fs_info,
- "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
+"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
swarn->errstr, swarn->logical,
rcu_str_deref(swarn->dev->name),
- (unsigned long long)swarn->sector,
+ swarn->physical,
root, inum, offset,
min(isize - offset, (u64)PAGE_SIZE), nlink,
(char *)(unsigned long)ipath->fspath->val[i]);
@@ -810,10 +815,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
err:
btrfs_warn_in_rcu(fs_info,
- "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
+ "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
swarn->errstr, swarn->logical,
rcu_str_deref(swarn->dev->name),
- (unsigned long long)swarn->sector,
+ swarn->physical,
root, inum, offset, ret);
free_ipath(ipath);
@@ -845,7 +850,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
if (!path)
return;
- swarn.sector = (sblock->pagev[0]->physical) >> 9;
+ swarn.physical = sblock->pagev[0]->physical;
swarn.logical = sblock->pagev[0]->logical;
swarn.errstr = errstr;
swarn.dev = NULL;
@@ -868,10 +873,10 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
item_size, &ref_root,
&ref_level);
btrfs_warn_in_rcu(fs_info,
- "%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu",
+"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical,
rcu_str_deref(dev->name),
- (unsigned long long)swarn.sector,
+ swarn.physical,
ref_level ? "node" : "leaf",
ret < 0 ? -1 : ref_level,
ret < 0 ? -1 : ref_root);
@@ -883,7 +888,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
swarn.dev = dev;
iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, 1,
- scrub_print_warning_inode, &swarn);
+ scrub_print_warning_inode, &swarn, false);
}
out:
@@ -1047,7 +1052,7 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
* can be found.
*/
ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
- scrub_fixup_readpage, fixup);
+ scrub_fixup_readpage, fixup, false);
if (ret < 0) {
uncorrectable = 1;
goto out;
@@ -1106,7 +1111,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
struct scrub_ctx *sctx = sblock_to_check->sctx;
struct btrfs_device *dev;
struct btrfs_fs_info *fs_info;
- u64 length;
u64 logical;
unsigned int failed_mirror_index;
unsigned int is_metadata;
@@ -1134,7 +1138,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
spin_unlock(&sctx->stat_lock);
return 0;
}
- length = sblock_to_check->page_count * PAGE_SIZE;
logical = sblock_to_check->pagev[0]->logical;
BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
@@ -1323,15 +1326,34 @@ nodatasum_case:
* could happen otherwise that a correct page would be
* overwritten by a bad one).
*/
- for (mirror_index = 0;
- mirror_index < BTRFS_MAX_MIRRORS &&
- sblocks_for_recheck[mirror_index].page_count > 0;
- mirror_index++) {
+ for (mirror_index = 0; ;mirror_index++) {
struct scrub_block *sblock_other;
if (mirror_index == failed_mirror_index)
continue;
- sblock_other = sblocks_for_recheck + mirror_index;
+
+ /* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
+ if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
+ if (mirror_index >= BTRFS_MAX_MIRRORS)
+ break;
+ if (!sblocks_for_recheck[mirror_index].page_count)
+ break;
+
+ sblock_other = sblocks_for_recheck + mirror_index;
+ } else {
+ struct scrub_recover *r = sblock_bad->pagev[0]->recover;
+ int max_allowed = r->bbio->num_stripes -
+ r->bbio->num_tgtdevs;
+
+ if (mirror_index >= max_allowed)
+ break;
+ if (!sblocks_for_recheck[1].page_count)
+ break;
+
+ ASSERT(failed_mirror_index == 0);
+ sblock_other = sblocks_for_recheck + 1;
+ sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
+ }
/* build and submit the bios, check checksums */
scrub_recheck_block(fs_info, sblock_other, 0);
@@ -1388,8 +1410,17 @@ nodatasum_case:
if (!page_bad->io_error && !sctx->is_dev_replace)
continue;
- /* try to find no-io-error page in mirrors */
- if (page_bad->io_error) {
+ if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
+ /*
+ * In case of dev replace, if raid56 rebuild process
+ * didn't work out correct data, then copy the content
+ * in sblock_bad to make sure target device is identical
+ * to source device, instead of writing garbage data in
+ * sblock_for_recheck array to target device.
+ */
+ sblock_other = NULL;
+ } else if (page_bad->io_error) {
+ /* try to find no-io-error page in mirrors */
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
@@ -1666,49 +1697,71 @@ leave_nomem:
return 0;
}
-struct scrub_bio_ret {
- struct completion event;
- blk_status_t status;
-};
-
static void scrub_bio_wait_endio(struct bio *bio)
{
- struct scrub_bio_ret *ret = bio->bi_private;
-
- ret->status = bio->bi_status;
- complete(&ret->event);
-}
-
-static inline int scrub_is_page_on_raid56(struct scrub_page *page)
-{
- return page->recover &&
- (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
+ complete(bio->bi_private);
}
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
struct bio *bio,
struct scrub_page *page)
{
- struct scrub_bio_ret done;
+ DECLARE_COMPLETION_ONSTACK(done);
int ret;
+ int mirror_num;
- init_completion(&done.event);
- done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
+ mirror_num = page->sblock->pagev[0]->mirror_num;
ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
page->recover->map_length,
- page->mirror_num, 0);
+ mirror_num, 0);
if (ret)
return ret;
- wait_for_completion_io(&done.event);
- if (done.status)
- return -EIO;
+ wait_for_completion_io(&done);
+ return blk_status_to_errno(bio->bi_status);
+}
- return 0;
+static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
+ struct scrub_block *sblock)
+{
+ struct scrub_page *first_page = sblock->pagev[0];
+ struct bio *bio;
+ int page_num;
+
+ /* All pages in sblock belong to the same stripe on the same device. */
+ ASSERT(first_page->dev);
+ if (!first_page->dev->bdev)
+ goto out;
+
+ bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
+ bio_set_dev(bio, first_page->dev->bdev);
+
+ for (page_num = 0; page_num < sblock->page_count; page_num++) {
+ struct scrub_page *page = sblock->pagev[page_num];
+
+ WARN_ON(!page->page);
+ bio_add_page(bio, page->page, PAGE_SIZE, 0);
+ }
+
+ if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
+ bio_put(bio);
+ goto out;
+ }
+
+ bio_put(bio);
+
+ scrub_recheck_block_checksum(sblock);
+
+ return;
+out:
+ for (page_num = 0; page_num < sblock->page_count; page_num++)
+ sblock->pagev[page_num]->io_error = 1;
+
+ sblock->no_io_error_seen = 0;
}
/*
@@ -1726,6 +1779,10 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
sblock->no_io_error_seen = 1;
+ /* short cut for raid56 */
+ if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
+ return scrub_recheck_block_on_raid56(fs_info, sblock);
+
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio;
struct scrub_page *page = sblock->pagev[page_num];
@@ -1741,19 +1798,12 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
bio_set_dev(bio, page->dev->bdev);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
- if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
- if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
- page->io_error = 1;
- sblock->no_io_error_seen = 0;
- }
- } else {
- bio->bi_iter.bi_sector = page->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_iter.bi_sector = page->physical >> 9;
+ bio->bi_opf = REQ_OP_READ;
- if (btrfsic_submit_bio_wait(bio)) {
- page->io_error = 1;
- sblock->no_io_error_seen = 0;
- }
+ if (btrfsic_submit_bio_wait(bio)) {
+ page->io_error = 1;
+ sblock->no_io_error_seen = 0;
}
bio_put(bio);
@@ -2535,7 +2585,7 @@ leave_nomem:
}
WARN_ON(sblock->page_count == 0);
- if (dev->missing) {
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
/*
* This case should only be hit for RAID 5/6 device replace. See
* the comment in scrub_missing_raid56_pages() for details.
@@ -2721,7 +2771,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
}
/* scrub extent tries to collect up to 64 kB for each bio */
-static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
+static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
+ u64 logical, u64 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u64 physical_for_dev_replace)
{
@@ -2730,13 +2781,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
u32 blocksize;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
- blocksize = sctx->fs_info->sectorsize;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+ blocksize = map->stripe_len;
+ else
+ blocksize = sctx->fs_info->sectorsize;
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed++;
sctx->stat.data_bytes_scrubbed += len;
spin_unlock(&sctx->stat_lock);
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- blocksize = sctx->fs_info->nodesize;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+ blocksize = map->stripe_len;
+ else
+ blocksize = sctx->fs_info->nodesize;
spin_lock(&sctx->stat_lock);
sctx->stat.tree_extents_scrubbed++;
sctx->stat.tree_bytes_scrubbed += len;
@@ -2870,15 +2927,15 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
- if (dev->missing) {
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
scrub_parity_mark_sectors_error(sparity, logical, len);
return 0;
}
if (flags & BTRFS_EXTENT_FLAG_DATA) {
- blocksize = sctx->fs_info->sectorsize;
+ blocksize = sparity->stripe_len;
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- blocksize = sctx->fs_info->nodesize;
+ blocksize = sparity->stripe_len;
} else {
blocksize = sctx->fs_info->sectorsize;
WARN_ON(1);
@@ -3588,7 +3645,7 @@ again:
if (ret)
goto out;
- ret = scrub_extent(sctx, extent_logical, extent_len,
+ ret = scrub_extent(sctx, map, extent_logical, extent_len,
extent_physical, extent_dev, flags,
generation, extent_mirror_num,
extent_logical - logical + physical);
@@ -3878,11 +3935,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
- btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+ btrfs_dev_replace_write_lock(&fs_info->dev_replace);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
- btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+ btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, cache, is_dev_replace);
@@ -3918,10 +3975,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
- btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+ btrfs_dev_replace_write_lock(&fs_info->dev_replace);
dev_replace->cursor_left = dev_replace->cursor_right;
dev_replace->item_needs_writeback = 1;
- btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+ btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
if (ro_set)
btrfs_dec_block_group_ro(cache);
@@ -4112,12 +4169,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
- if (!dev || (dev->missing && !is_dev_replace)) {
+ if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
+ !is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -ENODEV;
}
- if (!is_dev_replace && !readonly && !dev->writeable) {
+ if (!is_dev_replace && !readonly &&
+ !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
rcu_read_lock();
name = rcu_dereference(dev->name);
@@ -4128,22 +4187,23 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
}
mutex_lock(&fs_info->scrub_lock);
- if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
+ if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EIO;
}
- btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
- if (dev->scrub_device ||
+ btrfs_dev_replace_read_lock(&fs_info->dev_replace);
+ if (dev->scrub_ctx ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
- btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+ btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+ btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) {
@@ -4160,7 +4220,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return PTR_ERR(sctx);
}
sctx->readonly = readonly;
- dev->scrub_device = sctx;
+ dev->scrub_ctx = sctx;
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/*
@@ -4195,7 +4255,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
memcpy(progress, &sctx->stat, sizeof(*progress));
mutex_lock(&fs_info->scrub_lock);
- dev->scrub_device = NULL;
+ dev->scrub_ctx = NULL;
scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock);
@@ -4252,16 +4312,16 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
struct scrub_ctx *sctx;
mutex_lock(&fs_info->scrub_lock);
- sctx = dev->scrub_device;
+ sctx = dev->scrub_ctx;
if (!sctx) {
mutex_unlock(&fs_info->scrub_lock);
return -ENOTCONN;
}
atomic_inc(&sctx->cancel_req);
- while (dev->scrub_device) {
+ while (dev->scrub_ctx) {
mutex_unlock(&fs_info->scrub_lock);
wait_event(fs_info->scrub_pause_wait,
- dev->scrub_device == NULL);
+ dev->scrub_ctx == NULL);
mutex_lock(&fs_info->scrub_lock);
}
mutex_unlock(&fs_info->scrub_lock);
@@ -4278,7 +4338,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (dev)
- sctx = dev->scrub_device;
+ sctx = dev->scrub_ctx;
if (sctx)
memcpy(progress, &sctx->stat, sizeof(*progress));
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -4390,7 +4450,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
}
ret = iterate_inodes_from_logical(logical, fs_info, path,
- record_inode_for_nocow, nocow_ctx);
+ record_inode_for_nocow, nocow_ctx, false);
if (ret != 0 && ret != -ENOENT) {
btrfs_warn(fs_info,
"iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
@@ -4470,7 +4530,8 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
* move on to the next inode.
*/
if (em->block_start > logical ||
- em->block_start + em->block_len < logical + len) {
+ em->block_start + em->block_len < logical + len ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
free_extent_map(em);
ret = 1;
goto out_unlock;
@@ -4478,8 +4539,7 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
free_extent_map(em);
out_unlock:
- unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
- GFP_NOFS);
+ unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
return ret;
}
@@ -4611,7 +4671,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
{
struct bio *bio;
struct btrfs_device *dev;
- int ret;
dev = sctx->wr_tgtdev;
if (!dev)
@@ -4626,17 +4685,15 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio_set_dev(bio, dev->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
- ret = bio_add_page(bio, page, PAGE_SIZE, 0);
- if (ret != PAGE_SIZE) {
-leave_with_eio:
+ /* bio_add_page won't fail on a freshly allocated bio */
+ bio_add_page(bio, page, PAGE_SIZE, 0);
+
+ if (btrfsic_submit_bio_wait(bio)) {
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
- if (btrfsic_submit_bio_wait(bio))
- goto leave_with_eio;
-
bio_put(bio);
return 0;
}