summaryrefslogtreecommitdiff
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c874
1 files changed, 535 insertions, 339 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 204c928beaf9..a40ee41f42c6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -66,8 +66,6 @@ struct scrub_ctx;
/* Represent one sector and its needed info to verify the content. */
struct scrub_sector_verification {
- bool is_metadata;
-
union {
/*
* Csum pointer for data csum verification. Should point to a
@@ -100,7 +98,39 @@ enum scrub_stripe_flags {
SCRUB_STRIPE_FLAG_NO_REPORT,
};
-#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
+/*
+ * We have multiple bitmaps for one scrub_stripe.
+ * However each bitmap has at most (BTRFS_STRIPE_LEN / blocksize) bits,
+ * which is normally 16, and much smaller than BITS_PER_LONG (32 or 64).
+ *
+ * So to reduce memory usage for each scrub_stripe, we pack those bitmaps
+ * into a larger one.
+ *
+ * These enum records where the sub-bitmap are inside the larger one.
+ * Each subbitmap starts at scrub_bitmap_nr_##name * nr_sectors bit.
+ */
+enum {
+ /* Which blocks are covered by extent items. */
+ scrub_bitmap_nr_has_extent = 0,
+
+ /* Which blocks are metadata. */
+ scrub_bitmap_nr_is_metadata,
+
+ /*
+ * Which blocks have errors, including IO, csum, and metadata
+ * errors.
+ * This sub-bitmap is the OR results of the next few error related
+ * sub-bitmaps.
+ */
+ scrub_bitmap_nr_error,
+ scrub_bitmap_nr_io_error,
+ scrub_bitmap_nr_csum_error,
+ scrub_bitmap_nr_meta_error,
+ scrub_bitmap_nr_meta_gen_error,
+ scrub_bitmap_nr_last,
+};
+
+#define SCRUB_STRIPE_MAX_FOLIOS (BTRFS_STRIPE_LEN / PAGE_SIZE)
/*
* Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
@@ -109,7 +139,7 @@ struct scrub_stripe {
struct scrub_ctx *sctx;
struct btrfs_block_group *bg;
- struct page *pages[SCRUB_STRIPE_PAGES];
+ struct folio *folios[SCRUB_STRIPE_MAX_FOLIOS];
struct scrub_sector_verification *sectors;
struct btrfs_device *dev;
@@ -138,36 +168,15 @@ struct scrub_stripe {
*/
unsigned long state;
- /* Indicate which sectors are covered by extent items. */
- unsigned long extent_sector_bitmap;
+ /* The large bitmap contains all the sub-bitmaps. */
+ unsigned long bitmaps[BITS_TO_LONGS(scrub_bitmap_nr_last *
+ (BTRFS_STRIPE_LEN / BTRFS_MIN_BLOCKSIZE))];
/*
- * The errors hit during the initial read of the stripe.
- *
- * Would be utilized for error reporting and repair.
- *
- * The remaining init_nr_* records the number of errors hit, only used
- * by error reporting.
+ * For writeback (repair or replace) error reporting.
+ * This one is protected by a spinlock, thus can not be packed into
+ * the larger bitmap.
*/
- unsigned long init_error_bitmap;
- unsigned int init_nr_io_errors;
- unsigned int init_nr_csum_errors;
- unsigned int init_nr_meta_errors;
-
- /*
- * The following error bitmaps are all for the current status.
- * Every time we submit a new read, these bitmaps may be updated.
- *
- * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
- *
- * IO and csum errors can happen for both metadata and data.
- */
- unsigned long error_bitmap;
- unsigned long io_error_bitmap;
- unsigned long csum_error_bitmap;
- unsigned long meta_error_bitmap;
-
- /* For writeback (repair or replace) error reporting. */
unsigned long write_error_bitmap;
/* Writeback can be concurrent, thus we need to protect the bitmap. */
@@ -197,7 +206,7 @@ struct scrub_ctx {
ktime_t throttle_deadline;
u64 throttle_sent;
- int is_dev_replace;
+ bool is_dev_replace;
u64 write_pointer;
struct mutex wr_lock;
@@ -219,6 +228,90 @@ struct scrub_ctx {
refcount_t refs;
};
+#define scrub_calc_start_bit(stripe, name, block_nr) \
+({ \
+ unsigned int __start_bit; \
+ \
+ ASSERT(block_nr < stripe->nr_sectors, \
+ "nr_sectors=%u block_nr=%u", stripe->nr_sectors, block_nr); \
+ __start_bit = scrub_bitmap_nr_##name * stripe->nr_sectors + block_nr; \
+ __start_bit; \
+})
+
+#define IMPLEMENT_SCRUB_BITMAP_OPS(name) \
+static inline void scrub_bitmap_set_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr, \
+ unsigned int nr_blocks) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, \
+ name, block_nr); \
+ \
+ bitmap_set(stripe->bitmaps, start_bit, nr_blocks); \
+} \
+static inline void scrub_bitmap_clear_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr, \
+ unsigned int nr_blocks) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ bitmap_clear(stripe->bitmaps, start_bit, nr_blocks); \
+} \
+static inline bool scrub_bitmap_test_bit_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ return test_bit(start_bit, stripe->bitmaps); \
+} \
+static inline void scrub_bitmap_set_bit_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ set_bit(start_bit, stripe->bitmaps); \
+} \
+static inline void scrub_bitmap_clear_bit_##name(struct scrub_stripe *stripe, \
+ unsigned int block_nr) \
+{ \
+ const unsigned int start_bit = scrub_calc_start_bit(stripe, name, \
+ block_nr); \
+ \
+ clear_bit(start_bit, stripe->bitmaps); \
+} \
+static inline unsigned long scrub_bitmap_read_##name(struct scrub_stripe *stripe) \
+{ \
+ const unsigned int nr_blocks = stripe->nr_sectors; \
+ \
+ ASSERT(nr_blocks > 0 && nr_blocks <= BITS_PER_LONG, \
+ "nr_blocks=%u BITS_PER_LONG=%u", \
+ nr_blocks, BITS_PER_LONG); \
+ \
+ return bitmap_read(stripe->bitmaps, nr_blocks * scrub_bitmap_nr_##name, \
+ stripe->nr_sectors); \
+} \
+static inline bool scrub_bitmap_empty_##name(struct scrub_stripe *stripe) \
+{ \
+ unsigned long bitmap = scrub_bitmap_read_##name(stripe); \
+ \
+ return bitmap_empty(&bitmap, stripe->nr_sectors); \
+} \
+static inline unsigned int scrub_bitmap_weight_##name(struct scrub_stripe *stripe) \
+{ \
+ unsigned long bitmap = scrub_bitmap_read_##name(stripe); \
+ \
+ return bitmap_weight(&bitmap, stripe->nr_sectors); \
+}
+IMPLEMENT_SCRUB_BITMAP_OPS(has_extent);
+IMPLEMENT_SCRUB_BITMAP_OPS(is_metadata);
+IMPLEMENT_SCRUB_BITMAP_OPS(error);
+IMPLEMENT_SCRUB_BITMAP_OPS(io_error);
+IMPLEMENT_SCRUB_BITMAP_OPS(csum_error);
+IMPLEMENT_SCRUB_BITMAP_OPS(meta_error);
+IMPLEMENT_SCRUB_BITMAP_OPS(meta_gen_error);
+
struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
@@ -228,15 +321,28 @@ struct scrub_warning {
struct btrfs_device *dev;
};
+struct scrub_error_records {
+ /*
+ * Bitmap recording which blocks hit errors (IO/csum/...) during the
+ * initial read.
+ */
+ unsigned long init_error_bitmap;
+
+ unsigned int nr_io_errors;
+ unsigned int nr_csum_errors;
+ unsigned int nr_meta_errors;
+ unsigned int nr_meta_gen_errors;
+};
+
static void release_scrub_stripe(struct scrub_stripe *stripe)
{
if (!stripe)
return;
- for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) {
- if (stripe->pages[i])
- __free_page(stripe->pages[i]);
- stripe->pages[i] = NULL;
+ for (int i = 0; i < SCRUB_STRIPE_MAX_FOLIOS; i++) {
+ if (stripe->folios[i])
+ folio_put(stripe->folios[i]);
+ stripe->folios[i] = NULL;
}
kfree(stripe->sectors);
kfree(stripe->csums);
@@ -249,6 +355,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
struct scrub_stripe *stripe)
{
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
int ret;
memset(stripe, 0, sizeof(*stripe));
@@ -261,7 +368,9 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
atomic_set(&stripe->pending_io, 0);
spin_lock_init(&stripe->write_error_lock);
- ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false);
+ ASSERT(BTRFS_STRIPE_LEN >> min_folio_shift <= SCRUB_STRIPE_MAX_FOLIOS);
+ ret = btrfs_alloc_folio_array(BTRFS_STRIPE_LEN >> min_folio_shift,
+ fs_info->block_min_order, stripe->folios);
if (ret < 0)
goto error;
@@ -340,7 +449,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx)
}
static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
- struct btrfs_fs_info *fs_info, int is_dev_replace)
+ struct btrfs_fs_info *fs_info, bool is_dev_replace)
{
struct scrub_ctx *sctx;
int i;
@@ -354,10 +463,10 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
- sctx->extent_path.search_commit_root = 1;
- sctx->extent_path.skip_locking = 1;
- sctx->csum_path.search_commit_root = 1;
- sctx->csum_path.skip_locking = 1;
+ sctx->extent_path.search_commit_root = true;
+ sctx->extent_path.skip_locking = true;
+ sctx->csum_path.search_commit_root = true;
+ sctx->csum_path.skip_locking = true;
for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
int ret;
@@ -396,7 +505,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
struct btrfs_inode_item *inode_item;
struct scrub_warning *swarn = warn_ctx;
struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
- struct inode_fs_paths *ipath = NULL;
+ struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL;
struct btrfs_root *local_root;
struct btrfs_key key;
@@ -450,8 +559,8 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
* hold all of the paths here
*/
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
- btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
+ btrfs_warn(fs_info,
+"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
@@ -460,18 +569,16 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
(char *)(unsigned long)ipath->fspath->val[i]);
btrfs_put_root(local_root);
- free_ipath(ipath);
return 0;
err:
- btrfs_warn_in_rcu(fs_info,
- "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
+ btrfs_warn(fs_info,
+ "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d",
swarn->errstr, swarn->logical,
btrfs_dev_name(swarn->dev),
swarn->physical,
root, inum, offset, ret);
- free_ipath(ipath);
return 0;
}
@@ -479,7 +586,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
bool is_super, u64 logical, u64 physical)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key found_key;
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
@@ -490,7 +597,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
/* Super block error, no need to search extent tree. */
if (is_super) {
- btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
+ btrfs_warn(fs_info, "scrub: %s on device %s, physical %llu",
errstr, btrfs_dev_name(dev), physical);
return;
}
@@ -506,7 +613,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
&flags);
if (ret < 0)
- goto out;
+ return;
swarn.extent_item_size = found_key.offset;
@@ -525,14 +632,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
&ref_level);
if (ret < 0) {
btrfs_warn(fs_info,
- "failed to resolve tree backref for logical %llu: %d",
- swarn.logical, ret);
+ "scrub: failed to resolve tree backref for logical %llu: %d",
+ swarn.logical, ret);
break;
}
if (ret > 0)
break;
- btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
+ btrfs_warn(fs_info,
+"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical, btrfs_dev_name(dev),
swarn.physical, (ref_level ? "node" : "leaf"),
ref_level, ref_root);
@@ -552,9 +659,6 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn);
}
-
-out:
- btrfs_free_path(path);
}
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
@@ -579,20 +683,32 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
return ret;
}
-static struct page *scrub_stripe_get_page(struct scrub_stripe *stripe, int sector_nr)
+static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
- int page_index = (sector_nr << fs_info->sectorsize_bits) >> PAGE_SHIFT;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+ u32 offset = (sector_nr << fs_info->sectorsize_bits);
+ const struct folio *folio = stripe->folios[offset >> min_folio_shift];
- return stripe->pages[page_index];
+ /* stripe->folios[] is allocated by us and no highmem is allowed. */
+ ASSERT(folio);
+ ASSERT(!folio_test_highmem(folio));
+ return folio_address(folio) + offset_in_folio(folio, offset);
}
-static unsigned int scrub_stripe_get_page_offset(struct scrub_stripe *stripe,
- int sector_nr)
+static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+ u32 offset = (sector_nr << fs_info->sectorsize_bits);
+ const struct folio *folio = stripe->folios[offset >> min_folio_shift];
- return offset_in_page(sector_nr << fs_info->sectorsize_bits);
+ /* stripe->folios[] is allocated by us and no highmem is allowed. */
+ ASSERT(folio);
+ ASSERT(!folio_test_highmem(folio));
+ /* And the range must be contained inside the folio. */
+ ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio));
+ return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset);
}
static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr)
@@ -600,46 +716,44 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits);
- const struct page *first_page = scrub_stripe_get_page(stripe, sector_nr);
- const unsigned int first_off = scrub_stripe_get_page_offset(stripe, sector_nr);
+ void *first_kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
+ struct btrfs_header *header = first_kaddr;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 on_disk_csum[BTRFS_CSUM_SIZE];
u8 calculated_csum[BTRFS_CSUM_SIZE];
- struct btrfs_header *header;
/*
* Here we don't have a good way to attach the pages (and subpages)
* to a dummy extent buffer, thus we have to directly grab the members
* from pages.
*/
- header = (struct btrfs_header *)(page_address(first_page) + first_off);
memcpy(on_disk_csum, header->csum, fs_info->csum_size);
if (logical != btrfs_stack_header_bytenr(header)) {
- bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad bytenr, has %llu want %llu",
+ "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_bytenr(header), logical);
return;
}
if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) != 0) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad fsid, has %pU want %pU",
+ "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU",
logical, stripe->mirror_num,
header->fsid, fs_info->fs_devices->fsid);
return;
}
if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE) != 0) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
+ "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU",
logical, stripe->mirror_num,
header->chunk_tree_uuid, fs_info->chunk_tree_uuid);
return;
@@ -648,42 +762,40 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
/* Now check tree block csum. */
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
- crypto_shash_update(shash, page_address(first_page) + first_off +
- BTRFS_CSUM_SIZE, fs_info->sectorsize - BTRFS_CSUM_SIZE);
+ crypto_shash_update(shash, first_kaddr + BTRFS_CSUM_SIZE,
+ fs_info->sectorsize - BTRFS_CSUM_SIZE);
for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) {
- struct page *page = scrub_stripe_get_page(stripe, i);
- unsigned int page_off = scrub_stripe_get_page_offset(stripe, i);
-
- crypto_shash_update(shash, page_address(page) + page_off,
+ crypto_shash_update(shash, scrub_stripe_get_kaddr(stripe, i),
fs_info->sectorsize);
}
crypto_shash_final(shash, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
+"scrub: tree block %llu mirror %u has bad csum, has " BTRFS_CSUM_FMT " want " BTRFS_CSUM_FMT,
logical, stripe->mirror_num,
- CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
- CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
+ BTRFS_CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum),
+ BTRFS_CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum));
return;
}
if (stripe->sectors[sector_nr].generation !=
btrfs_stack_header_generation(header)) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
- "tree block %llu mirror %u has bad generation, has %llu want %llu",
+ "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu",
logical, stripe->mirror_num,
btrfs_stack_header_generation(header),
stripe->sectors[sector_nr].generation);
return;
}
- bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
- bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
- bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_csum_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_meta_error(stripe, sector_nr, sectors_per_tree);
+ scrub_bitmap_clear_meta_gen_error(stripe, sector_nr, sectors_per_tree);
}
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
@@ -691,23 +803,22 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct scrub_sector_verification *sector = &stripe->sectors[sector_nr];
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
- struct page *page = scrub_stripe_get_page(stripe, sector_nr);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr);
+ phys_addr_t paddr = scrub_stripe_get_paddr(stripe, sector_nr);
u8 csum_buf[BTRFS_CSUM_SIZE];
int ret;
ASSERT(sector_nr >= 0 && sector_nr < stripe->nr_sectors);
/* Sector not utilized, skip it. */
- if (!test_bit(sector_nr, &stripe->extent_sector_bitmap))
+ if (!scrub_bitmap_test_bit_has_extent(stripe, sector_nr))
return;
/* IO error, no need to check. */
- if (test_bit(sector_nr, &stripe->io_error_bitmap))
+ if (scrub_bitmap_test_bit_io_error(stripe, sector_nr))
return;
/* Metadata, verify the full tree block. */
- if (sector->is_metadata) {
+ if (scrub_bitmap_test_bit_is_metadata(stripe, sector_nr)) {
/*
* Check if the tree block crosses the stripe boundary. If
* crossed the boundary, we cannot verify it but only give a
@@ -718,7 +829,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
*/
if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) {
btrfs_warn_rl(fs_info,
- "tree block at %llu crosses stripe boundary %llu",
+ "scrub: tree block at %llu crosses stripe boundary %llu",
stripe->logical +
(sector_nr << fs_info->sectorsize_bits),
stripe->logical);
@@ -733,17 +844,17 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
* cases without csum, we have no other choice but to trust it.
*/
if (!sector->csum) {
- clear_bit(sector_nr, &stripe->error_bitmap);
+ scrub_bitmap_clear_bit_error(stripe, sector_nr);
return;
}
- ret = btrfs_check_sector_csum(fs_info, page, pgoff, csum_buf, sector->csum);
+ ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, sector->csum);
if (ret < 0) {
- set_bit(sector_nr, &stripe->csum_error_bitmap);
- set_bit(sector_nr, &stripe->error_bitmap);
+ scrub_bitmap_set_bit_csum_error(stripe, sector_nr);
+ scrub_bitmap_set_bit_error(stripe, sector_nr);
} else {
- clear_bit(sector_nr, &stripe->csum_error_bitmap);
- clear_bit(sector_nr, &stripe->error_bitmap);
+ scrub_bitmap_clear_bit_csum_error(stripe, sector_nr);
+ scrub_bitmap_clear_bit_error(stripe, sector_nr);
}
}
@@ -756,7 +867,7 @@ static void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long b
for_each_set_bit(sector_nr, &bitmap, stripe->nr_sectors) {
scrub_verify_one_sector(stripe, sector_nr);
- if (stripe->sectors[sector_nr].is_metadata)
+ if (scrub_bitmap_test_bit_is_metadata(stripe, sector_nr))
sector_nr += sectors_per_tree - 1;
}
}
@@ -766,8 +877,7 @@ static int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first
int i;
for (i = 0; i < stripe->nr_sectors; i++) {
- if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page &&
- scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset)
+ if (scrub_stripe_get_kaddr(stripe, i) == bvec_virt(first_bvec))
break;
}
ASSERT(i < stripe->nr_sectors);
@@ -795,13 +905,13 @@ static void scrub_repair_read_endio(struct btrfs_bio *bbio)
bio_size += bvec->bv_len;
if (bbio->bio.bi_status) {
- bitmap_set(&stripe->io_error_bitmap, sector_nr,
- bio_size >> fs_info->sectorsize_bits);
- bitmap_set(&stripe->error_bitmap, sector_nr,
- bio_size >> fs_info->sectorsize_bits);
+ scrub_bitmap_set_io_error(stripe, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
+ scrub_bitmap_set_error(stripe, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
} else {
- bitmap_clear(&stripe->io_error_bitmap, sector_nr,
- bio_size >> fs_info->sectorsize_bits);
+ scrub_bitmap_clear_io_error(stripe, sector_nr,
+ bio_size >> fs_info->sectorsize_bits);
}
bio_put(&bbio->bio);
if (atomic_dec_and_test(&stripe->pending_io))
@@ -814,27 +924,55 @@ static int calc_next_mirror(int mirror, int num_copies)
return (mirror + 1 > num_copies) ? 1 : mirror + 1;
}
+static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *stripe,
+ int sector_nr)
+{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+ void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
+ int ret;
+
+ ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), fs_info->sectorsize,
+ offset_in_page(kaddr));
+ /*
+ * Caller should ensure the bbio has enough size.
+ * And we cannot use __bio_add_page(), which doesn't do any merge.
+ *
+ * Meanwhile for scrub_submit_initial_read() we fully rely on the merge
+ * to create the minimal amount of bio vectors, for fs block size < page
+ * size cases.
+ */
+ ASSERT(ret == fs_info->sectorsize);
+}
+
+static struct btrfs_bio *alloc_scrub_bbio(struct btrfs_fs_info *fs_info,
+ unsigned int nr_vecs, blk_opf_t opf,
+ u64 logical,
+ btrfs_bio_end_io_t end_io, void *private)
+{
+ struct btrfs_bio *bbio;
+
+ bbio = btrfs_bio_alloc(nr_vecs, opf, BTRFS_I(fs_info->btree_inode),
+ logical, end_io, private);
+ bbio->is_scrub = true;
+ bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ return bbio;
+}
+
static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
int mirror, int blocksize, bool wait)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- const unsigned long old_error_bitmap = stripe->error_bitmap;
+ const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
int i;
- ASSERT(stripe->mirror_num >= 1);
- ASSERT(atomic_read(&stripe->pending_io) == 0);
+ ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num);
+ ASSERT(atomic_read(&stripe->pending_io) == 0,
+ "atomic_read(&stripe->pending_io)=%d", atomic_read(&stripe->pending_io));
for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) {
- struct page *page;
- int pgoff;
- int ret;
-
- page = scrub_stripe_get_page(stripe, i);
- pgoff = scrub_stripe_get_page_offset(stripe, i);
-
/* The current sector cannot be merged, submit the bio. */
- if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) ||
+ if (bbio && ((i > 0 && !test_bit(i - 1, &old_error_bitmap)) ||
bbio->bio.bi_iter.bi_size >= blocksize)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
@@ -844,15 +982,12 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
bbio = NULL;
}
- if (!bbio) {
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_repair_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = (stripe->logical +
- (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
- }
+ if (!bbio)
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ,
+ stripe->logical + (i << fs_info->sectorsize_bits),
+ scrub_repair_read_endio, stripe);
- ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
- ASSERT(ret == fs_info->sectorsize);
+ scrub_bio_add_sector(bbio, stripe, i);
}
if (bbio) {
ASSERT(bbio->bio.bi_iter.bi_size);
@@ -864,12 +999,15 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
}
static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
- struct scrub_stripe *stripe)
+ struct scrub_stripe *stripe,
+ const struct scrub_error_records *errors)
{
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_device *dev = NULL;
+ const unsigned long extent_bitmap = scrub_bitmap_read_has_extent(stripe);
+ const unsigned long error_bitmap = scrub_bitmap_read_error(stripe);
u64 physical = 0;
int nr_data_sectors = 0;
int nr_meta_sectors = 0;
@@ -886,14 +1024,14 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
* Although our scrub_stripe infrastructure is mostly based on btrfs_submit_bio()
* thus no need for dev/physical, error reporting still needs dev and physical.
*/
- if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) {
+ if (!bitmap_empty(&errors->init_error_bitmap, stripe->nr_sectors)) {
u64 mapped_len = fs_info->sectorsize;
struct btrfs_io_context *bioc = NULL;
int stripe_index = stripe->mirror_num - 1;
int ret;
/* For scrub, our mirror_num should always start at 1. */
- ASSERT(stripe->mirror_num >= 1);
+ ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num);
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
stripe->logical, &mapped_len, &bioc,
NULL, NULL);
@@ -909,10 +1047,10 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
}
skip:
- for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
+ for_each_set_bit(sector_nr, &extent_bitmap, stripe->nr_sectors) {
bool repaired = false;
- if (stripe->sectors[sector_nr].is_metadata) {
+ if (scrub_bitmap_test_bit_is_metadata(stripe, sector_nr)) {
nr_meta_sectors++;
} else {
nr_data_sectors++;
@@ -920,14 +1058,14 @@ skip:
nr_nodatacsum_sectors++;
}
- if (test_bit(sector_nr, &stripe->init_error_bitmap) &&
- !test_bit(sector_nr, &stripe->error_bitmap)) {
+ if (test_bit(sector_nr, &errors->init_error_bitmap) &&
+ !test_bit(sector_nr, &error_bitmap)) {
nr_repaired_sectors++;
repaired = true;
}
/* Good sector from the beginning, nothing need to be done. */
- if (!test_bit(sector_nr, &stripe->init_error_bitmap))
+ if (!test_bit(sector_nr, &errors->init_error_bitmap))
continue;
/*
@@ -936,13 +1074,13 @@ skip:
*/
if (repaired) {
if (dev) {
- btrfs_err_rl_in_rcu(fs_info,
- "fixed up error at logical %llu on dev %s physical %llu",
+ btrfs_err_rl(fs_info,
+ "scrub: fixed up error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
- btrfs_err_rl_in_rcu(fs_info,
- "fixed up error at logical %llu on mirror %u",
+ btrfs_err_rl(fs_info,
+ "scrub: fixed up error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
continue;
@@ -950,41 +1088,56 @@ skip:
/* The remaining are all for unrepaired. */
if (dev) {
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (regular) error at logical %llu on dev %s physical %llu",
+ btrfs_err_rl(fs_info,
+"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu",
stripe->logical, btrfs_dev_name(dev),
physical);
} else {
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (regular) error at logical %llu on mirror %u",
+ btrfs_err_rl(fs_info,
+ "scrub: unable to fixup (regular) error at logical %llu on mirror %u",
stripe->logical, stripe->mirror_num);
}
- if (test_bit(sector_nr, &stripe->io_error_bitmap))
+ if (scrub_bitmap_test_bit_io_error(stripe, sector_nr))
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("i/o error", dev, false,
stripe->logical, physical);
- if (test_bit(sector_nr, &stripe->csum_error_bitmap))
+ if (scrub_bitmap_test_bit_csum_error(stripe, sector_nr))
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("checksum error", dev, false,
stripe->logical, physical);
- if (test_bit(sector_nr, &stripe->meta_error_bitmap))
+ if (scrub_bitmap_test_bit_meta_error(stripe, sector_nr))
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("header error", dev, false,
stripe->logical, physical);
+ if (scrub_bitmap_test_bit_meta_gen_error(stripe, sector_nr))
+ if (__ratelimit(&rs) && dev)
+ scrub_print_common_warning("generation error", dev, false,
+ stripe->logical, physical);
}
+ /* Update the device stats. */
+ for (int i = 0; i < errors->nr_io_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
+ for (int i = 0; i < errors->nr_csum_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ /* Generation mismatch error is based on each metadata, not each block. */
+ for (int i = 0; i < errors->nr_meta_gen_errors;
+ i += (fs_info->nodesize >> fs_info->sectorsize_bits))
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
+
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
sctx->stat.no_csum += nr_nodatacsum_sectors;
- sctx->stat.read_errors += stripe->init_nr_io_errors;
- sctx->stat.csum_errors += stripe->init_nr_csum_errors;
- sctx->stat.verify_errors += stripe->init_nr_meta_errors;
+ sctx->stat.read_errors += errors->nr_io_errors;
+ sctx->stat.csum_errors += errors->nr_csum_errors;
+ sctx->stat.verify_errors += errors->nr_meta_errors +
+ errors->nr_meta_gen_errors;
sctx->stat.uncorrectable_errors +=
- bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
+ bitmap_weight(&error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
spin_unlock(&sctx->stat_lock);
}
@@ -1010,26 +1163,26 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
struct scrub_ctx *sctx = stripe->sctx;
struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct scrub_error_records errors = { 0 };
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
unsigned long repaired;
+ unsigned long error;
int mirror;
int i;
- ASSERT(stripe->mirror_num > 0);
+ ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num);
wait_scrub_stripe_io(stripe);
- scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
+ scrub_verify_one_stripe(stripe, scrub_bitmap_read_has_extent(stripe));
/* Save the initial failed bitmap for later repair and report usage. */
- stripe->init_error_bitmap = stripe->error_bitmap;
- stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap,
- stripe->nr_sectors);
- stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap,
- stripe->nr_sectors);
- stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
- stripe->nr_sectors);
-
- if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
+ errors.init_error_bitmap = scrub_bitmap_read_error(stripe);
+ errors.nr_io_errors = scrub_bitmap_weight_io_error(stripe);
+ errors.nr_csum_errors = scrub_bitmap_weight_csum_error(stripe);
+ errors.nr_meta_errors = scrub_bitmap_weight_meta_error(stripe);
+ errors.nr_meta_gen_errors = scrub_bitmap_weight_meta_gen_error(stripe);
+
+ if (bitmap_empty(&errors.init_error_bitmap, stripe->nr_sectors))
goto out;
/*
@@ -1041,13 +1194,13 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
for (mirror = calc_next_mirror(stripe->mirror_num, num_copies);
mirror != stripe->mirror_num;
mirror = calc_next_mirror(mirror, num_copies)) {
- const unsigned long old_error_bitmap = stripe->error_bitmap;
+ const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
scrub_stripe_submit_repair_read(stripe, mirror,
BTRFS_STRIPE_LEN, false);
wait_scrub_stripe_io(stripe);
scrub_verify_one_stripe(stripe, old_error_bitmap);
- if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ if (scrub_bitmap_empty_error(stripe))
goto out;
}
@@ -1065,21 +1218,22 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
for (i = 0, mirror = stripe->mirror_num;
i < num_copies;
i++, mirror = calc_next_mirror(mirror, num_copies)) {
- const unsigned long old_error_bitmap = stripe->error_bitmap;
+ const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe);
scrub_stripe_submit_repair_read(stripe, mirror,
fs_info->sectorsize, true);
wait_scrub_stripe_io(stripe);
scrub_verify_one_stripe(stripe, old_error_bitmap);
- if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ if (scrub_bitmap_empty_error(stripe))
goto out;
}
out:
+ error = scrub_bitmap_read_error(stripe);
/*
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
- bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap,
+ bitmap_andnot(&repaired, &errors.init_error_bitmap, &error,
stripe->nr_sectors);
if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) {
if (btrfs_is_zoned(fs_info)) {
@@ -1090,7 +1244,7 @@ out:
}
}
- scrub_stripe_report_errors(sctx, stripe);
+ scrub_stripe_report_errors(sctx, stripe, &errors);
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
wake_up(&stripe->repair_wait);
}
@@ -1110,10 +1264,10 @@ static void scrub_read_endio(struct btrfs_bio *bbio)
num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
if (bbio->bio.bi_status) {
- bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors);
- bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors);
+ scrub_bitmap_set_io_error(stripe, sector_nr, num_sectors);
+ scrub_bitmap_set_error(stripe, sector_nr, num_sectors);
} else {
- bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors);
+ scrub_bitmap_clear_io_error(stripe, sector_nr, num_sectors);
}
bio_put(&bbio->bio);
if (atomic_dec_and_test(&stripe->pending_io)) {
@@ -1142,6 +1296,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
bitmap_set(&stripe->write_error_bitmap, sector_nr,
bio_size >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
+ for (i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
}
bio_put(&bbio->bio);
@@ -1199,27 +1356,19 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
int sector_nr;
for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
- struct page *page = scrub_stripe_get_page(stripe, sector_nr);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr);
- int ret;
-
/* We should only writeback sectors covered by an extent. */
- ASSERT(test_bit(sector_nr, &stripe->extent_sector_bitmap));
+ ASSERT(scrub_bitmap_test_bit_has_extent(stripe, sector_nr));
/* Cannot merge with previous sector, submit the current one. */
if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
- if (!bbio) {
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_WRITE,
- fs_info, scrub_write_endio, stripe);
- bbio->bio.bi_iter.bi_sector = (stripe->logical +
- (sector_nr << fs_info->sectorsize_bits)) >>
- SECTOR_SHIFT;
- }
- ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
- ASSERT(ret == fs_info->sectorsize);
+ if (!bbio)
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_WRITE,
+ stripe->logical + (sector_nr << fs_info->sectorsize_bits),
+ scrub_write_endio, stripe);
+ scrub_bio_add_sector(bbio, stripe, sector_nr);
}
if (bbio)
scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
@@ -1246,8 +1395,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
* Slice is divided into intervals when the IO is submitted, adjust by
* bwlimit and maximum of 64 intervals.
*/
- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
- div = min_t(u32, 64, div);
+ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
/* Start new epoch, set deadline */
now = ktime_get();
@@ -1339,7 +1487,7 @@ static int compare_extent_item_range(struct btrfs_path *path,
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY ||
- key.type == BTRFS_METADATA_ITEM_KEY);
+ key.type == BTRFS_METADATA_ITEM_KEY, "key.type=%u", key.type);
if (key.type == BTRFS_METADATA_ITEM_KEY)
len = fs_info->nodesize;
else
@@ -1380,17 +1528,17 @@ static int find_first_extent_item(struct btrfs_root *extent_root,
if (path->nodes[0])
goto search_forward;
+ key.objectid = search_start;
if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
key.type = BTRFS_METADATA_ITEM_KEY;
else
key.type = BTRFS_EXTENT_ITEM_KEY;
- key.objectid = search_start;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- if (ret == 0) {
+ if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@@ -1444,7 +1592,7 @@ static void get_extent_info(struct btrfs_path *path, u64 *extent_start_ret,
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_METADATA_ITEM_KEY ||
- key.type == BTRFS_EXTENT_ITEM_KEY);
+ key.type == BTRFS_EXTENT_ITEM_KEY, "key.type=%u", key.type);
*extent_start_ret = key.objectid;
if (key.type == BTRFS_METADATA_ITEM_KEY)
*size_ret = path->nodes[0]->fs_info->nodesize;
@@ -1470,8 +1618,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
physical,
sctx->write_pointer);
if (ret)
- btrfs_err(fs_info,
- "zoned: failed to recover write pointer");
+ btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer");
}
mutex_unlock(&sctx->wr_lock);
btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical);
@@ -1493,9 +1640,9 @@ static void fill_one_extent_info(struct btrfs_fs_info *fs_info,
struct scrub_sector_verification *sector =
&stripe->sectors[nr_sector];
- set_bit(nr_sector, &stripe->extent_sector_bitmap);
+ scrub_bitmap_set_bit_has_extent(stripe, nr_sector);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- sector->is_metadata = true;
+ scrub_bitmap_set_bit_is_metadata(stripe, nr_sector);
sector->generation = extent_gen;
}
}
@@ -1503,15 +1650,8 @@ static void fill_one_extent_info(struct btrfs_fs_info *fs_info,
static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
{
- stripe->extent_sector_bitmap = 0;
- stripe->init_error_bitmap = 0;
- stripe->init_nr_io_errors = 0;
- stripe->init_nr_csum_errors = 0;
- stripe->init_nr_meta_errors = 0;
- stripe->error_bitmap = 0;
- stripe->io_error_bitmap = 0;
- stripe->csum_error_bitmap = 0;
- stripe->meta_error_bitmap = 0;
+ ASSERT(stripe->nr_sectors);
+ bitmap_zero(stripe->bitmaps, scrub_bitmap_nr_last * stripe->nr_sectors);
}
/*
@@ -1541,12 +1681,18 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
u64 extent_gen;
int ret;
+ if (unlikely(!extent_root || !csum_root)) {
+ btrfs_err(fs_info, "scrub: no valid extent or csum root found");
+ return -EUCLEAN;
+ }
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
stripe->nr_sectors);
scrub_stripe_reset_bitmaps(stripe);
/* The range must be inside the bg. */
- ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
+ ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length,
+ "bg->start=%llu logical_start=%llu logical_end=%llu end=%llu",
+ bg->start, logical_start, logical_end, bg->start + bg->length);
ret = find_first_extent_item(extent_root, extent_path, logical_start,
logical_len);
@@ -1642,7 +1788,6 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe)
stripe->state = 0;
for (int i = 0; i < stripe->nr_sectors; i++) {
- stripe->sectors[i].is_metadata = false;
stripe->sectors[i].csum = NULL;
stripe->sectors[i].generation = 0;
}
@@ -1661,24 +1806,21 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
+ const unsigned long has_extent = scrub_bitmap_read_has_extent(stripe);
u64 stripe_len = BTRFS_STRIPE_LEN;
int mirror = stripe->mirror_num;
int i;
atomic_inc(&stripe->pending_io);
- for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
- struct page *page = scrub_stripe_get_page(stripe, i);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i);
-
+ for_each_set_bit(i, &has_extent, stripe->nr_sectors) {
/* We're beyond the chunk boundary, no need to read anymore. */
if (i >= nr_sectors)
break;
/* The current sector cannot be merged, submit the bio. */
if (bbio &&
- ((i > 0 &&
- !test_bit(i - 1, &stripe->extent_sector_bitmap)) ||
+ ((i > 0 && !test_bit(i - 1, &has_extent)) ||
bbio->bio.bi_iter.bi_size >= stripe_len)) {
ASSERT(bbio->bio.bi_iter.bi_size);
atomic_inc(&stripe->pending_io);
@@ -1691,7 +1833,7 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
struct btrfs_io_context *bioc = NULL;
const u64 logical = stripe->logical +
(i << fs_info->sectorsize_bits);
- int err;
+ int ret;
io_stripe.rst_search_commit_root = true;
stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits;
@@ -1699,11 +1841,11 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
* For RST cases, we need to manually split the bbio to
* follow the RST boundary.
*/
- err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
&stripe_len, &bioc, &io_stripe, &mirror);
btrfs_put_bioc(bioc);
- if (err < 0) {
- if (err != -ENODATA) {
+ if (ret < 0) {
+ if (ret != -ENODATA) {
/*
* Earlier btrfs_get_raid_extent_offset()
* returned -ENODATA, which means there's
@@ -1712,18 +1854,17 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
* the extent tree, then it's a preallocated
* extent and not an error.
*/
- set_bit(i, &stripe->io_error_bitmap);
- set_bit(i, &stripe->error_bitmap);
+ scrub_bitmap_set_bit_io_error(stripe, i);
+ scrub_bitmap_set_bit_error(stripe, i);
}
continue;
}
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ,
+ logical, scrub_read_endio, stripe);
}
- __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
+ scrub_bio_add_sector(bbio, stripe, i);
}
if (bbio) {
@@ -1744,6 +1885,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_bio *bbio;
+ const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
int mirror = stripe->mirror_num;
@@ -1756,20 +1898,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
return;
}
- bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
- scrub_read_endio, stripe);
-
- bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
+ bbio = alloc_scrub_bbio(fs_info, BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ,
+ stripe->logical, scrub_read_endio, stripe);
/* Read the whole range inside the chunk boundary. */
- for (unsigned int cur = 0; cur < nr_sectors; cur++) {
- struct page *page = scrub_stripe_get_page(stripe, cur);
- unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur);
- int ret;
-
- ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
- /* We should have allocated enough bio vectors. */
- ASSERT(ret == fs_info->sectorsize);
- }
+ for (unsigned int cur = 0; cur < nr_sectors; cur++)
+ scrub_bio_add_sector(bbio, stripe, cur);
atomic_inc(&stripe->pending_io);
/*
@@ -1790,14 +1923,15 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
{
+ const unsigned long error = scrub_bitmap_read_error(stripe);
int i;
- for_each_set_bit(i, &stripe->error_bitmap, stripe->nr_sectors) {
- if (stripe->sectors[i].is_metadata) {
+ for_each_set_bit(i, &error, stripe->nr_sectors) {
+ if (scrub_bitmap_test_bit_is_metadata(stripe, i)) {
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
btrfs_err(fs_info,
- "stripe %llu has unrepaired metadata sector at %llu",
+ "scrub: stripe %llu has unrepaired metadata sector at logical %llu",
stripe->logical,
stripe->logical + (i << fs_info->sectorsize_bits));
return true;
@@ -1861,20 +1995,23 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
* metadata, we should immediately abort.
*/
for (int i = 0; i < nr_stripes; i++) {
- if (stripe_has_metadata_error(&sctx->stripes[i])) {
+ if (unlikely(stripe_has_metadata_error(&sctx->stripes[i]))) {
ret = -EIO;
goto out;
}
}
for (int i = 0; i < nr_stripes; i++) {
unsigned long good;
+ unsigned long has_extent;
+ unsigned long error;
stripe = &sctx->stripes[i];
ASSERT(stripe->dev == fs_info->dev_replace.srcdev);
- bitmap_andnot(&good, &stripe->extent_sector_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
+ has_extent = scrub_bitmap_read_has_extent(stripe);
+ error = scrub_bitmap_read_error(stripe);
+ bitmap_andnot(&good, &has_extent, &error, stripe->nr_sectors);
scrub_write_sectors(sctx, stripe, good, true);
}
}
@@ -1940,37 +2077,135 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
return 0;
}
+/*
+ * Return 0 if we should not cancel the scrub.
+ * Return <0 if we need to cancel the scrub, returned value will
+ * indicate the reason:
+ * - -ECANCELED - Being explicitly canceled through ioctl.
+ * - -EINTR - Being interrupted by signal or fs/process freezing.
+ */
+static int should_cancel_scrub(const struct scrub_ctx *sctx)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+
+ if (atomic_read(&fs_info->scrub_cancel_req) ||
+ atomic_read(&sctx->cancel_req))
+ return -ECANCELED;
+
+ /*
+ * The user (e.g. fsfreeze command) or power management (PM)
+ * suspend/hibernate can freeze the fs. And PM suspend/hibernate will
+ * also freeze all user processes.
+ *
+ * A user process can only be frozen when it is in user space, thus we
+ * have to cancel the run so that the process can return to the user
+ * space.
+ *
+ * Furthermore we have to check both filesystem and process freezing,
+ * as PM can be configured to freeze the filesystems before processes.
+ *
+ * If we only check fs freezing, then suspend without fs freezing
+ * will timeout, as the process is still in kernel space.
+ *
+ * If we only check process freezing, then suspend with fs freezing
+ * will timeout, as the running scrub will prevent the fs from being frozen.
+ */
+ if (fs_info->sb->s_writers.frozen > SB_UNFROZEN ||
+ freezing(current) || signal_pending(current))
+ return -EINTR;
+ return 0;
+}
+
+static int scrub_raid56_cached_parity(struct scrub_ctx *sctx,
+ struct btrfs_device *scrub_dev,
+ struct btrfs_chunk_map *map,
+ u64 full_stripe_start,
+ unsigned long *extent_bitmap)
+{
+ DECLARE_COMPLETION_ONSTACK(io_done);
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_raid_bio *rbio;
+ struct bio bio;
+ const int data_stripes = nr_data_stripes(map);
+ u64 length = btrfs_stripe_nr_to_offset(data_stripes);
+ int ret;
+
+ bio_init(&bio, NULL, NULL, 0, REQ_OP_READ);
+ bio.bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT;
+ bio.bi_private = &io_done;
+ bio.bi_end_io = raid56_scrub_wait_endio;
+
+ btrfs_bio_counter_inc_blocked(fs_info);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
+ &length, &bioc, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ /* For RAID56 write there must be an @bioc allocated. */
+ ASSERT(bioc);
+ rbio = raid56_parity_alloc_scrub_rbio(&bio, bioc, scrub_dev, extent_bitmap,
+ BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
+ btrfs_put_bioc(bioc);
+ if (!rbio) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* Use the recovered stripes as cache to avoid read them from disk again. */
+ for (int i = 0; i < data_stripes; i++) {
+ struct scrub_stripe *stripe = &sctx->raid56_data_stripes[i];
+
+ raid56_parity_cache_data_folios(rbio, stripe->folios,
+ full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
+ }
+ raid56_parity_submit_scrub_rbio(rbio);
+ wait_for_completion_io(&io_done);
+ ret = blk_status_to_errno(bio.bi_status);
+out:
+ btrfs_bio_counter_dec(fs_info);
+ bio_uninit(&bio);
+ return ret;
+}
+
static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
u64 full_stripe_start)
{
- DECLARE_COMPLETION_ONSTACK(io_done);
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_raid_bio *rbio;
- struct btrfs_io_context *bioc = NULL;
struct btrfs_path extent_path = { 0 };
struct btrfs_path csum_path = { 0 };
- struct bio *bio;
struct scrub_stripe *stripe;
bool all_empty = true;
const int data_stripes = nr_data_stripes(map);
unsigned long extent_bitmap = 0;
- u64 length = btrfs_stripe_nr_to_offset(data_stripes);
int ret;
ASSERT(sctx->raid56_data_stripes);
+ ret = should_cancel_scrub(sctx);
+ if (ret < 0)
+ return ret;
+
+ if (atomic_read(&fs_info->scrub_pause_req))
+ scrub_blocked_if_needed(fs_info);
+
+ spin_lock(&bg->lock);
+ if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) {
+ spin_unlock(&bg->lock);
+ return 0;
+ }
+ spin_unlock(&bg->lock);
+
/*
* For data stripe search, we cannot reuse the same extent/csum paths,
* as the data stripe bytenr may be smaller than previous extent. Thus
* we have to use our own extent/csum paths.
*/
- extent_path.search_commit_root = 1;
- extent_path.skip_locking = 1;
- csum_path.search_commit_root = 1;
- csum_path.skip_locking = 1;
+ extent_path.search_commit_root = true;
+ extent_path.skip_locking = true;
+ csum_path.search_commit_root = true;
+ csum_path.skip_locking = true;
for (int i = 0; i < data_stripes; i++) {
int stripe_index;
@@ -2008,7 +2243,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
/* Check if all data stripes are empty. */
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
- if (!bitmap_empty(&stripe->extent_sector_bitmap, stripe->nr_sectors)) {
+ if (!scrub_bitmap_empty_has_extent(stripe)) {
all_empty = false;
break;
}
@@ -2040,65 +2275,36 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
*/
for (int i = 0; i < data_stripes; i++) {
unsigned long error;
+ unsigned long has_extent;
stripe = &sctx->raid56_data_stripes[i];
+ error = scrub_bitmap_read_error(stripe);
+ has_extent = scrub_bitmap_read_has_extent(stripe);
+
/*
* We should only check the errors where there is an extent.
* As we may hit an empty data stripe while it's missing.
*/
- bitmap_and(&error, &stripe->error_bitmap,
- &stripe->extent_sector_bitmap, stripe->nr_sectors);
- if (!bitmap_empty(&error, stripe->nr_sectors)) {
+ bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
+ if (unlikely(!bitmap_empty(&error, stripe->nr_sectors))) {
btrfs_err(fs_info,
-"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
+"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
&error);
ret = -EIO;
goto out;
}
- bitmap_or(&extent_bitmap, &extent_bitmap,
- &stripe->extent_sector_bitmap, stripe->nr_sectors);
+ bitmap_or(&extent_bitmap, &extent_bitmap, &has_extent,
+ stripe->nr_sectors);
}
/* Now we can check and regenerate the P/Q stripe. */
- bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS);
- bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT;
- bio->bi_private = &io_done;
- bio->bi_end_io = raid56_scrub_wait_endio;
-
- btrfs_bio_counter_inc_blocked(fs_info);
- ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
- &length, &bioc, NULL, NULL);
- if (ret < 0) {
- btrfs_put_bioc(bioc);
- btrfs_bio_counter_dec(fs_info);
- goto out;
- }
- rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap,
- BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
- btrfs_put_bioc(bioc);
- if (!rbio) {
- ret = -ENOMEM;
- btrfs_bio_counter_dec(fs_info);
- goto out;
- }
- /* Use the recovered stripes as cache to avoid read them from disk again. */
- for (int i = 0; i < data_stripes; i++) {
- stripe = &sctx->raid56_data_stripes[i];
-
- raid56_parity_cache_data_pages(rbio, stripe->pages,
- full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
- }
- raid56_parity_submit_scrub_rbio(rbio);
- wait_for_completion_io(&io_done);
- ret = blk_status_to_errno(bio->bi_status);
- bio_put(bio);
- btrfs_bio_counter_dec(fs_info);
-
+ ret = scrub_raid56_cached_parity(sctx, scrub_dev, map, full_stripe_start,
+ &extent_bitmap);
+out:
btrfs_release_path(&extent_path);
btrfs_release_path(&csum_path);
-out:
return ret;
}
@@ -2129,18 +2335,13 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
u64 found_logical = U64_MAX;
u64 cur_physical = physical + cur_logical - logical_start;
- /* Canceled? */
- if (atomic_read(&fs_info->scrub_cancel_req) ||
- atomic_read(&sctx->cancel_req)) {
- ret = -ECANCELED;
+ ret = should_cancel_scrub(sctx);
+ if (ret < 0)
break;
- }
- /* Paused? */
- if (atomic_read(&fs_info->scrub_pause_req)) {
- /* Push queued extents */
+
+ if (atomic_read(&fs_info->scrub_pause_req))
scrub_blocked_if_needed(fs_info);
- }
- /* Block group removed? */
+
spin_lock(&bg->lock);
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) {
spin_unlock(&bg->lock);
@@ -2395,8 +2596,6 @@ out:
}
if (sctx->is_dev_replace && ret >= 0) {
- int ret2;
-
ret2 = sync_write_pointer_for_zoned(sctx,
chunk_logical + offset,
map->stripes[stripe_index].physical,
@@ -2471,7 +2670,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev, u64 start, u64 end)
{
struct btrfs_dev_extent *dev_extent = NULL;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_root *root = fs_info->dev_root;
u64 chunk_offset;
@@ -2489,12 +2688,12 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
return -ENOMEM;
path->reada = READA_FORWARD;
- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;
key.objectid = scrub_dev->devid;
- key.offset = 0ull;
key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = 0ull;
while (1) {
u64 dev_extent_len;
@@ -2673,14 +2872,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ro_set = 0;
} else if (ret == -ETXTBSY) {
btrfs_warn(fs_info,
- "skipping scrub of block group %llu due to active swapfile",
+ "scrub: skipping scrub of block group %llu due to active swapfile",
cache->start);
scrub_pause_off(fs_info);
ret = 0;
goto skip_unfreeze;
} else {
- btrfs_warn(fs_info,
- "failed setting block group ro: %d", ret);
+ btrfs_warn(fs_info, "scrub: failed setting block group ro: %d",
+ ret);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
@@ -2743,8 +2942,8 @@ skip_unfreeze:
btrfs_put_block_group(cache);
if (ret)
break;
- if (sctx->is_dev_replace &&
- atomic64_read(&dev_replace->num_write_errors) > 0) {
+ if (unlikely(sctx->is_dev_replace &&
+ atomic64_read(&dev_replace->num_write_errors) > 0)) {
ret = -EIO;
break;
}
@@ -2757,8 +2956,6 @@ skip:
btrfs_release_path(path);
}
- btrfs_free_path(path);
-
return ret;
}
@@ -2766,29 +2963,23 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
struct page *page, u64 physical, u64 generation)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct bio_vec bvec;
- struct bio bio;
struct btrfs_super_block *sb = page_address(page);
int ret;
- bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_READ);
- bio.bi_iter.bi_sector = physical >> SECTOR_SHIFT;
- __bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0);
- ret = submit_bio_wait(&bio);
- bio_uninit(&bio);
-
+ ret = bdev_rw_virt(dev->bdev, physical >> SECTOR_SHIFT, sb,
+ BTRFS_SUPER_INFO_SIZE, REQ_OP_READ);
if (ret < 0)
return ret;
ret = btrfs_check_super_csum(fs_info, sb);
- if (ret != 0) {
+ if (unlikely(ret != 0)) {
btrfs_err_rl(fs_info,
- "super block at physical %llu devid %llu has bad csum",
+ "scrub: super block at physical %llu devid %llu has bad csum",
physical, dev->devid);
return -EIO;
}
- if (btrfs_super_generation(sb) != generation) {
+ if (unlikely(btrfs_super_generation(sb) != generation)) {
btrfs_err_rl(fs_info,
-"super block at physical %llu devid %llu has bad generation %llu expect %llu",
+"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu",
physical, dev->devid,
btrfs_super_generation(sb), generation);
return -EUCLEAN;
@@ -2904,7 +3095,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
- int readonly, int is_dev_replace)
+ bool readonly, bool is_dev_replace)
{
struct btrfs_dev_lookup_args args = { .devid = devid };
struct scrub_ctx *sctx;
@@ -2913,6 +3104,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
unsigned int nofs_flag;
bool need_commit = false;
+ /* Set the basic fallback @last_physical before we got a sctx. */
+ if (progress)
+ progress->last_physical = start;
+
if (btrfs_fs_closing(fs_info))
return -EAGAIN;
@@ -2931,6 +3126,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
sctx = scrub_setup_ctx(fs_info, is_dev_replace);
if (IS_ERR(sctx))
return PTR_ERR(sctx);
+ sctx->stat.last_physical = start;
ret = scrub_workers_get(fs_info);
if (ret)
@@ -2948,16 +3144,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- btrfs_err_in_rcu(fs_info,
- "scrub on devid %llu: filesystem on %s is not writable",
+ btrfs_err(fs_info,
+ "scrub: devid %llu: filesystem on %s is not writable",
devid, btrfs_dev_name(dev));
ret = -EROFS;
goto out;
}
mutex_lock(&fs_info->scrub_lock);
- if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
- test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
+ if (unlikely(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+ test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state))) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
ret = -EIO;