summaryrefslogtreecommitdiff
path: root/fs/btrfs/raid56.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r--fs/btrfs/raid56.c1614
1 files changed, 899 insertions, 715 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 6a2cf754912d..f38d8305e46d 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -14,7 +14,6 @@
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "messages.h"
-#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
@@ -41,6 +40,85 @@
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
+/*
+ * Log the contents of @bioc at critical level: one summary line, then one
+ * line per stripe with its index, device id and physical offset.
+ *
+ * A NULL @bioc is logged as "bioc=NULL" and nothing else is printed.
+ */
+static void dump_bioc(const struct btrfs_fs_info *fs_info, const struct btrfs_io_context *bioc)
+{
+ if (unlikely(!bioc)) {
+ btrfs_crit(fs_info, "bioc=NULL");
+ return;
+ }
+ btrfs_crit(fs_info,
+"bioc logical=%llu full_stripe=%llu size=%llu map_type=0x%llx mirror=%u replace_nr_stripes=%u replace_stripe_src=%d num_stripes=%u",
+ bioc->logical, bioc->full_stripe_logical, bioc->size,
+ bioc->map_type, bioc->mirror_num, bioc->replace_nr_stripes,
+ bioc->replace_stripe_src, bioc->num_stripes);
+ /* NOTE(review): stripes[i].dev is dereferenced unchecked; assumed non-NULL. */
+ for (int i = 0; i < bioc->num_stripes; i++) {
+ btrfs_crit(fs_info, " nr=%d devid=%llu physical=%llu",
+ i, bioc->stripes[i].dev->devid,
+ bioc->stripes[i].physical);
+ }
+}
+
+/*
+ * Log the state of @rbio at critical level: first its bioc (via dump_bioc()),
+ * then the rbio flags, geometry counters, scrubp and dbitmap.
+ *
+ * Returns immediately without logging unless CONFIG_BTRFS_ASSERT is enabled.
+ */
+static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_raid_bio *rbio)
+{
+ if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
+ return;
+
+ dump_bioc(fs_info, rbio->bioc);
+ btrfs_crit(fs_info,
+"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u sector_nsteps=%u scrubp=%u dbitmap=0x%lx",
+ rbio->flags, rbio->nr_sectors, rbio->nr_data,
+ rbio->real_stripes, rbio->stripe_nsectors,
+ rbio->sector_nsteps, rbio->scrubp, rbio->dbitmap);
+}
+
+/*
+ * Assert @expr; if it is false and CONFIG_BTRFS_ASSERT is enabled, dump @rbio
+ * first so its state is logged before ASSERT() is reached.
+ *
+ * fs_info is taken from rbio->bioc, or NULL when the rbio has no bioc.
+ * @expr and @rbio appear more than once in the expansion, so they must not
+ * have side effects.
+ */
+#define ASSERT_RBIO(expr, rbio) \
+({ \
+ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
+ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
+ (rbio)->bioc->fs_info : NULL; \
+ \
+ btrfs_dump_rbio(__fs_info, (rbio)); \
+ } \
+ ASSERT((expr)); \
+})
+
+/* Same as ASSERT_RBIO(), additionally logging @stripe_nr (printed with %d). */
+#define ASSERT_RBIO_STRIPE(expr, rbio, stripe_nr) \
+({ \
+ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
+ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
+ (rbio)->bioc->fs_info : NULL; \
+ \
+ btrfs_dump_rbio(__fs_info, (rbio)); \
+ btrfs_crit(__fs_info, "stripe_nr=%d", (stripe_nr)); \
+ } \
+ ASSERT((expr)); \
+})
+
+/* Same as ASSERT_RBIO(), additionally logging @sector_nr (printed with %d). */
+#define ASSERT_RBIO_SECTOR(expr, rbio, sector_nr) \
+({ \
+ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
+ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
+ (rbio)->bioc->fs_info : NULL; \
+ \
+ btrfs_dump_rbio(__fs_info, (rbio)); \
+ btrfs_crit(__fs_info, "sector_nr=%d", (sector_nr)); \
+ } \
+ ASSERT((expr)); \
+})
+
+/* Same as ASSERT_RBIO(), additionally logging @logical (printed with %llu). */
+#define ASSERT_RBIO_LOGICAL(expr, rbio, logical) \
+({ \
+ if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
+ const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
+ (rbio)->bioc->fs_info : NULL; \
+ \
+ btrfs_dump_rbio(__fs_info, (rbio)); \
+ btrfs_crit(__fs_info, "logical=%llu", (logical)); \
+ } \
+ ASSERT((expr)); \
+})
+
/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
struct list_head hash_list;
@@ -56,30 +134,25 @@ struct btrfs_stripe_hash_table {
};
/*
- * A bvec like structure to present a sector inside a page.
- *
- * Unlike bvec we don't need bvlen, as it's fixed to sectorsize.
+ * The PFN may still be valid, but our paddrs should always be block size
+ * aligned, thus such -1 paddr is definitely not a valid one.
*/
-struct sector_ptr {
- struct page *page;
- unsigned int pgoff:24;
- unsigned int uptodate:8;
-};
+#define INVALID_PADDR (~(phys_addr_t)0)
static void rmw_rbio_work(struct work_struct *work);
static void rmw_rbio_work_locked(struct work_struct *work);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
static void scrub_rbio_work_locked(struct work_struct *work);
+/*
+ * Free every array and bitmap hanging off @rbio, but not @rbio itself.
+ *
+ * kfree() and bitmap_free() both accept NULL, so this is safe on a partially
+ * set up rbio, which is how alloc_rbio() uses it on allocation failure.
+ */
static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
{
bitmap_free(rbio->error_bitmap);
+ /* Allocated with bitmap_zalloc() in alloc_rbio(); leaked if not freed here. */
+ bitmap_free(rbio->stripe_uptodate_bitmap);
kfree(rbio->stripe_pages);
- kfree(rbio->bio_sectors);
- kfree(rbio->stripe_sectors);
+ kfree(rbio->bio_paddrs);
+ kfree(rbio->stripe_paddrs);
kfree(rbio->finish_pointers);
}
@@ -122,8 +195,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
struct btrfs_stripe_hash_table *x;
struct btrfs_stripe_hash *cur;
struct btrfs_stripe_hash *h;
- int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
- int i;
+ unsigned int num_entries = 1U << BTRFS_STRIPE_HASH_TABLE_BITS;
if (info->stripe_hash_table)
return 0;
@@ -144,7 +216,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
h = table->table;
- for (i = 0; i < num_entries; i++) {
+ for (unsigned int i = 0; i < num_entries; i++) {
cur = h + i;
INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock);
@@ -155,6 +227,24 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
return 0;
}
+/*
+ * Copy sector @sector_nr from the bio pages into the stripe pages.
+ *
+ * The copy is done in rbio->sector_nsteps chunks of min(sectorsize, PAGE_SIZE)
+ * bytes each; both the source and destination paddr of every chunk are
+ * asserted to be valid.
+ */
+static void memcpy_from_bio_to_stripe(struct btrfs_raid_bio *rbio, unsigned int sector_nr)
+{
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
+
+ ASSERT(sector_nr < rbio->nr_sectors);
+ for (int i = 0; i < rbio->sector_nsteps; i++) {
+ /* bio_paddrs[] and stripe_paddrs[] share the same per-step indexing. */
+ unsigned int index = sector_nr * rbio->sector_nsteps + i;
+ phys_addr_t dst = rbio->stripe_paddrs[index];
+ phys_addr_t src = rbio->bio_paddrs[index];
+
+ ASSERT(dst != INVALID_PADDR);
+ ASSERT(src != INVALID_PADDR);
+
+ memcpy_page(phys_to_page(dst), offset_in_page(dst),
+ phys_to_page(src), offset_in_page(src), step);
+ }
+}
+
/*
* caching an rbio means to copy anything from the
* bio_sectors array into the stripe_pages array. We
@@ -175,24 +265,19 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_sectors; i++) {
/* Some range not covered by bio (partial write), skip it */
- if (!rbio->bio_sectors[i].page) {
+ if (rbio->bio_paddrs[i * rbio->sector_nsteps] == INVALID_PADDR) {
/*
* Even if the sector is not covered by bio, if it is
* a data sector it should still be uptodate as it is
* read from disk.
*/
if (i < rbio->nr_data * rbio->stripe_nsectors)
- ASSERT(rbio->stripe_sectors[i].uptodate);
+ ASSERT(test_bit(i, rbio->stripe_uptodate_bitmap));
continue;
}
- ASSERT(rbio->stripe_sectors[i].page);
- memcpy_page(rbio->stripe_sectors[i].page,
- rbio->stripe_sectors[i].pgoff,
- rbio->bio_sectors[i].page,
- rbio->bio_sectors[i].pgoff,
- rbio->bioc->fs_info->sectorsize);
- rbio->stripe_sectors[i].uptodate = 1;
+ memcpy_from_bio_to_stripe(rbio, i);
+ set_bit(i, rbio->stripe_uptodate_bitmap);
}
set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}
@@ -202,7 +287,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
*/
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
- u64 num = rbio->bioc->raid_map[0];
+ u64 num = rbio->bioc->full_stripe_logical;
/*
* we shift down quite a bit. We're using byte
@@ -215,19 +300,48 @@ static int rbio_bucket(struct btrfs_raid_bio *rbio)
return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}
-static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
- unsigned int page_nr)
+/* Get the sector number of the first sector covered by @page_nr. */
+static u32 page_nr_to_sector_nr(struct btrfs_raid_bio *rbio, unsigned int page_nr)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- const u32 sectors_per_page = PAGE_SIZE / sectorsize;
+ u32 sector_nr;
+
+ ASSERT(page_nr < rbio->nr_pages);
+
+ /*
+ * Byte offset of the page, shifted down to a sector number. When a
+ * sector is larger than a page the shift rounds down, so several
+ * pages map to the same sector.
+ */
+ sector_nr = (page_nr << PAGE_SHIFT) >> rbio->bioc->fs_info->sectorsize_bits;
+ ASSERT(sector_nr < rbio->nr_sectors);
+ return sector_nr;
+}
+
+/*
+ * Get the number of sectors covered by @page_nr.
+ *
+ * For bs > ps cases, the result will always be 1.
+ * For bs <= ps cases, the result will be ps / bs.
+ *
+ * @page_nr is only range checked; the result does not depend on it.
+ */
+static u32 page_nr_to_num_sectors(struct btrfs_raid_bio *rbio, unsigned int page_nr)
+{
+ struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ u32 nr_sectors;
+
+ ASSERT(page_nr < rbio->nr_pages);
+
+ /* round_up() makes a page smaller than one block still count as 1 sector. */
+ nr_sectors = round_up(PAGE_SIZE, fs_info->sectorsize) >> fs_info->sectorsize_bits;
+ ASSERT(nr_sectors > 0);
+ return nr_sectors;
+}
+
+static __maybe_unused bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
+ unsigned int page_nr)
+{
+ const u32 sector_nr = page_nr_to_sector_nr(rbio, page_nr);
+ const u32 nr_bits = page_nr_to_num_sectors(rbio, page_nr);
int i;
ASSERT(page_nr < rbio->nr_pages);
+ ASSERT(sector_nr + nr_bits < rbio->nr_sectors);
- for (i = sectors_per_page * page_nr;
- i < sectors_per_page * page_nr + sectors_per_page;
- i++) {
- if (!rbio->stripe_sectors[i].uptodate)
+ for (i = sector_nr; i < sector_nr + nr_bits; i++) {
+ if (!test_bit(i, rbio->stripe_uptodate_bitmap))
return false;
}
return true;
@@ -240,41 +354,44 @@ static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
*/
static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+ /* One stripe_paddrs[] entry per step of min(sectorsize, PAGE_SIZE) bytes. */
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
u32 offset;
int i;
- for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
+ for (i = 0, offset = 0; i < rbio->nr_sectors * rbio->sector_nsteps;
+ i++, offset += step) {
int page_index = offset >> PAGE_SHIFT;
ASSERT(page_index < rbio->nr_pages);
- rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
- rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
+ /* No page at this slot: the existing stripe_paddrs[i] value is left untouched. */
+ if (!rbio->stripe_pages[page_index])
+ continue;
+
+ rbio->stripe_paddrs[i] = page_to_phys(rbio->stripe_pages[page_index]) +
+ offset_in_page(offset);
}
}
+/*
+ * Move stripe page @page_nr from @src to @dest.
+ *
+ * Any page @dest already holds at that slot is freed, @src's slot is cleared,
+ * and every sector covered by the page is marked uptodate in
+ * @dest->stripe_uptodate_bitmap.
+ */
static void steal_rbio_page(struct btrfs_raid_bio *src,
struct btrfs_raid_bio *dest, int page_nr)
{
- const u32 sectorsize = src->bioc->fs_info->sectorsize;
- const u32 sectors_per_page = PAGE_SIZE / sectorsize;
- int i;
+ const u32 sector_nr = page_nr_to_sector_nr(src, page_nr);
+ const u32 nr_bits = page_nr_to_num_sectors(src, page_nr);
+
+ ASSERT(page_nr < src->nr_pages);
+ /* The last page ends exactly at nr_sectors, so the bound is inclusive. */
+ ASSERT(sector_nr + nr_bits <= src->nr_sectors);
if (dest->stripe_pages[page_nr])
__free_page(dest->stripe_pages[page_nr]);
dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
src->stripe_pages[page_nr] = NULL;
- /* Also update the sector->uptodate bits. */
- for (i = sectors_per_page * page_nr;
- i < sectors_per_page * page_nr + sectors_per_page; i++)
- dest->stripe_sectors[i].uptodate = true;
+ /* Also update the stripe_uptodate_bitmap bits. */
+ bitmap_set(dest->stripe_uptodate_bitmap, sector_nr, nr_bits);
}
static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr)
{
- const int sector_nr = (page_nr << PAGE_SHIFT) >>
- rbio->bioc->fs_info->sectorsize_bits;
+ const int sector_nr = page_nr_to_sector_nr(rbio, page_nr);
/*
* We have ensured PAGE_SIZE is aligned with sectorsize, thus
@@ -332,12 +449,11 @@ static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
static void merge_rbio(struct btrfs_raid_bio *dest,
struct btrfs_raid_bio *victim)
{
- bio_list_merge(&dest->bio_list, &victim->bio_list);
+ /*
+ * Splice victim's bios onto dest. The _init variant is used in place
+ * of the separate bio_list_init(&victim->bio_list) call removed below.
+ */
+ bio_list_merge_init(&dest->bio_list, &victim->bio_list);
dest->bio_list_bytes += victim->bio_list_bytes;
/* Also inherit the bitmaps from @victim. */
bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
dest->stripe_nsectors);
- bio_list_init(&victim->bio_list);
}
/*
@@ -407,16 +523,15 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
struct btrfs_stripe_hash_table *table;
- unsigned long flags;
+ /* Nothing to do unless the rbio is currently flagged as cached. */
if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
return;
table = rbio->bioc->fs_info->stripe_hash_table;
- spin_lock_irqsave(&table->cache_lock, flags);
+ /* cache_lock is held across __remove_rbio_from_cache(). */
+ spin_lock(&table->cache_lock);
__remove_rbio_from_cache(rbio);
- spin_unlock_irqrestore(&table->cache_lock, flags);
+ spin_unlock(&table->cache_lock);
}
/*
@@ -425,19 +540,17 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
struct btrfs_stripe_hash_table *table;
- unsigned long flags;
struct btrfs_raid_bio *rbio;
table = info->stripe_hash_table;
- spin_lock_irqsave(&table->cache_lock, flags);
+ spin_lock(&table->cache_lock);
+ /* Keep removing the first cached rbio until the stripe_cache list is empty. */
while (!list_empty(&table->stripe_cache)) {
- rbio = list_entry(table->stripe_cache.next,
- struct btrfs_raid_bio,
- stripe_cache);
+ rbio = list_first_entry(&table->stripe_cache,
+ struct btrfs_raid_bio, stripe_cache);
__remove_rbio_from_cache(rbio);
}
- spin_unlock_irqrestore(&table->cache_lock, flags);
+ spin_unlock(&table->cache_lock);
}
/*
@@ -467,14 +580,13 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
struct btrfs_stripe_hash_table *table;
- unsigned long flags;
if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
return;
table = rbio->bioc->fs_info->stripe_hash_table;
- spin_lock_irqsave(&table->cache_lock, flags);
+ spin_lock(&table->cache_lock);
spin_lock(&rbio->bio_list_lock);
/* bump our ref if we were not in the list before */
@@ -493,15 +605,15 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
if (table->cache_size > RBIO_CACHE_SIZE) {
struct btrfs_raid_bio *found;
- found = list_entry(table->stripe_cache.prev,
- struct btrfs_raid_bio,
- stripe_cache);
+ found = list_last_entry(&table->stripe_cache,
+ struct btrfs_raid_bio,
+ stripe_cache);
if (found != rbio)
__remove_rbio_from_cache(found);
}
- spin_unlock_irqrestore(&table->cache_lock, flags);
+ spin_unlock(&table->cache_lock);
}
/*
@@ -530,15 +642,14 @@ static void run_xor(void **pages, int src_cnt, ssize_t len)
*/
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
- unsigned long flags;
+ /* NOTE(review): bio_list_bytes is sampled here, before bio_list_lock is taken. */
unsigned long size = rbio->bio_list_bytes;
int ret = 1;
- spin_lock_irqsave(&rbio->bio_list_lock, flags);
+ spin_lock(&rbio->bio_list_lock);
+ /* Full means the queued bytes equal nr_data whole stripes. */
if (size != rbio->nr_data * BTRFS_STRIPE_LEN)
ret = 0;
BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN);
- spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
+ spin_unlock(&rbio->bio_list_lock);
return ret;
}
@@ -571,7 +682,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
test_bit(RBIO_CACHE_BIT, &cur->flags))
return 0;
- if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
+ if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical)
return 0;
/* we can't merge with different operations */
@@ -588,46 +699,68 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;
- if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
- last->operation == BTRFS_RBIO_READ_REBUILD)
+ if (last->operation == BTRFS_RBIO_READ_REBUILD)
return 0;
return 1;
}
-static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
- unsigned int stripe_nr,
- unsigned int sector_nr)
+/* Return the sector index for @stripe_nr and @sector_nr. */
+static unsigned int rbio_sector_index(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr,
+ unsigned int sector_nr)
{
- ASSERT(stripe_nr < rbio->real_stripes);
- ASSERT(sector_nr < rbio->stripe_nsectors);
+ unsigned int ret;
+
+ ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr);
+ ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr);
+
+ /* Sectors are numbered stripe by stripe, stripe_nsectors per stripe. */
+ ret = stripe_nr * rbio->stripe_nsectors + sector_nr;
+ ASSERT(ret < rbio->nr_sectors);
+ return ret;
+}
+
+/* Return the paddr array index for @stripe_nr, @sector_nr and @step_nr. */
+static unsigned int rbio_paddr_index(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr,
+ unsigned int sector_nr,
+ unsigned int step_nr)
+{
+ unsigned int ret;
- return stripe_nr * rbio->stripe_nsectors + sector_nr;
+ /* NOTE(review): on failure @step_nr is logged under the "sector_nr=" label. */
+ ASSERT_RBIO_SECTOR(step_nr < rbio->sector_nsteps, rbio, step_nr);
+
+ /* Each sector owns sector_nsteps consecutive entries in the paddr arrays. */
+ ret = rbio_sector_index(rbio, stripe_nr, sector_nr) * rbio->sector_nsteps + step_nr;
+ ASSERT(ret < rbio->nr_sectors * rbio->sector_nsteps);
+ return ret;
}
-/* Return a sector from rbio->stripe_sectors, not from the bio list */
-static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio,
- unsigned int stripe_nr,
- unsigned int sector_nr)
+/*
+ * Return the paddr stored in rbio->stripe_paddrs[] (not the bio list) for
+ * step @step_nr of sector @sector_nr in stripe @stripe_nr.
+ */
+static phys_addr_t rbio_stripe_paddr(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr, unsigned int sector_nr,
+ unsigned int step_nr)
{
- return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr,
- sector_nr)];
+ return rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, step_nr)];
}
-/* Grab a sector inside P stripe */
-static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio,
- unsigned int sector_nr)
+/*
+ * Return the stripe paddr for step @step_nr of sector @sector_nr inside the
+ * P stripe, i.e. the stripe at index rbio->nr_data.
+ */
+static phys_addr_t rbio_pstripe_paddr(const struct btrfs_raid_bio *rbio,
+ unsigned int sector_nr, unsigned int step_nr)
{
- return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr);
+ return rbio_stripe_paddr(rbio, rbio->nr_data, sector_nr, step_nr);
}
-/* Grab a sector inside Q stripe, return NULL if not RAID6 */
-static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio,
- unsigned int sector_nr)
+/*
+ * Return the stripe paddr for step @step_nr of sector @sector_nr inside the
+ * Q stripe (index rbio->nr_data + 1).
+ *
+ * Return INVALID_PADDR when there is only one stripe past the data stripes,
+ * i.e. there is no Q stripe.
+ */
+static phys_addr_t rbio_qstripe_paddr(const struct btrfs_raid_bio *rbio,
+ unsigned int sector_nr, unsigned int step_nr)
{
if (rbio->nr_data + 1 == rbio->real_stripes)
- return NULL;
- return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr);
+ return INVALID_PADDR;
+ return rbio_stripe_paddr(rbio, rbio->nr_data + 1, sector_nr, step_nr);
+}
+
+/* Return a paddr pointer into the rbio::stripe_paddrs[] for the specified sector. */
+static phys_addr_t *rbio_stripe_paddrs(const struct btrfs_raid_bio *rbio,
+ unsigned int stripe_nr, unsigned int sector_nr)
+{
+ /* Points at step 0; the sector's remaining steps follow at consecutive indexes. */
+ return &rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, 0)];
}
/*
@@ -657,16 +790,15 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_stripe_hash *h;
struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *pending;
- unsigned long flags;
struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
- spin_lock_irqsave(&h->lock, flags);
+ spin_lock(&h->lock);
list_for_each_entry(cur, &h->hash_list, hash_list) {
- if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
+ if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical)
continue;
spin_lock(&cur->bio_list_lock);
@@ -724,7 +856,7 @@ lockit:
refcount_inc(&rbio->refs);
list_add(&rbio->hash_list, &h->hash_list);
out:
- spin_unlock_irqrestore(&h->lock, flags);
+ spin_unlock(&h->lock);
if (cache_drop)
remove_rbio_from_cache(cache_drop);
if (freeit)
@@ -742,7 +874,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
int bucket;
struct btrfs_stripe_hash *h;
- unsigned long flags;
int keep_cache = 0;
bucket = rbio_bucket(rbio);
@@ -751,7 +882,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
if (list_empty(&rbio->plug_list))
cache_rbio(rbio);
- spin_lock_irqsave(&h->lock, flags);
+ spin_lock(&h->lock);
spin_lock(&rbio->bio_list_lock);
if (!list_empty(&rbio->hash_list)) {
@@ -788,12 +919,9 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
list_add(&next->hash_list, &h->hash_list);
refcount_inc(&next->refs);
spin_unlock(&rbio->bio_list_lock);
- spin_unlock_irqrestore(&h->lock, flags);
+ spin_unlock(&h->lock);
- if (next->operation == BTRFS_RBIO_READ_REBUILD)
- start_async_work(next, recover_rbio_work_locked);
- else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
- steal_rbio(rbio, next);
+ if (next->operation == BTRFS_RBIO_READ_REBUILD) {
start_async_work(next, recover_rbio_work_locked);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
@@ -808,21 +936,21 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
}
done:
spin_unlock(&rbio->bio_list_lock);
- spin_unlock_irqrestore(&h->lock, flags);
+ spin_unlock(&h->lock);
done_nolock:
if (!keep_cache)
remove_rbio_from_cache(rbio);
}
-static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
+static void rbio_endio_bio_list(struct bio *cur, blk_status_t status)
{
struct bio *next;
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- cur->bi_status = err;
+ cur->bi_status = status;
bio_endio(cur);
cur = next;
}
@@ -832,7 +960,7 @@ static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t status)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *extra;
@@ -861,13 +989,13 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
extra = bio_list_get(&rbio->bio_list);
free_raid_bio(rbio);
- rbio_endio_bio_list(cur, err);
+ rbio_endio_bio_list(cur, status);
if (extra)
- rbio_endio_bio_list(extra, err);
+ rbio_endio_bio_list(extra, status);
}
/*
- * Get a sector pointer specified by its @stripe_nr and @sector_nr.
+ * Get paddr pointer for the sector specified by its @stripe_nr and @sector_nr.
*
* @rbio: The raid bio
* @stripe_nr: Stripe number, valid range [0, real_stripe)
@@ -877,32 +1005,52 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
*
* The read/modify/write code wants to reuse the original bio page as much
* as possible, and only use stripe_sectors as fallback.
+ *
+ * Return NULL if bio_list_only is set but the specified sector has no
+ * corresponding bio.
*/
-static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
- int stripe_nr, int sector_nr,
- bool bio_list_only)
+static phys_addr_t *sector_paddrs_in_rbio(struct btrfs_raid_bio *rbio,
+ int stripe_nr, int sector_nr,
+ bool bio_list_only)
{
- struct sector_ptr *sector;
- int index;
-
- ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes);
- ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
+ phys_addr_t *ret = NULL;
+ /* Index of step 0 of the sector; shared by bio_paddrs[] and stripe_paddrs[]. */
+ const int index = rbio_paddr_index(rbio, stripe_nr, sector_nr, 0);
- index = stripe_nr * rbio->stripe_nsectors + sector_nr;
- ASSERT(index >= 0 && index < rbio->nr_sectors);
+ ASSERT(index >= 0 && index < rbio->nr_sectors * rbio->sector_nsteps);
- spin_lock_irq(&rbio->bio_list_lock);
- sector = &rbio->bio_sectors[index];
- if (sector->page || bio_list_only) {
- /* Don't return sector without a valid page pointer */
- if (!sector->page)
- sector = NULL;
- spin_unlock_irq(&rbio->bio_list_lock);
- return sector;
+ /* bio_paddrs[] is only inspected with bio_list_lock held. */
+ scoped_guard(spinlock, &rbio->bio_list_lock) {
+ if (rbio->bio_paddrs[index] != INVALID_PADDR || bio_list_only) {
+ /* Don't return sector without a valid page pointer */
+ if (rbio->bio_paddrs[index] != INVALID_PADDR)
+ ret = &rbio->bio_paddrs[index];
+ return ret;
+ }
}
- spin_unlock_irq(&rbio->bio_list_lock);
+ /* No bio covers the sector and the caller accepts the stripe copy. */
+ return &rbio->stripe_paddrs[index];
+}
+
+/*
+ * Similar to sector_paddrs_in_rbio(), but return the paddr of a single step
+ * (@step_nr) for bs > ps cases, where a fs block is made of multiple steps.
+ */
+static phys_addr_t sector_paddr_in_rbio(struct btrfs_raid_bio *rbio,
+ int stripe_nr, int sector_nr, int step_nr,
+ bool bio_list_only)
+{
+ phys_addr_t ret = INVALID_PADDR;
+ const int index = rbio_paddr_index(rbio, stripe_nr, sector_nr, step_nr);
- return &rbio->stripe_sectors[index];
+ ASSERT(index >= 0 && index < rbio->nr_sectors * rbio->sector_nsteps);
+
+ /*
+ * Prefer the paddr recorded from the bio list. With @bio_list_only set,
+ * INVALID_PADDR is returned when no bio covers this step.
+ */
+ scoped_guard(spinlock, &rbio->bio_list_lock) {
+ if (rbio->bio_paddrs[index] != INVALID_PADDR || bio_list_only) {
+ /* Don't return sector without a valid page pointer */
+ if (rbio->bio_paddrs[index] != INVALID_PADDR)
+ ret = rbio->bio_paddrs[index];
+ return ret;
+ }
+ }
+ /* Otherwise fall back to the stripe page copy. */
+ return rbio->stripe_paddrs[index];
}
/*
@@ -912,40 +1060,56 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
struct btrfs_io_context *bioc)
{
- const unsigned int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
+ const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes;
const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
const unsigned int num_pages = stripe_npages * real_stripes;
const unsigned int stripe_nsectors =
BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
const unsigned int num_sectors = stripe_nsectors * real_stripes;
+ const unsigned int step = min(fs_info->sectorsize, PAGE_SIZE);
+ const unsigned int sector_nsteps = fs_info->sectorsize / step;
struct btrfs_raid_bio *rbio;
- /* PAGE_SIZE must also be aligned to sectorsize for subpage support */
- ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
+ /*
+ * For bs <= ps cases, ps must be aligned to bs.
+ * For bs > ps cases, bs must be aligned to ps.
+ */
+ ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize) ||
+ IS_ALIGNED(fs_info->sectorsize, PAGE_SIZE));
/*
* Our current stripe len should be fixed to 64k thus stripe_nsectors
* (at most 16) should be no larger than BITS_PER_LONG.
*/
ASSERT(stripe_nsectors <= BITS_PER_LONG);
+ /*
+ * Real stripes must be between 2 (2 disks RAID5, aka RAID1) and 255
+ * (U8_MAX, limited by u8).
+ */
+ ASSERT(real_stripes >= 2);
+ ASSERT(real_stripes <= U8_MAX);
+
rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *),
GFP_NOFS);
- rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
- GFP_NOFS);
- rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
- GFP_NOFS);
+ rbio->bio_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS);
+ rbio->stripe_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS);
rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
+ rbio->stripe_uptodate_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
- if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors ||
- !rbio->finish_pointers || !rbio->error_bitmap) {
+ if (!rbio->stripe_pages || !rbio->bio_paddrs || !rbio->stripe_paddrs ||
+ !rbio->finish_pointers || !rbio->error_bitmap || !rbio->stripe_uptodate_bitmap) {
free_raid_bio_pointers(rbio);
kfree(rbio);
return ERR_PTR(-ENOMEM);
}
+ for (int i = 0; i < num_sectors * sector_nsteps; i++) {
+ rbio->stripe_paddrs[i] = INVALID_PADDR;
+ rbio->bio_paddrs[i] = INVALID_PADDR;
+ }
bio_list_init(&rbio->bio_list);
init_waitqueue_head(&rbio->io_wait);
@@ -960,11 +1124,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
rbio->real_stripes = real_stripes;
rbio->stripe_npages = stripe_npages;
rbio->stripe_nsectors = stripe_nsectors;
+ rbio->sector_nsteps = sector_nsteps;
refcount_set(&rbio->refs, 1);
atomic_set(&rbio->stripes_pending, 0);
ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);
+ ASSERT(rbio->nr_data > 0);
return rbio;
}
@@ -974,7 +1140,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
{
int ret;
- ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
+ ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, false);
if (ret < 0)
return ret;
/* Mapping all sectors */
@@ -989,7 +1155,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
int ret;
ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
- rbio->stripe_pages + data_pages);
+ rbio->stripe_pages + data_pages, false);
if (ret < 0)
return ret;
@@ -998,13 +1164,13 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
}
/*
- * Return the total numer of errors found in the vertical stripe of @sector_nr.
+ * Return the total number of errors found in the vertical stripe of @sector_nr.
*
* @faila and @failb will also be updated to the first and second stripe
* number of the errors.
*/
-static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
- int *faila, int *failb)
+static int get_rbio_vertical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
+ int *faila, int *failb)
{
int stripe_nr;
int found_errors = 0;
@@ -1036,20 +1202,41 @@ static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
return found_errors;
}
+/*
+ * Add @nr_steps chunks of @step bytes each to @bio, taking the page and the
+ * in-page offset of every chunk from @paddrs[].
+ *
+ * Return the total number of bytes added when every chunk was accepted.
+ * Return 0 if bio_add_page() did not take a full chunk; in that case the
+ * bytes added by this call are subtracted from bio->bi_iter.bi_size again.
+ */
+static int bio_add_paddrs(struct bio *bio, phys_addr_t *paddrs, unsigned int nr_steps,
+ unsigned int step)
+{
+ int added = 0;
+ int ret;
+
+ for (int i = 0; i < nr_steps; i++) {
+ ret = bio_add_page(bio, phys_to_page(paddrs[i]), step,
+ offset_in_page(paddrs[i]));
+ if (ret != step)
+ goto revert;
+ added += ret;
+ }
+ return added;
+revert:
+ /*
+ * We don't need to revert the bvec, as the bio will be submitted immediately,
+ * as long as the size is reduced the extra bvec will not be accessed.
+ */
+ bio->bi_iter.bi_size -= added;
+ return 0;
+}
+
/*
* Add a single sector @sector into our list of bios for IO.
*
* Return 0 if everything went well.
- * Return <0 for error.
+ * Return <0 for error, and no byte will be added to @rbio.
*/
-static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list,
- struct sector_ptr *sector,
- unsigned int stripe_nr,
- unsigned int sector_nr,
- enum req_op op)
+static int rbio_add_io_paddrs(struct btrfs_raid_bio *rbio, struct bio_list *bio_list,
+ phys_addr_t *paddrs, unsigned int stripe_nr,
+ unsigned int sector_nr, enum req_op op)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+ const u32 step = min(sectorsize, PAGE_SIZE);
struct bio *last = bio_list->tail;
int ret;
struct bio *bio;
@@ -1061,9 +1248,11 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
* thus it can be larger than rbio->real_stripe.
* So here we check against bioc->num_stripes, not rbio->real_stripes.
*/
- ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes);
- ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
- ASSERT(sector->page);
+ ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes,
+ rbio, stripe_nr);
+ ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
+ rbio, sector_nr);
+ ASSERT(paddrs != NULL);
stripe = &rbio->bioc->stripes[stripe_nr];
disk_start = stripe->physical + sector_nr * sectorsize;
@@ -1076,16 +1265,16 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
rbio->error_bitmap);
/* Check if we have reached tolerance early. */
- found_errors = get_rbio_veritical_errors(rbio, sector_nr,
- NULL, NULL);
- if (found_errors > rbio->bioc->max_errors)
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr,
+ NULL, NULL);
+ if (unlikely(found_errors > rbio->bioc->max_errors))
return -EIO;
return 0;
}
/* see if we can add this page onto our existing bio */
if (last) {
- u64 last_end = last->bi_iter.bi_sector << 9;
+ u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT;
last_end += last->bi_iter.bi_size;
/*
@@ -1094,8 +1283,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
*/
if (last_end == disk_start && !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
- ret = bio_add_page(last, sector->page, sectorsize,
- sector->pgoff);
+ ret = bio_add_paddrs(last, paddrs, rbio->sector_nsteps, step);
if (ret == sectorsize)
return 0;
}
@@ -1105,34 +1293,30 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
bio = bio_alloc(stripe->dev->bdev,
max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
op, GFP_NOFS);
- bio->bi_iter.bi_sector = disk_start >> 9;
+ bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
bio->bi_private = rbio;
- bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
+ ret = bio_add_paddrs(bio, paddrs, rbio->sector_nsteps, step);
+ ASSERT(ret == sectorsize);
bio_list_add(bio_list, bio);
return 0;
}
static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- struct bio_vec bvec;
- struct bvec_iter iter;
+ struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u32 step_bits = min(fs_info->sectorsize_bits, PAGE_SHIFT);
+ struct bvec_iter iter = bio->bi_iter;
+ phys_addr_t paddr;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
- rbio->bioc->raid_map[0];
+ rbio->bioc->full_stripe_logical;
- bio_for_each_segment(bvec, bio, iter) {
- u32 bvec_offset;
+ btrfs_bio_for_each_block(paddr, bio, &iter, step) {
+ unsigned int index = (offset >> step_bits);
- for (bvec_offset = 0; bvec_offset < bvec.bv_len;
- bvec_offset += sectorsize, offset += sectorsize) {
- int index = offset / sectorsize;
- struct sector_ptr *sector = &rbio->bio_sectors[index];
-
- sector->page = bvec.bv_page;
- sector->pgoff = bvec.bv_offset + bvec_offset;
- ASSERT(sector->pgoff < PAGE_SIZE);
- }
+ rbio->bio_paddrs[index] = paddr;
+ offset += step;
}
}
@@ -1148,11 +1332,11 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
{
struct bio *bio;
- spin_lock_irq(&rbio->bio_list_lock);
+ spin_lock(&rbio->bio_list_lock);
bio_list_for_each(bio, &rbio->bio_list)
index_one_bio(rbio, bio);
- spin_unlock_irq(&rbio->bio_list_lock);
+ spin_unlock(&rbio->bio_list_lock);
}
static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio,
@@ -1183,52 +1367,94 @@ not_found:
trace_info->stripe_nr = -1;
}
-/* Generate PQ for one veritical stripe. */
-static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
+static inline void bio_list_put(struct bio_list *bio_list)
+{
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(bio_list)))
+ bio_put(bio);
+}
+
+static void assert_rbio(struct btrfs_raid_bio *rbio)
+{
+ if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
+ return;
+
+ /*
+ * At least two stripes (2 disks RAID5), and since real_stripes is U8,
+ * we won't go beyond 256 disks anyway.
+ */
+ ASSERT_RBIO(rbio->real_stripes >= 2, rbio);
+ ASSERT_RBIO(rbio->nr_data > 0, rbio);
+
+ /*
+ * This is another check to make sure nr data stripes is smaller
+ * than total stripes.
+ */
+ ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
+}
+
+static inline void *kmap_local_paddr(phys_addr_t paddr)
+{
+ /* The caller must pass a valid physical address, not INVALID_PADDR. */
+ ASSERT(paddr != INVALID_PADDR);
+
+ return kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr);
+}
+
+static void generate_pq_vertical_step(struct btrfs_raid_bio *rbio, unsigned int sector_nr,
+ unsigned int step_nr)
{
void **pointers = rbio->finish_pointers;
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- struct sector_ptr *sector;
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
int stripe;
const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6;
/* First collect one sector from each data stripe */
- for (stripe = 0; stripe < rbio->nr_data; stripe++) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 0);
- pointers[stripe] = kmap_local_page(sector->page) +
- sector->pgoff;
- }
+ for (stripe = 0; stripe < rbio->nr_data; stripe++)
+ pointers[stripe] = kmap_local_paddr(
+ sector_paddr_in_rbio(rbio, stripe, sector_nr, step_nr, 0));
/* Then add the parity stripe */
- sector = rbio_pstripe_sector(rbio, sectornr);
- sector->uptodate = 1;
- pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
+ pointers[stripe++] = kmap_local_paddr(rbio_pstripe_paddr(rbio, sector_nr, step_nr));
if (has_qstripe) {
/*
* RAID6, add the qstripe and call the library function
* to fill in our p/q
*/
- sector = rbio_qstripe_sector(rbio, sectornr);
- sector->uptodate = 1;
- pointers[stripe++] = kmap_local_page(sector->page) +
- sector->pgoff;
+ pointers[stripe++] = kmap_local_paddr(
+ rbio_qstripe_paddr(rbio, sector_nr, step_nr));
- raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
- pointers);
+ assert_rbio(rbio);
+ raid6_call.gen_syndrome(rbio->real_stripes, step, pointers);
} else {
/* raid5 */
- memcpy(pointers[rbio->nr_data], pointers[0], sectorsize);
- run_xor(pointers + 1, rbio->nr_data - 1, sectorsize);
+ memcpy(pointers[rbio->nr_data], pointers[0], step);
+ run_xor(pointers + 1, rbio->nr_data - 1, step);
}
for (stripe = stripe - 1; stripe >= 0; stripe--)
kunmap_local(pointers[stripe]);
}
+/* Generate PQ for one vertical stripe. */
+static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
+{
+ const bool has_qstripe = (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6);
+
+ for (int i = 0; i < rbio->sector_nsteps; i++)
+ generate_pq_vertical_step(rbio, sectornr, i);
+
+ set_bit(rbio_sector_index(rbio, rbio->nr_data, sectornr),
+ rbio->stripe_uptodate_bitmap);
+ if (has_qstripe)
+ set_bit(rbio_sector_index(rbio, rbio->nr_data + 1, sectornr),
+ rbio->stripe_uptodate_bitmap);
+}
+
static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
- struct bio *bio;
/* The total sector number inside the full stripe. */
int total_sector_nr;
int sectornr;
@@ -1252,7 +1478,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
*/
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
stripe = total_sector_nr / rbio->stripe_nsectors;
sectornr = total_sector_nr % rbio->stripe_nsectors;
@@ -1262,31 +1488,42 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
continue;
if (stripe < rbio->nr_data) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (!sector)
+ paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1);
+ if (paddrs == NULL)
continue;
} else {
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
}
- ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
+ ret = rbio_add_io_paddrs(rbio, bio_list, paddrs, stripe,
sectornr, REQ_OP_WRITE);
if (ret)
goto error;
}
- if (likely(!rbio->bioc->num_tgtdevs))
+ if (likely(!rbio->bioc->replace_nr_stripes))
return 0;
- /* Make a copy for the replace target device. */
+ /*
+ * Make a copy for the replace target device.
+ *
+ * Thus the source stripe number (in replace_stripe_src) should be valid.
+ */
+ ASSERT(rbio->bioc->replace_stripe_src >= 0);
+
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
stripe = total_sector_nr / rbio->stripe_nsectors;
sectornr = total_sector_nr % rbio->stripe_nsectors;
- if (!rbio->bioc->tgtdev_map[stripe]) {
+ /*
+ * For RAID56, there is only one device that can be replaced,
+ * and replace_stripe_src indicates the stripe number we
+ * need to copy from.
+ */
+ if (stripe != rbio->bioc->replace_stripe_src) {
/*
* We can skip the whole stripe completely, note
* total_sector_nr will be increased by one anyway.
@@ -1301,15 +1538,15 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
continue;
if (stripe < rbio->nr_data) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (!sector)
+ paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1);
+ if (paddrs == NULL)
continue;
} else {
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
}
- ret = rbio_add_io_sector(rbio, bio_list, sector,
- rbio->bioc->tgtdev_map[stripe],
+ ret = rbio_add_io_paddrs(rbio, bio_list, paddrs,
+ rbio->real_stripes,
sectornr, REQ_OP_WRITE);
if (ret)
goto error;
@@ -1317,8 +1554,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
return 0;
error:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
+ bio_list_put(bio_list);
return -EIO;
}
@@ -1326,7 +1562,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
- rbio->bioc->raid_map[0];
+ rbio->bioc->full_stripe_logical;
int total_nr_sector = offset >> fs_info->sectorsize_bits;
ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors);
@@ -1357,22 +1593,17 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
}
/*
- * For subpage case, we can no longer set page Uptodate directly for
- * stripe_pages[], thus we need to locate the sector.
+ * Return the sector number whose first step in rbio->stripe_paddrs[] is @paddr.
+ *
+ * Return -1 if not found.
*/
-static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
- struct page *page,
- unsigned int pgoff)
+static int find_stripe_sector_nr(struct btrfs_raid_bio *rbio, phys_addr_t paddr)
{
- int i;
-
- for (i = 0; i < rbio->nr_sectors; i++) {
- struct sector_ptr *sector = &rbio->stripe_sectors[i];
-
- if (sector->page == page && sector->pgoff == pgoff)
- return sector;
+ for (int i = 0; i < rbio->nr_sectors; i++) {
+ if (rbio->stripe_paddrs[i * rbio->sector_nsteps] == paddr)
+ return i;
}
- return NULL;
+ return -1;
}
/*
@@ -1382,38 +1613,34 @@ static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ const u32 step = min(sectorsize, PAGE_SIZE);
+ u32 offset = 0;
+ phys_addr_t paddr;
ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct sector_ptr *sector;
- int pgoff;
+ btrfs_bio_for_each_block_all(paddr, bio, step) {
+ /* Hitting the first step of a sector. */
+ if (IS_ALIGNED(offset, sectorsize)) {
+ int sector_nr = find_stripe_sector_nr(rbio, paddr);
- for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
- pgoff += sectorsize) {
- sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
- ASSERT(sector);
- if (sector)
- sector->uptodate = 1;
+ ASSERT(sector_nr >= 0);
+ if (sector_nr >= 0)
+ set_bit(sector_nr, rbio->stripe_uptodate_bitmap);
}
+ offset += step;
}
}
static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
{
- struct bio_vec *bv = bio_first_bvec_all(bio);
+ phys_addr_t bvec_paddr = bvec_phys(bio_first_bvec_all(bio));
int i;
for (i = 0; i < rbio->nr_sectors; i++) {
- struct sector_ptr *sector;
-
- sector = &rbio->stripe_sectors[i];
- if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
+ if (rbio->stripe_paddrs[i * rbio->sector_nsteps] == bvec_paddr)
break;
- sector = &rbio->bio_sectors[i];
- if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
+ if (rbio->bio_paddrs[i * rbio->sector_nsteps] == bvec_paddr)
break;
}
ASSERT(i < rbio->nr_sectors);
@@ -1425,13 +1652,20 @@ static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bi
int total_sector_nr = get_bio_sector_nr(rbio, bio);
u32 bio_size = 0;
struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ int i;
- bio_for_each_segment_all(bvec, bio, iter_all)
+ bio_for_each_bvec_all(bvec, bio, i)
bio_size += bvec->bv_len;
- bitmap_set(rbio->error_bitmap, total_sector_nr,
- bio_size >> rbio->bioc->fs_info->sectorsize_bits);
+ /*
+ * Since we can have multiple bios touching the error_bitmap, we cannot
+ * call bitmap_set() without protection.
+ *
+ * Instead use set_bit() for each bit, as set_bit() itself is atomic.
+ */
+ for (i = total_sector_nr; i < total_sector_nr +
+ (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++)
+ set_bit(i, rbio->error_bitmap);
}
/* Verify the data sectors at read time. */
@@ -1439,9 +1673,12 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+ const u32 nr_steps = rbio->sector_nsteps;
int total_sector_nr = get_bio_sector_nr(rbio, bio);
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ u32 offset = 0;
+ phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+ phys_addr_t paddr;
/* No data csum for the whole stripe, no need to verify. */
if (!rbio->csum_bitmap || !rbio->csum_buf)
@@ -1451,26 +1688,26 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
return;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- int bv_offset;
+ btrfs_bio_for_each_block_all(paddr, bio, step) {
+ u8 csum_buf[BTRFS_CSUM_SIZE];
+ u8 *expected_csum;
- for (bv_offset = bvec->bv_offset;
- bv_offset < bvec->bv_offset + bvec->bv_len;
- bv_offset += fs_info->sectorsize, total_sector_nr++) {
- u8 csum_buf[BTRFS_CSUM_SIZE];
- u8 *expected_csum = rbio->csum_buf +
- total_sector_nr * fs_info->csum_size;
- int ret;
+ paddrs[(offset / step) % nr_steps] = paddr;
+ offset += step;
- /* No csum for this sector, skip to the next sector. */
- if (!test_bit(total_sector_nr, rbio->csum_bitmap))
- continue;
+ /* Not yet covering the full fs block, continue to the next step. */
+ if (!IS_ALIGNED(offset, fs_info->sectorsize))
+ continue;
- ret = btrfs_check_sector_csum(fs_info, bvec->bv_page,
- bv_offset, csum_buf, expected_csum);
- if (ret < 0)
- set_bit(total_sector_nr, rbio->error_bitmap);
- }
+ /* No csum for this sector, skip to the next sector. */
+ if (!test_bit(total_sector_nr, rbio->csum_bitmap))
+ continue;
+
+ expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size;
+ btrfs_calculate_block_csum_pages(fs_info, paddrs, csum_buf);
+ if (unlikely(memcmp(csum_buf, expected_csum, fs_info->csum_size) != 0))
+ set_bit(total_sector_nr, rbio->error_bitmap);
+ total_sector_nr++;
}
}
@@ -1490,7 +1727,7 @@ static void raid_wait_read_end_io(struct bio *bio)
wake_up(&rbio->io_wait);
}
-static void submit_read_bios(struct btrfs_raid_bio *rbio,
+static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
struct bio_list *bio_list)
{
struct bio *bio;
@@ -1499,49 +1736,16 @@ static void submit_read_bios(struct btrfs_raid_bio *rbio,
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_read_end_io;
- if (trace_raid56_scrub_read_recover_enabled()) {
+ if (trace_raid56_read_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
- trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
+ trace_raid56_read(rbio, bio, &trace_info);
}
submit_bio(bio);
}
-}
-
-static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list)
-{
- struct bio *bio;
- int total_sector_nr;
- int ret = 0;
-
- ASSERT(bio_list_size(bio_list) == 0);
-
- /*
- * Build a list of bios to read all sectors (including data and P/Q).
- *
- * This behaviro is to compensate the later csum verification and
- * recovery.
- */
- for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
- total_sector_nr++) {
- struct sector_ptr *sector;
- int stripe = total_sector_nr / rbio->stripe_nsectors;
- int sectornr = total_sector_nr % rbio->stripe_nsectors;
-
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
- ret = rbio_add_io_sector(rbio, bio_list, sector,
- stripe, sectornr, REQ_OP_READ);
- if (ret)
- goto cleanup;
- }
- return 0;
-cleanup:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
- return ret;
+ wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
}
static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
@@ -1549,7 +1753,7 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
const int data_pages = rbio->nr_data * rbio->stripe_npages;
int ret;
- ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages);
+ ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, false);
if (ret < 0)
return ret;
@@ -1568,7 +1772,6 @@ struct btrfs_plug_cb {
struct blk_plug_cb cb;
struct btrfs_fs_info *info;
struct list_head rbio_list;
- struct work_struct work;
};
/*
@@ -1600,8 +1803,8 @@ static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
list_sort(NULL, &plug->rbio_list, plug_cmp);
while (!list_empty(&plug->rbio_list)) {
- cur = list_entry(plug->rbio_list.next,
- struct btrfs_raid_bio, plug_list);
+ cur = list_first_entry(&plug->rbio_list,
+ struct btrfs_raid_bio, plug_list);
list_del_init(&cur->plug_list);
if (rbio_is_full(cur)) {
@@ -1629,14 +1832,15 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
{
const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
- const u64 full_stripe_start = rbio->bioc->raid_map[0];
+ const u64 full_stripe_start = rbio->bioc->full_stripe_logical;
const u32 orig_len = orig_bio->bi_iter.bi_size;
const u32 sectorsize = fs_info->sectorsize;
u64 cur_logical;
- ASSERT(orig_logical >= full_stripe_start &&
- orig_logical + orig_len <= full_stripe_start +
- rbio->nr_data * BTRFS_STRIPE_LEN);
+ ASSERT_RBIO_LOGICAL(orig_logical >= full_stripe_start &&
+ orig_logical + orig_len <= full_stripe_start +
+ rbio->nr_data * BTRFS_STRIPE_LEN,
+ rbio, orig_logical);
bio_list_add(&rbio->bio_list, orig_bio);
rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
@@ -1660,12 +1864,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
struct btrfs_raid_bio *rbio;
struct btrfs_plug_cb *plug = NULL;
struct blk_plug_cb *cb;
- int ret = 0;
rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
- ret = PTR_ERR(rbio);
- goto fail;
+ bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
+ bio_endio(bio);
+ return;
}
rbio->operation = BTRFS_RBIO_WRITE;
rbio_add_bio(rbio, bio);
@@ -1674,41 +1878,33 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
* Don't plug on full rbios, just get them out the door
* as quickly as we can
*/
- if (rbio_is_full(rbio))
- goto queue_rbio;
-
- cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
- if (cb) {
- plug = container_of(cb, struct btrfs_plug_cb, cb);
- if (!plug->info) {
- plug->info = fs_info;
- INIT_LIST_HEAD(&plug->rbio_list);
+ if (!rbio_is_full(rbio)) {
+ cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
+ if (cb) {
+ plug = container_of(cb, struct btrfs_plug_cb, cb);
+ if (!plug->info) {
+ plug->info = fs_info;
+ INIT_LIST_HEAD(&plug->rbio_list);
+ }
+ list_add_tail(&rbio->plug_list, &plug->rbio_list);
+ return;
}
- list_add_tail(&rbio->plug_list, &plug->rbio_list);
- return;
}
-queue_rbio:
+
/*
* Either we don't have any existing plug, or we're doing a full stripe,
- * can queue the rmw work now.
+ * queue the rmw work now.
*/
start_async_work(rbio, rmw_rbio_work);
-
- return;
-
-fail:
- bio->bi_status = errno_to_blk_status(ret);
- bio_endio(bio);
}
static int verify_one_sector(struct btrfs_raid_bio *rbio,
int stripe_nr, int sector_nr)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *csum_expected;
- int ret;
if (!rbio->csum_bitmap || !rbio->csum_buf)
return 0;
@@ -1720,59 +1916,33 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
- if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
- rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
- sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
+ paddrs = sector_paddrs_in_rbio(rbio, stripe_nr, sector_nr, 0);
} else {
- sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe_nr, sector_nr);
}
- ASSERT(sector->page);
-
csum_expected = rbio->csum_buf +
(stripe_nr * rbio->stripe_nsectors + sector_nr) *
fs_info->csum_size;
- ret = btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff,
- csum_buf, csum_expected);
- return ret;
+ btrfs_calculate_block_csum_pages(fs_info, paddrs, csum_buf);
+ if (unlikely(memcmp(csum_buf, csum_expected, fs_info->csum_size) != 0))
+ return -EIO;
+ return 0;
}
-/*
- * Recover a vertical stripe specified by @sector_nr.
- * @*pointers are the pre-allocated pointers by the caller, so we don't
- * need to allocate/free the pointers again and again.
- */
-static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
- void **pointers, void **unmap_array)
+static void recover_vertical_step(struct btrfs_raid_bio *rbio,
+ unsigned int sector_nr,
+ unsigned int step_nr,
+ int faila, int failb,
+ void **pointers, void **unmap_array)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
- struct sector_ptr *sector;
- const u32 sectorsize = fs_info->sectorsize;
- int found_errors;
- int faila;
- int failb;
+ const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
int stripe_nr;
- int ret = 0;
-
- /*
- * Now we just use bitmap to mark the horizontal stripes in
- * which we have data when doing parity scrub.
- */
- if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
- !test_bit(sector_nr, &rbio->dbitmap))
- return 0;
-
- found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila,
- &failb);
- /*
- * No errors in the veritical stripe, skip it. Can happen for recovery
- * which only part of a stripe failed csum check.
- */
- if (!found_errors)
- return 0;
- if (found_errors > rbio->bioc->max_errors)
- return -EIO;
+ ASSERT(step_nr < rbio->sector_nsteps);
+ ASSERT(sector_nr < rbio->stripe_nsectors);
/*
* Setup our array of pointers with sectors from each stripe
@@ -1781,19 +1951,18 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* pointer order.
*/
for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
+ phys_addr_t paddr;
+
/*
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
- if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
- rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
- sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
+ paddr = sector_paddr_in_rbio(rbio, stripe_nr, sector_nr, step_nr, 0);
} else {
- sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
+ paddr = rbio_stripe_paddr(rbio, stripe_nr, sector_nr, step_nr);
}
- ASSERT(sector->page);
- pointers[stripe_nr] = kmap_local_page(sector->page) +
- sector->pgoff;
+ pointers[stripe_nr] = kmap_local_paddr(paddr);
unmap_array[stripe_nr] = pointers[stripe_nr];
}
@@ -1823,9 +1992,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* here due to a crc mismatch and we can't give them the
* data they want.
*/
- if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
- if (rbio->bioc->raid_map[faila] ==
- RAID5_P_STRIPE)
+ if (failb == rbio->real_stripes - 1) {
+ if (faila == rbio->real_stripes - 2)
/*
* Only P and Q are corrupted.
* We only care about data stripes recovery,
@@ -1839,11 +2007,11 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
goto pstripe;
}
- if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
- raid6_datap_recov(rbio->real_stripes, sectorsize,
+ if (failb == rbio->real_stripes - 2) {
+ raid6_datap_recov(rbio->real_stripes, step,
faila, pointers);
} else {
- raid6_2data_recov(rbio->real_stripes, sectorsize,
+ raid6_2data_recov(rbio->real_stripes, step,
faila, failb, pointers);
}
} else {
@@ -1853,7 +2021,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
ASSERT(failb == -1);
pstripe:
/* Copy parity block into failed block to start with */
- memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize);
+ memcpy(pointers[faila], pointers[rbio->nr_data], step);
/* Rearrange the pointer array */
p = pointers[faila];
@@ -1863,40 +2031,66 @@ pstripe:
pointers[rbio->nr_data - 1] = p;
/* Xor in the rest */
- run_xor(pointers, rbio->nr_data - 1, sectorsize);
-
+ run_xor(pointers, rbio->nr_data - 1, step);
}
+cleanup:
+ for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
+ kunmap_local(unmap_array[stripe_nr]);
+}
+
+/*
+ * Recover a vertical stripe specified by @sector_nr.
+ * @*pointers are the pre-allocated pointers by the caller, so we don't
+ * need to allocate/free the pointers again and again.
+ */
+static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
+ void **pointers, void **unmap_array)
+{
+ int found_errors;
+ int faila;
+ int failb;
+ int ret = 0;
+
/*
- * No matter if this is a RMW or recovery, we should have all
- * failed sectors repaired in the vertical stripe, thus they are now
- * uptodate.
- * Especially if we determine to cache the rbio, we need to
- * have at least all data sectors uptodate.
- *
- * If possible, also check if the repaired sector matches its data
- * checksum.
+ * Now we just use bitmap to mark the horizontal stripes in
+ * which we have data when doing parity scrub.
*/
+ if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
+ !test_bit(sector_nr, &rbio->dbitmap))
+ return 0;
+
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr, &faila,
+ &failb);
+ /*
+ * No errors in the vertical stripe, skip it. Can happen for recovery
+ * where only part of a stripe failed the csum check.
+ */
+ if (!found_errors)
+ return 0;
+
+ if (unlikely(found_errors > rbio->bioc->max_errors))
+ return -EIO;
+
+ for (int i = 0; i < rbio->sector_nsteps; i++)
+ recover_vertical_step(rbio, sector_nr, i, faila, failb,
+ pointers, unmap_array);
if (faila >= 0) {
ret = verify_one_sector(rbio, faila, sector_nr);
if (ret < 0)
- goto cleanup;
+ return ret;
- sector = rbio_stripe_sector(rbio, faila, sector_nr);
- sector->uptodate = 1;
+ set_bit(rbio_sector_index(rbio, faila, sector_nr),
+ rbio->stripe_uptodate_bitmap);
}
if (failb >= 0) {
- ret = verify_one_sector(rbio, faila, sector_nr);
+ ret = verify_one_sector(rbio, failb, sector_nr);
if (ret < 0)
- goto cleanup;
+ return ret;
- sector = rbio_stripe_sector(rbio, failb, sector_nr);
- sector->uptodate = 1;
+ set_bit(rbio_sector_index(rbio, failb, sector_nr),
+ rbio->stripe_uptodate_bitmap);
}
-
-cleanup:
- for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
- kunmap_local(unmap_array[stripe_nr]);
return ret;
}
@@ -1920,11 +2114,10 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
goto out;
}
- if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
- rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
- spin_lock_irq(&rbio->bio_list_lock);
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
+ spin_lock(&rbio->bio_list_lock);
set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
- spin_unlock_irq(&rbio->bio_list_lock);
+ spin_unlock(&rbio->bio_list_lock);
}
index_rbio_pages(rbio);
@@ -1941,14 +2134,25 @@ out:
return ret;
}
-static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list)
+static void recover_rbio(struct btrfs_raid_bio *rbio)
{
- struct bio *bio;
+ struct bio_list bio_list = BIO_EMPTY_LIST;
int total_sector_nr;
int ret = 0;
- ASSERT(bio_list_size(bio_list) == 0);
+ /*
+ * Whether we're recovering from a read failure or doing a degraded write,
+ * the caller should have set the error bitmap correctly.
+ */
+ ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));
+
+ /* For recovery, we need to read all sectors including P/Q. */
+ ret = alloc_rbio_pages(rbio);
+ if (ret < 0)
+ goto out;
+
+ index_rbio_pages(rbio);
+
/*
* Read everything that hasn't failed. However this time we will
* not trust any cached sector.
@@ -1961,7 +2165,7 @@ static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
total_sector_nr++) {
int stripe = total_sector_nr / rbio->stripe_nsectors;
int sectornr = total_sector_nr % rbio->stripe_nsectors;
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
/*
* Skip the range which has error. It can be a range which is
@@ -1978,79 +2182,33 @@ static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
continue;
}
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
- ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe,
sectornr, REQ_OP_READ);
- if (ret < 0)
- goto error;
+ if (ret < 0) {
+ bio_list_put(&bio_list);
+ goto out;
+ }
}
- return 0;
-error:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
-
- return -EIO;
-}
-
-static int recover_rbio(struct btrfs_raid_bio *rbio)
-{
- struct bio_list bio_list;
- struct bio *bio;
- int ret;
-
- /*
- * Either we're doing recover for a read failure or degraded write,
- * caller should have set error bitmap correctly.
- */
- ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));
- bio_list_init(&bio_list);
-
- /* For recovery, we need to read all sectors including P/Q. */
- ret = alloc_rbio_pages(rbio);
- if (ret < 0)
- goto out;
-
- index_rbio_pages(rbio);
-
- ret = recover_assemble_read_bios(rbio, &bio_list);
- if (ret < 0)
- goto out;
-
- submit_read_bios(rbio, &bio_list);
- wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+ submit_read_wait_bio_list(rbio, &bio_list);
ret = recover_sectors(rbio);
-
out:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
-
- return ret;
+ rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
static void recover_rbio_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
- int ret;
rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = lock_stripe_add(rbio);
- if (ret == 0) {
- ret = recover_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
- }
+ if (!lock_stripe_add(rbio))
+ recover_rbio(rbio);
}
static void recover_rbio_work_locked(struct work_struct *work)
{
- struct btrfs_raid_bio *rbio;
- int ret;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = recover_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+ recover_rbio(container_of(work, struct btrfs_raid_bio, work));
}
static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num)
@@ -2070,7 +2228,7 @@ static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_n
int faila;
int failb;
- found_errors = get_rbio_veritical_errors(rbio, sector_nr,
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr,
&faila, &failb);
/* This vertical stripe doesn't have errors. */
if (!found_errors)
@@ -2137,8 +2295,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
struct btrfs_root *csum_root = btrfs_csum_root(fs_info,
- rbio->bioc->raid_map[0]);
- const u64 start = rbio->bioc->raid_map[0];
+ rbio->bioc->full_stripe_logical);
+ const u64 start = rbio->bioc->full_stripe_logical;
const u32 len = (rbio->nr_data * rbio->stripe_nsectors) <<
fs_info->sectorsize_bits;
int ret;
@@ -2170,7 +2328,7 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
goto error;
}
- ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
+ ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1,
rbio->csum_buf, rbio->csum_bitmap);
if (ret < 0)
goto error;
@@ -2186,7 +2344,7 @@ error:
*/
btrfs_warn_rl(fs_info,
"sub-stripe write for full stripe %llu is not safe, failed to get csum: %d",
- rbio->bioc->raid_map[0], ret);
+ rbio->bioc->full_stripe_logical, ret);
no_csum:
kfree(rbio->csum_buf);
bitmap_free(rbio->csum_bitmap);
@@ -2196,11 +2354,9 @@ no_csum:
static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
{
- struct bio_list bio_list;
- struct bio *bio;
- int ret;
-
- bio_list_init(&bio_list);
+ struct bio_list bio_list = BIO_EMPTY_LIST;
+ int total_sector_nr;
+ int ret = 0;
/*
* Fill the data csums we need for data verification. We need to fill
@@ -2209,32 +2365,39 @@ static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
*/
fill_data_csums(rbio);
- ret = rmw_assemble_read_bios(rbio, &bio_list);
- if (ret < 0)
- goto out;
+ /*
+ * Build a list of bios to read all sectors (including data and P/Q).
+ *
+ * This is required by the later csum verification and recovery.
+ */
+ for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
+ total_sector_nr++) {
+ int stripe = total_sector_nr / rbio->stripe_nsectors;
+ int sectornr = total_sector_nr % rbio->stripe_nsectors;
+ phys_addr_t *paddrs;
- submit_read_bios(rbio, &bio_list);
- wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe,
+ sectornr, REQ_OP_READ);
+ if (ret) {
+ bio_list_put(&bio_list);
+ return ret;
+ }
+ }
/*
* We may or may not have any corrupted sectors (including missing dev
* and csum mismatch), just let recover_sectors() to handle them all.
*/
- ret = recover_sectors(rbio);
- return ret;
-out:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
-
- return ret;
+ submit_read_wait_bio_list(rbio, &bio_list);
+ return recover_sectors(rbio);
}
static void raid_wait_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- blk_status_t err = bio->bi_status;
- if (err)
+ if (bio->bi_status)
rbio_update_error_bitmap(rbio, bio);
bio_put(bio);
if (atomic_dec_and_test(&rbio->stripes_pending))
@@ -2250,11 +2413,11 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_write_end_io;
- if (trace_raid56_write_stripe_enabled()) {
+ if (trace_raid56_write_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
- trace_raid56_write_stripe(rbio, bio, &trace_info);
+ trace_raid56_write(rbio, bio, &trace_info);
}
submit_bio(bio);
}
@@ -2269,20 +2432,21 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
int i;
for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) {
- struct sector_ptr *sector = &rbio->stripe_sectors[i];
+ phys_addr_t paddr = rbio->stripe_paddrs[i * rbio->sector_nsteps];
/*
* We have a sector which doesn't have page nor uptodate,
* thus this rbio can not be cached one, as cached one must
* have all its data sectors present and uptodate.
*/
- if (!sector->page || !sector->uptodate)
+ if (paddr == INVALID_PADDR ||
+ !test_bit(i, rbio->stripe_uptodate_bitmap))
return true;
}
return false;
}
-static int rmw_rbio(struct btrfs_raid_bio *rbio)
+static void rmw_rbio(struct btrfs_raid_bio *rbio)
{
struct bio_list bio_list;
int sectornr;
@@ -2294,38 +2458,36 @@ static int rmw_rbio(struct btrfs_raid_bio *rbio)
*/
ret = alloc_rbio_parity_pages(rbio);
if (ret < 0)
- return ret;
+ goto out;
/*
* Either full stripe write, or we have every data sector already
* cached, can go to write path immediately.
*/
- if (rbio_is_full(rbio) || !need_read_stripe_sectors(rbio))
- goto write;
-
- /*
- * Now we're doing sub-stripe write, also need all data stripes to do
- * the full RMW.
- */
- ret = alloc_rbio_data_pages(rbio);
- if (ret < 0)
- return ret;
+ if (!rbio_is_full(rbio) && need_read_stripe_sectors(rbio)) {
+ /*
+ * Now we're doing sub-stripe write, also need all data stripes
+ * to do the full RMW.
+ */
+ ret = alloc_rbio_data_pages(rbio);
+ if (ret < 0)
+ goto out;
- index_rbio_pages(rbio);
+ index_rbio_pages(rbio);
- ret = rmw_read_wait_recover(rbio);
- if (ret < 0)
- return ret;
+ ret = rmw_read_wait_recover(rbio);
+ if (ret < 0)
+ goto out;
+ }
-write:
/*
* At this stage we're not allowed to add any new bios to the
* bio list any more, anyone else that wants to change this stripe
* needs to do their own rmw.
*/
- spin_lock_irq(&rbio->bio_list_lock);
+ spin_lock(&rbio->bio_list_lock);
set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
- spin_unlock_irq(&rbio->bio_list_lock);
+ spin_unlock(&rbio->bio_list_lock);
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
@@ -2348,7 +2510,7 @@ write:
bio_list_init(&bio_list);
ret = rmw_assemble_write_bios(rbio, &bio_list);
if (ret < 0)
- return ret;
+ goto out;
/* We should have at least one bio assembled. */
ASSERT(bio_list_size(&bio_list));
@@ -2359,38 +2521,28 @@ write:
for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
int found_errors;
- found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL);
- if (found_errors > rbio->bioc->max_errors) {
+ found_errors = get_rbio_vertical_errors(rbio, sectornr, NULL, NULL);
+ if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
}
}
- return ret;
+out:
+ rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
static void rmw_rbio_work(struct work_struct *work)
{
struct btrfs_raid_bio *rbio;
- int ret;
rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = lock_stripe_add(rbio);
- if (ret == 0) {
- ret = rmw_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
- }
+ if (lock_stripe_add(rbio) == 0)
+ rmw_rbio(rbio);
}
static void rmw_rbio_work_locked(struct work_struct *work)
{
- struct btrfs_raid_bio *rbio;
- int ret;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
-
- ret = rmw_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+ rmw_rbio(container_of(work, struct btrfs_raid_bio, work));
}
/*
@@ -2434,27 +2586,30 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
break;
}
}
- ASSERT(i < rbio->real_stripes);
+ ASSERT_RBIO_STRIPE(i < rbio->real_stripes, rbio, i);
bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
return rbio;
}
-/* Used for both parity scrub and missing. */
-void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
- unsigned int pgoff, u64 logical)
+static int alloc_rbio_sector_pages(struct btrfs_raid_bio *rbio,
+ int sector_nr)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
- int stripe_offset;
- int index;
+ const u32 step = min(PAGE_SIZE, rbio->bioc->fs_info->sectorsize);
+ const u32 base = sector_nr * rbio->sector_nsteps;
+
+ for (int i = base; i < base + rbio->sector_nsteps; i++) {
+ const unsigned int page_index = (i * step) >> PAGE_SHIFT;
+ struct page *page;
- ASSERT(logical >= rbio->bioc->raid_map[0]);
- ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] +
- BTRFS_STRIPE_LEN * rbio->nr_data);
- stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
- index = stripe_offset / sectorsize;
- rbio->bio_sectors[index].page = page;
- rbio->bio_sectors[index].pgoff = pgoff;
+ if (rbio->stripe_pages[page_index])
+ continue;
+ page = alloc_page(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ rbio->stripe_pages[page_index] = page;
+ }
+ return 0;
}
/*
@@ -2463,42 +2618,96 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
*/
static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
{
- const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
int total_sector_nr;
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
- struct page *page;
int sectornr = total_sector_nr % rbio->stripe_nsectors;
- int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT;
+ int ret;
if (!test_bit(sectornr, &rbio->dbitmap))
continue;
- if (rbio->stripe_pages[index])
- continue;
- page = alloc_page(GFP_NOFS);
- if (!page)
- return -ENOMEM;
- rbio->stripe_pages[index] = page;
+ ret = alloc_rbio_sector_pages(rbio, total_sector_nr);
+ if (ret < 0)
+ return ret;
}
index_stripe_sectors(rbio);
return 0;
}
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
+/* Return true if the content of the step matches the calculated one. */
+static bool verify_one_parity_step(struct btrfs_raid_bio *rbio,
+ void *pointers[], unsigned int sector_nr,
+ unsigned int step_nr)
+{
+ const unsigned int nr_data = rbio->nr_data;
+ const bool has_qstripe = (rbio->real_stripes - rbio->nr_data == 2);
+ const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE);
+ void *parity;
+ bool ret = false;
+
+ ASSERT(step_nr < rbio->sector_nsteps);
+
+ /* First collect one page from each data stripe. */
+ for (int stripe = 0; stripe < nr_data; stripe++)
+ pointers[stripe] = kmap_local_paddr(
+ sector_paddr_in_rbio(rbio, stripe, sector_nr,
+ step_nr, 0));
+
+ if (has_qstripe) {
+ assert_rbio(rbio);
+ /* RAID6, call the library function to fill in our P/Q. */
+ raid6_call.gen_syndrome(rbio->real_stripes, step, pointers);
+ } else {
+ /* RAID5. */
+ memcpy(pointers[nr_data], pointers[0], step);
+ run_xor(pointers + 1, nr_data - 1, step);
+ }
+
+ /* Check scrubbing parity and repair it. */
+ parity = kmap_local_paddr(rbio_stripe_paddr(rbio, rbio->scrubp, sector_nr, step_nr));
+ if (memcmp(parity, pointers[rbio->scrubp], step) != 0)
+ memcpy(parity, pointers[rbio->scrubp], step);
+ else
+ ret = true;
+ kunmap_local(parity);
+
+ for (int stripe = nr_data - 1; stripe >= 0; stripe--)
+ kunmap_local(pointers[stripe]);
+ return ret;
+}
+
+/*
+ * The @pointers array should have the P/Q parity already mapped.
+ */
+static void verify_one_parity_sector(struct btrfs_raid_bio *rbio,
+ void *pointers[], unsigned int sector_nr)
+{
+ bool found_error = false;
+
+ for (int step_nr = 0; step_nr < rbio->sector_nsteps; step_nr++) {
+ bool match;
+
+ match = verify_one_parity_step(rbio, pointers, sector_nr, step_nr);
+ if (!match)
+ found_error = true;
+ }
+ if (!found_error)
+ bitmap_clear(&rbio->dbitmap, sector_nr, 1);
+}
+
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
{
struct btrfs_io_context *bioc = rbio->bioc;
- const u32 sectorsize = bioc->fs_info->sectorsize;
void **pointers = rbio->finish_pointers;
unsigned long *pbitmap = &rbio->finish_pbitmap;
int nr_data = rbio->nr_data;
- int stripe;
int sectornr;
bool has_qstripe;
- struct sector_ptr p_sector = { 0 };
- struct sector_ptr q_sector = { 0 };
+ struct page *page;
+ phys_addr_t p_paddr = INVALID_PADDR;
+ phys_addr_t q_paddr = INVALID_PADDR;
struct bio_list bio_list;
- struct bio *bio;
int is_replace = 0;
int ret;
@@ -2511,7 +2720,11 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
else
BUG();
- if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
+ /*
+ * Replace is running and our P/Q stripe is being replaced, then we
+ * need to duplicate the final write to replace target.
+ */
+ if (bioc->replace_nr_stripes && bioc->replace_stripe_src == rbio->scrubp) {
is_replace = 1;
bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors);
}
@@ -2523,88 +2736,51 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
*/
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- if (!need_check)
- goto writeback;
-
- p_sector.page = alloc_page(GFP_NOFS);
- if (!p_sector.page)
+ page = alloc_page(GFP_NOFS);
+ if (!page)
return -ENOMEM;
- p_sector.pgoff = 0;
- p_sector.uptodate = 1;
+ p_paddr = page_to_phys(page);
+ page = NULL;
+ pointers[nr_data] = kmap_local_paddr(p_paddr);
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
- q_sector.page = alloc_page(GFP_NOFS);
- if (!q_sector.page) {
- __free_page(p_sector.page);
- p_sector.page = NULL;
+ page = alloc_page(GFP_NOFS);
+ if (!page) {
+ __free_page(phys_to_page(p_paddr));
+ p_paddr = INVALID_PADDR;
return -ENOMEM;
}
- q_sector.pgoff = 0;
- q_sector.uptodate = 1;
- pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
+ q_paddr = page_to_phys(page);
+ page = NULL;
+ pointers[rbio->real_stripes - 1] = kmap_local_paddr(q_paddr);
}
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
/* Map the parity stripe just once */
- pointers[nr_data] = kmap_local_page(p_sector.page);
-
- for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
- struct sector_ptr *sector;
- void *parity;
-
- /* first collect one page from each data stripe */
- for (stripe = 0; stripe < nr_data; stripe++) {
- sector = sector_in_rbio(rbio, stripe, sectornr, 0);
- pointers[stripe] = kmap_local_page(sector->page) +
- sector->pgoff;
- }
- if (has_qstripe) {
- /* RAID6, call the library function to fill in our P/Q */
- raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
- pointers);
- } else {
- /* raid5 */
- memcpy(pointers[nr_data], pointers[0], sectorsize);
- run_xor(pointers + 1, nr_data - 1, sectorsize);
- }
-
- /* Check scrubbing parity and repair it */
- sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- parity = kmap_local_page(sector->page) + sector->pgoff;
- if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
- memcpy(parity, pointers[rbio->scrubp], sectorsize);
- else
- /* Parity is right, needn't writeback */
- bitmap_clear(&rbio->dbitmap, sectornr, 1);
- kunmap_local(parity);
-
- for (stripe = nr_data - 1; stripe >= 0; stripe--)
- kunmap_local(pointers[stripe]);
- }
+ for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors)
+ verify_one_parity_sector(rbio, pointers, sectornr);
kunmap_local(pointers[nr_data]);
- __free_page(p_sector.page);
- p_sector.page = NULL;
- if (q_sector.page) {
- kunmap_local(pointers[rbio->real_stripes - 1]);
- __free_page(q_sector.page);
- q_sector.page = NULL;
+ __free_page(phys_to_page(p_paddr));
+ p_paddr = INVALID_PADDR;
+ if (q_paddr != INVALID_PADDR) {
+ __free_page(phys_to_page(q_paddr));
+ q_paddr = INVALID_PADDR;
}
-writeback:
/*
* time to start writing. Make bios for everything from the
* higher layers (the bio_list in our rbio) and our p/q. Ignore
* everything else.
*/
for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
- sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp,
+ paddrs = rbio_stripe_paddrs(rbio, rbio->scrubp, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, rbio->scrubp,
sectornr, REQ_OP_WRITE);
if (ret)
goto cleanup;
@@ -2613,13 +2789,17 @@ writeback:
if (!is_replace)
goto submit_write;
+ /*
+ * Replace is running and our parity stripe needs to be duplicated to
+ * the target device. Check we have a valid source stripe number.
+ */
+ ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio);
for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
- sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
- ret = rbio_add_io_sector(rbio, &bio_list, sector,
- bioc->tgtdev_map[rbio->scrubp],
- sectornr, REQ_OP_WRITE);
+ paddrs = rbio_stripe_paddrs(rbio, rbio->scrubp, sectornr);
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, rbio->real_stripes,
+ sectornr, REQ_OP_WRITE);
if (ret)
goto cleanup;
}
@@ -2629,8 +2809,7 @@ submit_write:
return 0;
cleanup:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
+ bio_list_put(&bio_list);
return ret;
}
@@ -2667,9 +2846,9 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
int failb;
int found_errors;
- found_errors = get_rbio_veritical_errors(rbio, sector_nr,
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr,
&faila, &failb);
- if (found_errors > rbio->bioc->max_errors) {
+ if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
goto out;
}
@@ -2693,7 +2872,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
* data, so the capability of the repair is declined. (In the
* case of RAID5, we can not repair anything.)
*/
- if (dfail > rbio->bioc->max_errors - 1) {
+ if (unlikely(dfail > rbio->bioc->max_errors - 1)) {
ret = -EIO;
goto out;
}
@@ -2710,7 +2889,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
* scrubbing parity, luckily, use the other one to repair the
* data, or we can not repair the data stripe.
*/
- if (failp != rbio->scrubp) {
+ if (unlikely(failp != rbio->scrubp)) {
ret = -EIO;
goto out;
}
@@ -2725,21 +2904,18 @@ out:
return ret;
}
-static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list)
+static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
{
- struct bio *bio;
+ struct bio_list bio_list = BIO_EMPTY_LIST;
int total_sector_nr;
int ret = 0;
- ASSERT(bio_list_size(bio_list) == 0);
-
/* Build a list of bios to read all the missing parts. */
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
int sectornr = total_sector_nr % rbio->stripe_nsectors;
int stripe = total_sector_nr / rbio->stripe_nsectors;
- struct sector_ptr *sector;
+ phys_addr_t *paddrs;
/* No data in the vertical stripe, no need to read. */
if (!test_bit(sectornr, &rbio->dbitmap))
@@ -2747,93 +2923,76 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
/*
* We want to find all the sectors missing from the rbio and
- * read them from the disk. If sector_in_rbio() finds a sector
+ * read them from the disk. If sector_paddrs_in_rbio() finds a sector
* in the bio list we don't need to read it off the stripe.
*/
- sector = sector_in_rbio(rbio, stripe, sectornr, 1);
- if (sector)
+ paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1);
+ if (paddrs)
continue;
- sector = rbio_stripe_sector(rbio, stripe, sectornr);
+ paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr);
/*
* The bio cache may have handed us an uptodate sector. If so,
* use it.
*/
- if (sector->uptodate)
+ if (test_bit(rbio_sector_index(rbio, stripe, sectornr),
+ rbio->stripe_uptodate_bitmap))
continue;
- ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
+ ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe,
sectornr, REQ_OP_READ);
- if (ret)
- goto error;
+ if (ret) {
+ bio_list_put(&bio_list);
+ return ret;
+ }
}
+
+ submit_read_wait_bio_list(rbio, &bio_list);
return 0;
-error:
- while ((bio = bio_list_pop(bio_list)))
- bio_put(bio);
- return ret;
}
-static int scrub_rbio(struct btrfs_raid_bio *rbio)
+static void scrub_rbio(struct btrfs_raid_bio *rbio)
{
- bool need_check = false;
- struct bio_list bio_list;
int sector_nr;
int ret;
- struct bio *bio;
-
- bio_list_init(&bio_list);
ret = alloc_rbio_essential_pages(rbio);
if (ret)
- goto cleanup;
+ goto out;
bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
- ret = scrub_assemble_read_bios(rbio, &bio_list);
+ ret = scrub_assemble_read_bios(rbio);
if (ret < 0)
- goto cleanup;
-
- submit_read_bios(rbio, &bio_list);
- wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
+ goto out;
/* We may have some failures, recover the failed sectors first. */
ret = recover_scrub_rbio(rbio);
if (ret < 0)
- goto cleanup;
+ goto out;
/*
* We have every sector properly prepared. Can finish the scrub
* and writeback the good content.
*/
- ret = finish_parity_scrub(rbio, need_check);
+ ret = finish_parity_scrub(rbio);
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
int found_errors;
- found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL);
- if (found_errors > rbio->bioc->max_errors) {
+ found_errors = get_rbio_vertical_errors(rbio, sector_nr, NULL, NULL);
+ if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
}
}
- return ret;
-
-cleanup:
- while ((bio = bio_list_pop(&bio_list)))
- bio_put(bio);
-
- return ret;
+out:
+ rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}
static void scrub_rbio_work_locked(struct work_struct *work)
{
- struct btrfs_raid_bio *rbio;
- int ret;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
- ret = scrub_rbio(rbio);
- rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+ scrub_rbio(container_of(work, struct btrfs_raid_bio, work));
}
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
@@ -2842,32 +3001,57 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
start_async_work(rbio, scrub_rbio_work_locked);
}
-/* The following code is used for dev replace of a missing RAID 5/6 device. */
-
-struct btrfs_raid_bio *
-raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
+/*
+ * This is for scrub call sites where we already have correct data contents.
+ * This allows us to avoid reading data stripes again.
+ *
+ * Unfortunately here we have to do folio copy, rather than reusing the pages.
+ * This is due to the fact rbio has its own page management for its cache.
+ */
+void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
+ struct folio **data_folios, u64 data_logical)
{
- struct btrfs_fs_info *fs_info = bioc->fs_info;
- struct btrfs_raid_bio *rbio;
-
- rbio = alloc_rbio(fs_info, bioc);
- if (IS_ERR(rbio))
- return NULL;
+ struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
+ const u64 offset_in_full_stripe = data_logical -
+ rbio->bioc->full_stripe_logical;
+ unsigned int findex = 0;
+ unsigned int foffset = 0;
+ int ret;
- rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
- bio_list_add(&rbio->bio_list, bio);
/*
- * This is a special bio which is used to hold the completion handler
- * and make the scrub rbio is similar to the other types
+ * If we hit ENOMEM temporarily, but later at
+ * raid56_parity_submit_scrub_rbio() time it succeeded, we just do
+ * the extra read, not a big deal.
+ *
+ * If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time,
+ * the bio would get a proper error number set.
*/
- ASSERT(!bio->bi_iter.bi_size);
-
- set_rbio_range_error(rbio, bio);
-
- return rbio;
-}
+ ret = alloc_rbio_data_pages(rbio);
+ if (ret < 0)
+ return;
-void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
-{
- start_async_work(rbio, recover_rbio_work);
+ /* data_logical must be at stripe boundary and inside the full stripe. */
+ ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
+ ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
+
+ for (unsigned int cur_off = offset_in_full_stripe;
+ cur_off < offset_in_full_stripe + BTRFS_STRIPE_LEN;
+ cur_off += PAGE_SIZE) {
+ const unsigned int pindex = cur_off >> PAGE_SHIFT;
+ void *kaddr;
+
+ kaddr = kmap_local_page(rbio->stripe_pages[pindex]);
+ memcpy_from_folio(kaddr, data_folios[findex], foffset, PAGE_SIZE);
+ kunmap_local(kaddr);
+
+ foffset += PAGE_SIZE;
+ ASSERT(foffset <= folio_size(data_folios[findex]));
+ if (foffset == folio_size(data_folios[findex])) {
+ findex++;
+ foffset = 0;
+ }
+ }
+ bitmap_set(rbio->stripe_uptodate_bitmap,
+ offset_in_full_stripe >> fs_info->sectorsize_bits,
+ BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
}