summaryrefslogtreecommitdiff
path: root/fs/btrfs/raid56.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r--fs/btrfs/raid56.c178
1 files changed, 117 insertions, 61 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 24a62224b24b..c3a2bc8af675 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
cur = h + i;
INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock);
- init_waitqueue_head(&cur->wait);
}
x = cmpxchg(&info->stripe_hash_table, NULL, table);
@@ -595,14 +594,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
* bio list here, anyone else that wants to
* change this stripe needs to do their own rmw.
*/
- if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
- cur->operation == BTRFS_RBIO_PARITY_SCRUB)
+ if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;
- if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
- cur->operation == BTRFS_RBIO_REBUILD_MISSING)
+ if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
return 0;
+ if (last->operation == BTRFS_RBIO_READ_REBUILD) {
+ int fa = last->faila;
+ int fb = last->failb;
+ int cur_fa = cur->faila;
+ int cur_fb = cur->failb;
+
+ if (last->faila >= last->failb) {
+ fa = last->failb;
+ fb = last->faila;
+ }
+
+ if (cur->faila >= cur->failb) {
+ cur_fa = cur->failb;
+ cur_fb = cur->faila;
+ }
+
+ if (fa != cur_fa || fb != cur_fb)
+ return 0;
+ }
return 1;
}
@@ -670,7 +686,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *pending;
unsigned long flags;
- DEFINE_WAIT(wait);
struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
@@ -816,15 +831,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
}
goto done_nolock;
- /*
- * The barrier for this waitqueue_active is not needed,
- * we're protected by h->lock and can't miss a wakeup.
- */
- } else if (waitqueue_active(&h->wait)) {
- spin_unlock(&rbio->bio_list_lock);
- spin_unlock_irqrestore(&h->lock, flags);
- wake_up(&h->wait);
- goto done_nolock;
}
}
done:
@@ -858,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
kfree(rbio);
}
-static void free_raid_bio(struct btrfs_raid_bio *rbio)
+static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
- unlock_stripe(rbio);
- __free_raid_bio(rbio);
+ struct bio *next;
+
+ while (cur) {
+ next = cur->bi_next;
+ cur->bi_next = NULL;
+ cur->bi_status = err;
+ bio_endio(cur);
+ cur = next;
+ }
}
/*
@@ -871,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
- struct bio *next;
+ struct bio *extra;
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
- free_raid_bio(rbio);
+ /*
+ * At this moment, rbio->bio_list is empty, however since rbio does not
+ * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
+ * hash list, rbio may be merged with others so that rbio->bio_list
+ * becomes non-empty.
+ * Once unlock_stripe() is done, rbio->bio_list will not be updated any
+ * more and we can call bio_endio() on all queued bios.
+ */
+ unlock_stripe(rbio);
+ extra = bio_list_get(&rbio->bio_list);
+ __free_raid_bio(rbio);
- while (cur) {
- next = cur->bi_next;
- cur->bi_next = NULL;
- cur->bi_status = err;
- bio_endio(cur);
- cur = next;
- }
+ rbio_endio_bio_list(cur, err);
+ if (extra)
+ rbio_endio_bio_list(extra, err);
}
/*
@@ -1326,6 +1345,9 @@ write_data:
cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
+
+ while ((bio = bio_list_pop(&bio_list)))
+ bio_put(bio);
}
/*
@@ -1348,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
+ stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
@@ -1432,14 +1455,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
*/
static void set_bio_pages_uptodate(struct bio *bio)
{
- struct bio_vec bvec;
- struct bvec_iter iter;
+ struct bio_vec *bvec;
+ int i;
- if (bio_flagged(bio, BIO_CLONED))
- bio->bi_iter = btrfs_io_bio(bio)->iter;
+ ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment(bvec, bio, iter)
- SetPageUptodate(bvec.bv_page);
+ bio_for_each_segment_all(bvec, bio, i)
+ SetPageUptodate(bvec->bv_page);
}
/*
@@ -1582,6 +1604,10 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
+
+ while ((bio = bio_list_pop(&bio_list)))
+ bio_put(bio);
+
return -EIO;
finish:
@@ -1961,15 +1987,34 @@ cleanup:
kfree(pointers);
cleanup_io:
- if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
- if (err == BLK_STS_OK)
+ /*
+ * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
+ * valid rbio which is consistent with ondisk content, thus such a
+ * valid rbio can be cached to avoid further disk reads.
+ */
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+ rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+ /*
+ * - In case of two failures, where rbio->failb != -1:
+ *
+ * Do not cache this rbio since the above read reconstruction
+ * (raid6_datap_recov() or raid6_2data_recov()) may have
+ * changed some content of stripes which are not identical to
+ * on-disk content any more, otherwise, a later write/recover
+ * may steal stripe_pages from this rbio and end up with
+ * corruptions or rebuild failures.
+ *
+ * - In case of single failure, where rbio->failb == -1:
+ *
+ * Cache this rbio iff the above read reconstruction is
+ * excuted without problems.
+ */
+ if (err == BLK_STS_OK && rbio->failb < 0)
cache_rbio_pages(rbio);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
rbio_orig_end_io(rbio, err);
- } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
- rbio_orig_end_io(rbio, err);
} else if (err == BLK_STS_OK) {
rbio->faila = -1;
rbio->failb = -1;
@@ -2107,6 +2152,10 @@ cleanup:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
rbio_orig_end_io(rbio, BLK_STS_IOERR);
+
+ while ((bio = bio_list_pop(&bio_list)))
+ bio_put(bio);
+
return -EIO;
}
@@ -2159,11 +2208,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
}
/*
- * reconstruct from the q stripe if they are
- * asking for mirror 3
+ * Loop retry:
+ * for 'mirror == 2', reconstruct from all other stripes.
+ * for 'mirror_num > 2', select a stripe to fail on every retry.
*/
- if (mirror_num == 3)
- rbio->failb = rbio->real_stripes - 2;
+ if (mirror_num > 2) {
+ /*
+ * 'mirror == 3' is to fail the p stripe and
+ * reconstruct from the q stripe. 'mirror > 3' is to
+ * fail a data stripe and reconstruct from p+q stripe.
+ */
+ rbio->failb = rbio->real_stripes - (mirror_num - 1);
+ ASSERT(rbio->failb > 0);
+ if (rbio->failb <= rbio->faila)
+ rbio->failb--;
+ }
ret = lock_stripe_add(rbio);
@@ -2231,12 +2290,18 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
ASSERT(!bio->bi_iter.bi_size);
rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
- for (i = 0; i < rbio->real_stripes; i++) {
+ /*
+ * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
+ * to the end position, so this search can start from the first parity
+ * stripe.
+ */
+ for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
if (bbio->stripes[i].dev == scrub_dev) {
rbio->scrubp = i;
break;
}
}
+ ASSERT(i < rbio->real_stripes);
/* Now we just support the sectorsize equals to page size */
ASSERT(fs_info->sectorsize == PAGE_SIZE);
@@ -2454,6 +2519,9 @@ submit_write:
cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
+
+ while ((bio = bio_list_pop(&bio_list)))
+ bio_put(bio);
}
static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
@@ -2563,12 +2631,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
int stripe;
struct bio *bio;
+ bio_list_init(&bio_list);
+
ret = alloc_rbio_essential_pages(rbio);
if (ret)
goto cleanup;
- bio_list_init(&bio_list);
-
atomic_set(&rbio->error, 0);
/*
* build a list of bios to read all the missing parts of this
@@ -2636,6 +2704,10 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
+
+ while ((bio = bio_list_pop(&bio_list)))
+ bio_put(bio);
+
return;
finish:
@@ -2700,24 +2772,8 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
return rbio;
}
-static void missing_raid56_work(struct btrfs_work *work)
-{
- struct btrfs_raid_bio *rbio;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
- __raid56_parity_recover(rbio);
-}
-
-static void async_missing_raid56(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- missing_raid56_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_missing_raid56(rbio);
+ async_read_rebuild(rbio);
}