diff options
| author | Jens Axboe <axboe@kernel.dk> | 2024-03-06 08:27:41 -0700 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2024-03-06 08:27:41 -0700 |
| commit | 33bffbb727bcf96bd298375cda61d55c912ff68b (patch) | |
| tree | 7246f0a18a926dd2f49856b56325969147337df3 /drivers/md/raid5.c | |
| parent | fde07a4d74e3b511105e0b6c9372d42376fbbecb (diff) | |
| parent | 3a889fdce7e8927a7d81d11ca3d26608b3be1c31 (diff) | |
Merge tag 'md-6.9-20240305' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.9/block
Pull MD fixes from Song:
"This set fixes two issues:
1. dmraid regression since 6.7 kernels. This issue was initially
reported in [1]. This set of fix has been reviewed and tested by
md and dm folks.
2. raid5 hang since 6.7 kernel, reported in [2]. We haven't got a
better fix for this issue yet. This revert is a workaround. It has
been applied to 6.7 stable kernels [3], and proved to be affective.
We will look more into this issue for a better fix.
[1] https://lore.kernel.org/linux-raid/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
[2] https://lore.kernel.org/linux-raid/20240123005700.9302-1-dan@danm.net/
[3] 87165c64fe1a in linux-6.7.y branch."
* tag 'md-6.9-20240305' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
dm-raid: fix lockdep waring in "pers->hot_add_disk"
dm-raid456, md/raid456: fix a deadlock for dm-raid456 while io concurrent with reshape
dm-raid: add a new helper prepare_suspend() in md_personality
md/dm-raid: don't call md_reap_sync_thread() directly
dm-raid: really frozen sync_thread during suspend
md: add a new helper reshape_interrupted()
md: export helper md_is_rdwr()
md: export helpers to stop sync_thread
md: don't clear MD_RECOVERY_FROZEN for new dm-raid until resume
Revert "Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d""
Diffstat (limited to 'drivers/md/raid5.c')
| -rw-r--r-- | drivers/md/raid5.c | 44 |
1 files changed, 42 insertions, 2 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 48129de21aec..58f8395b56d6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -36,6 +36,7 @@ */ #include <linux/blkdev.h> +#include <linux/delay.h> #include <linux/kthread.h> #include <linux/raid/pq.h> #include <linux/async_tx.h> @@ -760,6 +761,7 @@ enum stripe_result { STRIPE_RETRY, STRIPE_SCHEDULE_AND_RETRY, STRIPE_FAIL, + STRIPE_WAIT_RESHAPE, }; struct stripe_request_ctx { @@ -5937,7 +5939,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev, if (ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) { spin_unlock_irq(&conf->device_lock); - return STRIPE_SCHEDULE_AND_RETRY; + ret = STRIPE_SCHEDULE_AND_RETRY; + goto out; } } spin_unlock_irq(&conf->device_lock); @@ -6016,6 +6019,12 @@ static enum stripe_result make_stripe_request(struct mddev *mddev, out_release: raid5_release_stripe(sh); +out: + if (ret == STRIPE_SCHEDULE_AND_RETRY && reshape_interrupted(mddev)) { + bi->bi_status = BLK_STS_RESOURCE; + ret = STRIPE_WAIT_RESHAPE; + pr_err_ratelimited("dm-raid456: io across reshape position while reshape can't make progress"); + } return ret; } @@ -6137,7 +6146,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) while (1) { res = make_stripe_request(mddev, conf, &ctx, logical_sector, bi); - if (res == STRIPE_FAIL) + if (res == STRIPE_FAIL || res == STRIPE_WAIT_RESHAPE) break; if (res == STRIPE_RETRY) @@ -6175,6 +6184,11 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) if (rw == WRITE) md_write_end(mddev); + if (res == STRIPE_WAIT_RESHAPE) { + md_free_cloned_bio(bi); + return false; + } + bio_endio(bi); return true; } @@ -6764,7 +6778,18 @@ static void raid5d(struct md_thread *thread) spin_unlock_irq(&conf->device_lock); md_check_recovery(mddev); spin_lock_irq(&conf->device_lock); + + /* + * Waiting on MD_SB_CHANGE_PENDING below may deadlock + * seeing md_check_recovery() is needed to clear + * the flag when using mdmon. + */ + continue; } + + wait_event_lock_irq(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), + conf->device_lock); } pr_debug("%d stripes handled\n", handled); @@ -8925,6 +8950,18 @@ static int raid5_start(struct mddev *mddev) return r5l_start(conf->log); } +/* + * This is only used for dm-raid456, caller already frozen sync_thread, hence + * if rehsape is still in progress, io that is waiting for reshape can never be + * done now, hence wake up and handle those IO. + */ +static void raid5_prepare_suspend(struct mddev *mddev) +{ + struct r5conf *conf = mddev->private; + + wake_up(&conf->wait_for_overlap); +} + static struct md_personality raid6_personality = { .name = "raid6", @@ -8948,6 +8985,7 @@ static struct md_personality raid6_personality = .quiesce = raid5_quiesce, .takeover = raid6_takeover, .change_consistency_policy = raid5_change_consistency_policy, + .prepare_suspend = raid5_prepare_suspend, }; static struct md_personality raid5_personality = { @@ -8972,6 +9010,7 @@ static struct md_personality raid5_personality = .quiesce = raid5_quiesce, .takeover = raid5_takeover, .change_consistency_policy = raid5_change_consistency_policy, + .prepare_suspend = raid5_prepare_suspend, }; static struct md_personality raid4_personality = @@ -8997,6 +9036,7 @@ static struct md_personality raid4_personality = .quiesce = raid5_quiesce, .takeover = raid4_takeover, .change_consistency_policy = raid5_change_consistency_policy, + .prepare_suspend = raid5_prepare_suspend, }; static int __init raid5_init(void) |
