summaryrefslogtreecommitdiff
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--drivers/md/dm-raid.c356
1 files changed, 208 insertions, 148 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 54263679a7b1..c6f7129e43d3 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010-2011 Neil Brown
* Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
@@ -29,10 +30,10 @@
*/
#define MIN_RAID456_JOURNAL_SPACE (4*2048)
-static bool devices_handle_discard_safely = false;
+static bool devices_handle_discard_safely;
/*
- * The following flags are used by dm-raid.c to set up the array state.
+ * The following flags are used by dm-raid to set up the array state.
* They must be cleared before md_run is called.
*/
#define FirstUse 10 /* rdev flag */
@@ -212,6 +213,7 @@ struct raid_dev {
#define RT_FLAG_RS_IN_SYNC 6
#define RT_FLAG_RS_RESYNCING 7
#define RT_FLAG_RS_GROW 8
+#define RT_FLAG_RS_FROZEN 9
/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -253,7 +255,7 @@ struct raid_set {
int mode;
} journal_dev;
- struct raid_dev dev[];
+ struct raid_dev dev[] __counted_by(raid_disks);
};
static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
@@ -362,8 +364,8 @@ static struct {
const int mode;
const char *param;
} _raid456_journal_mode[] = {
- { R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" },
- { R5C_JOURNAL_MODE_WRITE_BACK , "writeback" }
+ { R5C_JOURNAL_MODE_WRITE_THROUGH, "writethrough" },
+ { R5C_JOURNAL_MODE_WRITE_BACK, "writeback" }
};
/* Return MD raid4/5/6 journal mode for dm @journal_mode one */
@@ -436,7 +438,7 @@ static bool rs_is_reshapable(struct raid_set *rs)
/* Return true, if raid set in @rs is recovering */
static bool rs_is_recovering(struct raid_set *rs)
{
- return rs->md.recovery_cp < rs->md.dev_sectors;
+ return rs->md.resync_offset < rs->md.dev_sectors;
}
/* Return true, if raid set in @rs is reshaping */
@@ -748,7 +750,11 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
return ERR_PTR(-ENOMEM);
}
- mddev_init(&rs->md);
+ if (mddev_init(&rs->md)) {
+ kfree(rs);
+ ti->error = "Cannot initialize raid context";
+ return ERR_PTR(-ENOMEM);
+ }
rs->raid_disks = raid_devs;
rs->delta_disks = 0;
@@ -762,7 +768,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
rs->md.layout = raid_type->algorithm;
rs->md.new_layout = rs->md.layout;
rs->md.delta_disks = 0;
- rs->md.recovery_cp = MaxSector;
+ rs->md.resync_offset = MaxSector;
for (i = 0; i < raid_devs; i++)
md_rdev_init(&rs->dev[i].rdev);
@@ -797,6 +803,7 @@ static void raid_set_free(struct raid_set *rs)
dm_put_device(rs->ti, rs->dev[i].data_dev);
}
+ mddev_destroy(&rs->md);
kfree(rs);
}
@@ -905,7 +912,7 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
rs->md.external = 0;
rs->md.persistent = 1;
rs->md.major_version = 2;
- } else if (rebuild && !rs->md.recovery_cp) {
+ } else if (rebuild && !rs->md.resync_offset) {
/*
* Without metadata, we will not be able to tell if the array
* is in-sync or not - we must assume it is not. Therefore,
@@ -1081,7 +1088,7 @@ static int validate_raid_redundancy(struct raid_set *rs)
if ((!rs->dev[i].rdev.sb_page ||
!test_bit(In_sync, &rs->dev[i].rdev.flags)) &&
(++rebuilds_per_group >= copies))
- goto too_many;
+ goto too_many;
}
break;
default:
@@ -1114,7 +1121,7 @@ too_many:
* [stripe_cache <sectors>] Stripe cache size for higher RAIDs
* [region_size <sectors>] Defines granularity of bitmap
* [journal_dev <dev>] raid4/5/6 journaling deviice
- * (i.e. write hole closing log)
+ * (i.e. write hole closing log)
*
* RAID10-only options:
* [raid10_copies <# copies>] Number of copies. (Default: 2)
@@ -1348,11 +1355,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
return -EINVAL;
}
- /*
- * In device-mapper, we specify things in sectors, but
- * MD records this value in kB
- */
- if (value < 0 || value / 2 > COUNTER_MAX) {
+ if (value < 0) {
rs->ti->error = "Max write-behind limit out of range";
return -EINVAL;
}
@@ -1619,6 +1622,23 @@ static int _check_data_dev_sectors(struct raid_set *rs)
return 0;
}
+/* Get reshape sectors from data_offsets or raid set */
+static sector_t _get_reshape_sectors(struct raid_set *rs)
+{
+ struct md_rdev *rdev;
+ sector_t reshape_sectors = 0;
+
+ rdev_for_each(rdev, &rs->md)
+ if (!test_bit(Journal, &rdev->flags)) {
+ reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ?
+ rdev->data_offset - rdev->new_data_offset :
+ rdev->new_data_offset - rdev->data_offset;
+ break;
+ }
+
+ return max(reshape_sectors, (sector_t) rs->data_offset);
+}
+
/* Calculate the sectors per device and per array used for @rs */
static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{
@@ -1649,7 +1669,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b
if (sector_div(dev_sectors, data_stripes))
goto bad;
- array_sectors = (data_stripes + delta_disks) * dev_sectors;
+ array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs));
if (sector_div(array_sectors, rs->raid10_copies))
goto bad;
@@ -1658,7 +1678,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b
else
/* Striped layouts */
- array_sectors = (data_stripes + delta_disks) * dev_sectors;
+ array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs));
mddev->array_sectors = array_sectors;
mddev->dev_sectors = dev_sectors;
@@ -1675,20 +1695,20 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
/* raid0 does not recover */
if (rs_is_raid0(rs))
- rs->md.recovery_cp = MaxSector;
+ rs->md.resync_offset = MaxSector;
/*
* A raid6 set has to be recovered either
* completely or for the grown part to
* ensure proper parity and Q-Syndrome
*/
else if (rs_is_raid6(rs))
- rs->md.recovery_cp = dev_sectors;
+ rs->md.resync_offset = dev_sectors;
/*
* Other raid set types may skip recovery
* depending on the 'nosync' flag.
*/
else
- rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)
+ rs->md.resync_offset = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)
? MaxSector : dev_sectors;
}
@@ -1697,11 +1717,20 @@ static void do_table_event(struct work_struct *ws)
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
smp_rmb(); /* Make sure we access most actual mddev properties */
- if (!rs_is_reshaping(rs)) {
+
+ /* Only grow size resulting from added stripe(s) after reshape ended. */
+ if (!rs_is_reshaping(rs) &&
+ rs->array_sectors > rs->md.array_sectors &&
+ !rs->md.delta_disks &&
+ rs->md.raid_disks == rs->raid_disks) {
+ /* The raid10 personality doesn't provide proper device sizes -> correct. */
if (rs_is_raid10(rs))
rs_set_rdev_sectors(rs);
+
+ rs->md.array_sectors = rs->array_sectors;
rs_set_capacity(rs);
}
+
dm_table_event(rs->ti->table);
}
@@ -1988,7 +2017,7 @@ struct dm_raid_superblock {
__le64 sectors; /* Used device size in sectors */
/*
- * Additonal Bit field of devices indicating failures to support
+ * Additional Bit field of devices indicating failures to support
* up to 256 devices with the 1.9.0 on-disk metadata format
*/
__le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1];
@@ -2114,7 +2143,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->events = cpu_to_le64(mddev->events);
sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
- sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
+ sb->array_resync_offset = cpu_to_le64(mddev->resync_offset);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
@@ -2208,7 +2237,6 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
{
int role;
- unsigned int d;
struct mddev *mddev = &rs->md;
uint64_t events_sb;
uint64_t failed_devices[DISKS_ARRAY_ELEMS];
@@ -2306,24 +2334,23 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
}
if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
- mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
+ mddev->resync_offset = le64_to_cpu(sb->array_resync_offset);
/*
* During load, we set FirstUse if a new superblock was written.
* There are two reasons we might not have a superblock:
* 1) The raid set is brand new - in which case, all of the
* devices must have their In_sync bit set. Also,
- * recovery_cp must be 0, unless forced.
+ * resync_offset must be 0, unless forced.
* 2) This is a new device being added to an old raid set
* and the new device needs to be rebuilt - in which
* case the In_sync bit will /not/ be set and
- * recovery_cp must be MaxSector.
+ * resync_offset must be MaxSector.
* 3) This is/are a new device(s) being added to an old
* raid set during takeover to a higher raid level
* to provide capacity for redundancy or during reshape
* to add capacity to grow the raid set.
*/
- d = 0;
rdev_for_each(r, mddev) {
if (test_bit(Journal, &rdev->flags))
continue;
@@ -2339,8 +2366,6 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
if (test_bit(FirstUse, &r->flags))
rebuild_and_new++;
}
-
- d++;
}
if (new_devs == rs->raid_disks || !rebuilds) {
@@ -2365,8 +2390,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
new_devs > 1 ? "s" : "");
return -EINVAL;
} else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) {
- DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)",
- (unsigned long long) mddev->recovery_cp);
+ DMERR("'rebuild' specified while raid set is not in-sync (resync_offset=%llu)",
+ (unsigned long long) mddev->resync_offset);
return -EINVAL;
} else if (rs_is_reshaping(rs)) {
DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)",
@@ -2381,7 +2406,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
*/
sb_retrieve_failed_devices(sb, failed_devices);
rdev_for_each(r, mddev) {
- if (test_bit(Journal, &rdev->flags) ||
+ if (test_bit(Journal, &r->flags) ||
!r->sb_page)
continue;
sb2 = page_address(r->sb_page);
@@ -2490,7 +2515,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
rdev->saved_raid_disk = rdev->raid_disk;
}
- /* Reshape support -> restore repective data offsets */
+ /* Reshape support -> restore respective data offsets */
rdev->data_offset = le64_to_cpu(sb->data_offset);
rdev->new_data_offset = le64_to_cpu(sb->new_data_offset);
@@ -2506,6 +2531,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
struct md_rdev *rdev, *freshest;
struct mddev *mddev = &rs->md;
+ /* Respect resynchronization requested with "sync" argument. */
+ if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
+ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+
freshest = NULL;
rdev_for_each(rdev, mddev) {
if (test_bit(Journal, &rdev->flags))
@@ -2671,11 +2700,11 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
}
out:
/*
- * Raise recovery_cp in case data_offset != 0 to
+ * Raise resync_offset in case data_offset != 0 to
* avoid false recovery positives in the constructor.
*/
- if (rs->md.recovery_cp < rs->md.dev_sectors)
- rs->md.recovery_cp += rs->dev[0].rdev.data_offset;
+ if (rs->md.resync_offset < rs->md.dev_sectors)
+ rs->md.resync_offset += rs->dev[0].rdev.data_offset;
/* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */
rdev_for_each(rdev, &rs->md) {
@@ -2730,7 +2759,7 @@ static int rs_setup_takeover(struct raid_set *rs)
}
clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
- mddev->recovery_cp = MaxSector;
+ mddev->resync_offset = MaxSector;
while (d--) {
rdev = &rs->dev[d].rdev;
@@ -2738,7 +2767,7 @@ static int rs_setup_takeover(struct raid_set *rs)
if (test_bit(d, (void *) rs->rebuild_disks)) {
clear_bit(In_sync, &rdev->flags);
clear_bit(Faulty, &rdev->flags);
- mddev->recovery_cp = rdev->recovery_offset = 0;
+ mddev->resync_offset = rdev->recovery_offset = 0;
/* Bitmap has to be created when we do an "up" takeover */
set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
}
@@ -2808,23 +2837,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
return 0;
}
-/* Get reshape sectors from data_offsets or raid set */
-static sector_t _get_reshape_sectors(struct raid_set *rs)
-{
- struct md_rdev *rdev;
- sector_t reshape_sectors = 0;
-
- rdev_for_each(rdev, &rs->md)
- if (!test_bit(Journal, &rdev->flags)) {
- reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ?
- rdev->data_offset - rdev->new_data_offset :
- rdev->new_data_offset - rdev->data_offset;
- break;
- }
-
- return max(reshape_sectors, (sector_t) rs->data_offset);
-}
-
/*
* Reshape:
* - change raid layout
@@ -2855,7 +2867,7 @@ static int rs_setup_reshape(struct raid_set *rs)
*
* - in case of adding disk(s), array size has
* to grow after the disk adding reshape,
- * which'll hapen in the event handler;
+ * which'll happen in the event handler;
* reshape will happen forward, so space has to
* be available at the beginning of each disk
*
@@ -3148,7 +3160,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
* If a takeover is needed, userspace sets any additional
* devices to rebuild and we can check for a valid request here.
*
- * If acceptible, set the level to the new requested
+ * If acceptable, set the level to the new requested
* one, prohibit requesting recovery, allow the raid
* set to run and store superblocks during resume.
*/
@@ -3183,12 +3195,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
/* Out-of-place space has to be available to allow for a reshape unless raid1! */
if (reshape_sectors || rs_is_raid1(rs)) {
/*
- * We can only prepare for a reshape here, because the
- * raid set needs to run to provide the repective reshape
- * check functions via its MD personality instance.
- *
- * So do the reshape check after md_run() succeeded.
- */
+ * We can only prepare for a reshape here, because the
+ * raid set needs to run to provide the respective reshape
+ * check functions via its MD personality instance.
+ *
+ * So do the reshape check after md_run() succeeded.
+ */
r = rs_prepare_reshape(rs);
if (r)
goto bad;
@@ -3213,7 +3225,7 @@ size_check:
if (r)
goto bad;
- rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
+ rs_setup_recovery(rs, rs->md.resync_offset < rs->md.dev_sectors ? rs->md.resync_offset : rs->md.dev_sectors);
} else {
/* This is no size change or it is shrinking, update size and record in superblocks */
r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
@@ -3235,14 +3247,15 @@ size_check:
rs_reset_inconclusive_reshape(rs);
/* Start raid set read-only and assumed clean to change in raid_resume() */
- rs->md.ro = 1;
+ rs->md.ro = MD_RDONLY;
rs->md.in_sync = 1;
+ /* Has to be held on running the array */
+ mddev_suspend_and_lock_nointr(&rs->md);
+
/* Keep array frozen until resume. */
- set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
+ md_frozen_sync_thread(&rs->md);
- /* Has to be held on running the array */
- mddev_lock_nointr(&rs->md);
r = md_run(&rs->md);
rs->md.in_sync = 0; /* Assume already marked dirty */
if (r) {
@@ -3254,8 +3267,7 @@ size_check:
r = md_start(&rs->md);
if (r) {
ti->error = "Failed to start raid array";
- mddev_unlock(&rs->md);
- goto bad_md_start;
+ goto bad_unlock;
}
/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
@@ -3263,26 +3275,24 @@ size_check:
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
if (r) {
ti->error = "Failed to set raid4/5/6 journal mode";
- mddev_unlock(&rs->md);
- goto bad_journal_mode_set;
+ goto bad_unlock;
}
}
- mddev_suspend(&rs->md);
set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
if (rs_is_raid456(rs)) {
r = rs_set_raid456_stripe_cache(rs);
if (r)
- goto bad_stripe_cache;
+ goto bad_unlock;
}
/* Now do an early reshape check */
if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
r = rs_check_reshape(rs);
if (r)
- goto bad_check_reshape;
+ goto bad_unlock;
/* Restore new, ctr requested layout to perform check */
rs_config_restore(rs, &rs_layout);
@@ -3291,22 +3301,21 @@ size_check:
r = rs->md.pers->check_reshape(&rs->md);
if (r) {
ti->error = "Reshape check failed";
- goto bad_check_reshape;
+ goto bad_unlock;
}
}
}
/* Disable/enable discard support on raid set. */
configure_discard_support(rs);
+ rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table));
mddev_unlock(&rs->md);
return 0;
-bad_md_start:
-bad_journal_mode_set:
-bad_stripe_cache:
-bad_check_reshape:
+bad_unlock:
md_stop(&rs->md);
+ mddev_unlock(&rs->md);
bad:
raid_set_free(rs);
@@ -3317,7 +3326,13 @@ static void raid_dtr(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
+ mddev_lock_nointr(&rs->md);
md_stop(&rs->md);
+ rs->md.dm_gendisk = NULL;
+ mddev_unlock(&rs->md);
+
+ if (work_pending(&rs->md.event_work))
+ flush_work(&rs->md.event_work);
raid_set_free(rs);
}
@@ -3327,17 +3342,18 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
struct mddev *mddev = &rs->md;
/*
- * If we're reshaping to add disk(s)), ti->len and
+ * If we're reshaping to add disk(s), ti->len and
* mddev->array_sectors will differ during the process
* (ti->len > mddev->array_sectors), so we have to requeue
* bios with addresses > mddev->array_sectors here or
* there will occur accesses past EOD of the component
* data images thus erroring the raid set.
*/
- if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
+ if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors))
return DM_MAPIO_REQUEUE;
- md_handle_request(mddev, bio);
+ if (unlikely(!md_handle_request(mddev, bio)))
+ return DM_MAPIO_REQUEUE;
return DM_MAPIO_SUBMITTED;
}
@@ -3369,7 +3385,7 @@ static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long r
/* The MD sync thread can be done with io or be interrupted but still be running */
if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
(test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
+ (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
return st_reshape;
@@ -3433,7 +3449,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
} else {
if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
- r = mddev->recovery_cp;
+ r = mddev->resync_offset;
else
r = mddev->curr_resync_completed;
@@ -3537,7 +3553,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
recovery = rs->md.recovery;
state = decipher_sync_action(mddev, recovery);
progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
- resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
+ resync_mismatches = mddev->last_sync_action == ACTION_CHECK ?
atomic64_read(&mddev->resync_mismatches) : 0;
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
@@ -3712,26 +3728,37 @@ static void raid_status(struct dm_target *ti, status_type_t type,
}
static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
- char *result, unsigned maxlen)
+ char *result, unsigned int maxlen)
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
+ int ret = 0;
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
- if (!strcasecmp(argv[0], "frozen"))
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- else
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) ||
+ test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags))
+ return -EBUSY;
- if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
- if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_unregister_thread(&mddev->sync_thread);
- md_reap_sync_thread(mddev);
- }
- } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
+ if (!strcasecmp(argv[0], "frozen")) {
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ md_frozen_sync_thread(mddev);
+ mddev_unlock(mddev);
+ } else if (!strcasecmp(argv[0], "idle")) {
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ md_idle_sync_thread(mddev);
+ mddev_unlock(mddev);
+ }
+
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
else if (!strcasecmp(argv[0], "resync"))
; /* MD_RECOVERY_NEEDED set below */
@@ -3748,16 +3775,16 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
} else
return -EINVAL;
}
- if (mddev->ro == 2) {
+ if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- mddev->ro = 0;
- if (!mddev->suspended && mddev->sync_thread)
+ mddev->ro = MD_RDWR;
+ if (!mddev->suspended)
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- if (!mddev->suspended && mddev->thread)
+ if (!mddev->suspended)
md_wakeup_thread(mddev->thread);
return 0;
@@ -3786,8 +3813,40 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
struct raid_set *rs = ti->private;
unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
- blk_limits_io_min(limits, chunk_size_bytes);
- blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
+ if (chunk_size_bytes) {
+ limits->io_min = chunk_size_bytes;
+ limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
+ }
+}
+
+static void raid_presuspend(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+ struct mddev *mddev = &rs->md;
+
+ /*
+ * From now on, disallow raid_message() to change sync_thread until
+ * resume, raid_postsuspend() is too late.
+ */
+ set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
+
+ if (!reshape_interrupted(mddev))
+ return;
+
+ /*
+ * For raid456, if reshape is interrupted, IO across reshape position
+ * will never make progress, while caller will wait for IO to be done.
+ * Inform raid456 to handle those IO to prevent deadlock.
+ */
+ if (mddev->pers && mddev->pers->prepare_suspend)
+ mddev->pers->prepare_suspend(mddev);
+}
+
+static void raid_presuspend_undo(struct dm_target *ti)
+{
+ struct raid_set *rs = ti->private;
+
+ clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
}
static void raid_postsuspend(struct dm_target *ti)
@@ -3795,13 +3854,13 @@ static void raid_postsuspend(struct dm_target *ti)
struct raid_set *rs = ti->private;
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
- /* Writes have to be stopped before suspending to avoid deadlocks. */
- if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
- md_stop_writes(&rs->md);
-
- mddev_lock_nointr(&rs->md);
- mddev_suspend(&rs->md);
- mddev_unlock(&rs->md);
+ /*
+ * sync_thread must be stopped during suspend, and writes have
+ * to be stopped before suspending to avoid deadlocks.
+ */
+ md_stop_writes(&rs->md);
+ mddev_suspend(&rs->md, false);
+ rs->md.ro = MD_RDONLY;
}
}
@@ -3895,9 +3954,13 @@ static int __load_dirty_region_bitmap(struct raid_set *rs)
/* Try loading the bitmap unless "raid0", which does not have one */
if (!rs_is_raid0(rs) &&
!test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) {
- r = md_bitmap_load(&rs->md);
- if (r)
- DMERR("Failed to load bitmap");
+ struct mddev *mddev = &rs->md;
+
+ if (md_bitmap_enabled(mddev, false)) {
+ r = mddev->bitmap_ops->load(mddev);
+ if (r)
+ DMERR("Failed to load bitmap");
+ }
}
return r;
@@ -3910,7 +3973,7 @@ static void rs_update_sbs(struct raid_set *rs)
int ro = mddev->ro;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
md_update_sb(mddev, 1);
mddev->ro = ro;
}
@@ -3978,6 +4041,11 @@ static int raid_preresume(struct dm_target *ti)
if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
return 0;
+ /* If different and no explicit grow request, expose MD array size as of superblock. */
+ if (!test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) &&
+ rs->array_sectors != mddev->array_sectors)
+ rs_set_capacity(rs);
+
/*
* The superblocks need to be updated on disk if the
* array is new or new devices got added (thus zeroed
@@ -4001,23 +4069,24 @@ static int raid_preresume(struct dm_target *ti)
}
/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
(test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
(rs->requested_bitmap_chunk_sectors &&
mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
- r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
- if (r)
- DMERR("Failed to resize bitmap");
+ if (md_bitmap_enabled(mddev, false)) {
+ r = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors,
+ chunksize);
+ if (r)
+ DMERR("Failed to resize bitmap");
+ }
}
/* Check for any resize/reshape on @rs and adjust/initiate */
- /* Be prepared for mddev_resume() in raid_resume() */
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
+ if (mddev->resync_offset && mddev->resync_offset < MaxSector) {
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
- mddev->resync_min = mddev->recovery_cp;
+ mddev->resync_min = mddev->resync_offset;
if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
mddev->resync_max_sectors = mddev->dev_sectors;
}
@@ -4048,7 +4117,9 @@ static void raid_resume(struct dm_target *ti)
* Take this opportunity to check whether any failed
* devices are reachable again.
*/
+ mddev_lock_nointr(mddev);
attempt_restore_of_faulty_devices(rs);
+ mddev_unlock(mddev);
}
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
@@ -4057,11 +4128,14 @@ static void raid_resume(struct dm_target *ti)
rs_set_capacity(rs);
mddev_lock_nointr(mddev);
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- mddev->ro = 0;
+ WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
+ WARN_ON_ONCE(rcu_dereference_protected(mddev->sync_thread,
+ lockdep_is_held(&mddev->reconfig_mutex)));
+ clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
+ mddev->ro = MD_RDWR;
mddev->in_sync = 0;
- mddev_resume(mddev);
- mddev_unlock(mddev);
+ md_unfrozen_sync_thread(mddev);
+ mddev_unlock_and_resume(mddev);
}
}
@@ -4076,27 +4150,13 @@ static struct target_type raid_target = {
.message = raid_message,
.iterate_devices = raid_iterate_devices,
.io_hints = raid_io_hints,
+ .presuspend = raid_presuspend,
+ .presuspend_undo = raid_presuspend_undo,
.postsuspend = raid_postsuspend,
.preresume = raid_preresume,
.resume = raid_resume,
};
-
-static int __init dm_raid_init(void)
-{
- DMINFO("Loading target version %u.%u.%u",
- raid_target.version[0],
- raid_target.version[1],
- raid_target.version[2]);
- return dm_register_target(&raid_target);
-}
-
-static void __exit dm_raid_exit(void)
-{
- dm_unregister_target(&raid_target);
-}
-
-module_init(dm_raid_init);
-module_exit(dm_raid_exit);
+module_dm(raid);
module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC(devices_handle_discard_safely,
@@ -4109,6 +4169,6 @@ MODULE_ALIAS("dm-raid10");
MODULE_ALIAS("dm-raid4");
MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6");
-MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>");
-MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_AUTHOR("Neil Brown <dm-devel@lists.linux.dev>");
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");