diff options
Diffstat (limited to 'drivers/md/dm-raid.c')
| -rw-r--r-- | drivers/md/dm-raid.c | 356 |
1 files changed, 208 insertions, 148 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 54263679a7b1..c6f7129e43d3 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2011 Neil Brown * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved. @@ -29,10 +30,10 @@ */ #define MIN_RAID456_JOURNAL_SPACE (4*2048) -static bool devices_handle_discard_safely = false; +static bool devices_handle_discard_safely; /* - * The following flags are used by dm-raid.c to set up the array state. + * The following flags are used by dm-raid to set up the array state. * They must be cleared before md_run is called. */ #define FirstUse 10 /* rdev flag */ @@ -212,6 +213,7 @@ struct raid_dev { #define RT_FLAG_RS_IN_SYNC 6 #define RT_FLAG_RS_RESYNCING 7 #define RT_FLAG_RS_GROW 8 +#define RT_FLAG_RS_FROZEN 9 /* Array elements of 64 bit needed for rebuild/failed disk bits */ #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -253,7 +255,7 @@ struct raid_set { int mode; } journal_dev; - struct raid_dev dev[]; + struct raid_dev dev[] __counted_by(raid_disks); }; static void rs_config_backup(struct raid_set *rs, struct rs_layout *l) @@ -362,8 +364,8 @@ static struct { const int mode; const char *param; } _raid456_journal_mode[] = { - { R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" }, - { R5C_JOURNAL_MODE_WRITE_BACK , "writeback" } + { R5C_JOURNAL_MODE_WRITE_THROUGH, "writethrough" }, + { R5C_JOURNAL_MODE_WRITE_BACK, "writeback" } }; /* Return MD raid4/5/6 journal mode for dm @journal_mode one */ @@ -436,7 +438,7 @@ static bool rs_is_reshapable(struct raid_set *rs) /* Return true, if raid set in @rs is recovering */ static bool rs_is_recovering(struct raid_set *rs) { - return rs->md.recovery_cp < rs->md.dev_sectors; + return rs->md.resync_offset < rs->md.dev_sectors; } /* Return true, if raid set in @rs is reshaping */ @@ -748,7 +750,11 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r return ERR_PTR(-ENOMEM); } - mddev_init(&rs->md); + if (mddev_init(&rs->md)) { + kfree(rs); + ti->error = "Cannot initialize raid context"; + return ERR_PTR(-ENOMEM); + } rs->raid_disks = raid_devs; rs->delta_disks = 0; @@ -762,7 +768,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; for (i = 0; i < raid_devs; i++) md_rdev_init(&rs->dev[i].rdev); @@ -797,6 +803,7 @@ static void raid_set_free(struct raid_set *rs) dm_put_device(rs->ti, rs->dev[i].data_dev); } + mddev_destroy(&rs->md); kfree(rs); } @@ -905,7 +912,7 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as) rs->md.external = 0; rs->md.persistent = 1; rs->md.major_version = 2; - } else if (rebuild && !rs->md.recovery_cp) { + } else if (rebuild && !rs->md.resync_offset) { /* * Without metadata, we will not be able to tell if the array * is in-sync or not - we must assume it is not. Therefore, @@ -1081,7 +1088,7 @@ static int validate_raid_redundancy(struct raid_set *rs) if ((!rs->dev[i].rdev.sb_page || !test_bit(In_sync, &rs->dev[i].rdev.flags)) && (++rebuilds_per_group >= copies)) - goto too_many; + goto too_many; } break; default: @@ -1114,7 +1121,7 @@ too_many: * [stripe_cache <sectors>] Stripe cache size for higher RAIDs * [region_size <sectors>] Defines granularity of bitmap * [journal_dev <dev>] raid4/5/6 journaling deviice - * (i.e. write hole closing log) + * (i.e. write hole closing log) * * RAID10-only options: * [raid10_copies <# copies>] Number of copies. (Default: 2) @@ -1348,11 +1355,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, return -EINVAL; } - /* - * In device-mapper, we specify things in sectors, but - * MD records this value in kB - */ - if (value < 0 || value / 2 > COUNTER_MAX) { + if (value < 0) { rs->ti->error = "Max write-behind limit out of range"; return -EINVAL; } @@ -1619,6 +1622,23 @@ static int _check_data_dev_sectors(struct raid_set *rs) return 0; } +/* Get reshape sectors from data_offsets or raid set */ +static sector_t _get_reshape_sectors(struct raid_set *rs) +{ + struct md_rdev *rdev; + sector_t reshape_sectors = 0; + + rdev_for_each(rdev, &rs->md) + if (!test_bit(Journal, &rdev->flags)) { + reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? + rdev->data_offset - rdev->new_data_offset : + rdev->new_data_offset - rdev->data_offset; + break; + } + + return max(reshape_sectors, (sector_t) rs->data_offset); +} + /* Calculate the sectors per device and per array used for @rs */ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev) { @@ -1649,7 +1669,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b if (sector_div(dev_sectors, data_stripes)) goto bad; - array_sectors = (data_stripes + delta_disks) * dev_sectors; + array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs)); if (sector_div(array_sectors, rs->raid10_copies)) goto bad; @@ -1658,7 +1678,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b else /* Striped layouts */ - array_sectors = (data_stripes + delta_disks) * dev_sectors; + array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs)); mddev->array_sectors = array_sectors; mddev->dev_sectors = dev_sectors; @@ -1675,20 +1695,20 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) { /* raid0 does not recover */ if (rs_is_raid0(rs)) - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; /* * A raid6 set has to be recovered either * completely or for the grown part to * ensure proper parity and Q-Syndrome */ else if (rs_is_raid6(rs)) - rs->md.recovery_cp = dev_sectors; + rs->md.resync_offset = dev_sectors; /* * Other raid set types may skip recovery * depending on the 'nosync' flag. */ else - rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) + rs->md.resync_offset = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) ? MaxSector : dev_sectors; } @@ -1697,11 +1717,20 @@ static void do_table_event(struct work_struct *ws) struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); smp_rmb(); /* Make sure we access most actual mddev properties */ - if (!rs_is_reshaping(rs)) { + + /* Only grow size resulting from added stripe(s) after reshape ended. */ + if (!rs_is_reshaping(rs) && + rs->array_sectors > rs->md.array_sectors && + !rs->md.delta_disks && + rs->md.raid_disks == rs->raid_disks) { + /* The raid10 personality doesn't provide proper device sizes -> correct. */ if (rs_is_raid10(rs)) rs_set_rdev_sectors(rs); + + rs->md.array_sectors = rs->array_sectors; rs_set_capacity(rs); } + dm_table_event(rs->ti->table); } @@ -1988,7 +2017,7 @@ struct dm_raid_superblock { __le64 sectors; /* Used device size in sectors */ /* - * Additonal Bit field of devices indicating failures to support + * Additional Bit field of devices indicating failures to support * up to 256 devices with the 1.9.0 on-disk metadata format */ __le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1]; @@ -2114,7 +2143,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->events = cpu_to_le64(mddev->events); sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); + sb->array_resync_offset = cpu_to_le64(mddev->resync_offset); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -2208,7 +2237,6 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) { int role; - unsigned int d; struct mddev *mddev = &rs->md; uint64_t events_sb; uint64_t failed_devices[DISKS_ARRAY_ELEMS]; @@ -2306,24 +2334,23 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) } if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); + mddev->resync_offset = le64_to_cpu(sb->array_resync_offset); /* * During load, we set FirstUse if a new superblock was written. * There are two reasons we might not have a superblock: * 1) The raid set is brand new - in which case, all of the * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. + * resync_offset must be 0, unless forced. * 2) This is a new device being added to an old raid set * and the new device needs to be rebuilt - in which * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. + * resync_offset must be MaxSector. * 3) This is/are a new device(s) being added to an old * raid set during takeover to a higher raid level * to provide capacity for redundancy or during reshape * to add capacity to grow the raid set. */ - d = 0; rdev_for_each(r, mddev) { if (test_bit(Journal, &rdev->flags)) continue; @@ -2339,8 +2366,6 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) if (test_bit(FirstUse, &r->flags)) rebuild_and_new++; } - - d++; } if (new_devs == rs->raid_disks || !rebuilds) { @@ -2365,8 +2390,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) new_devs > 1 ? "s" : ""); return -EINVAL; } else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) { - DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)", - (unsigned long long) mddev->recovery_cp); + DMERR("'rebuild' specified while raid set is not in-sync (resync_offset=%llu)", + (unsigned long long) mddev->resync_offset); return -EINVAL; } else if (rs_is_reshaping(rs)) { DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)", @@ -2381,7 +2406,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) */ sb_retrieve_failed_devices(sb, failed_devices); rdev_for_each(r, mddev) { - if (test_bit(Journal, &rdev->flags) || + if (test_bit(Journal, &r->flags) || !r->sb_page) continue; sb2 = page_address(r->sb_page); @@ -2490,7 +2515,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) rdev->saved_raid_disk = rdev->raid_disk; } - /* Reshape support -> restore repective data offsets */ + /* Reshape support -> restore respective data offsets */ rdev->data_offset = le64_to_cpu(sb->data_offset); rdev->new_data_offset = le64_to_cpu(sb->new_data_offset); @@ -2506,6 +2531,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; + /* Respect resynchronization requested with "sync" argument. */ + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + freshest = NULL; rdev_for_each(rdev, mddev) { if (test_bit(Journal, &rdev->flags)) @@ -2671,11 +2700,11 @@ static int rs_adjust_data_offsets(struct raid_set *rs) } out: /* - * Raise recovery_cp in case data_offset != 0 to + * Raise resync_offset in case data_offset != 0 to * avoid false recovery positives in the constructor. */ - if (rs->md.recovery_cp < rs->md.dev_sectors) - rs->md.recovery_cp += rs->dev[0].rdev.data_offset; + if (rs->md.resync_offset < rs->md.dev_sectors) + rs->md.resync_offset += rs->dev[0].rdev.data_offset; /* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */ rdev_for_each(rdev, &rs->md) { @@ -2730,7 +2759,7 @@ static int rs_setup_takeover(struct raid_set *rs) } clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; while (d--) { rdev = &rs->dev[d].rdev; @@ -2738,7 +2767,7 @@ static int rs_setup_takeover(struct raid_set *rs) if (test_bit(d, (void *) rs->rebuild_disks)) { clear_bit(In_sync, &rdev->flags); clear_bit(Faulty, &rdev->flags); - mddev->recovery_cp = rdev->recovery_offset = 0; + mddev->resync_offset = rdev->recovery_offset = 0; /* Bitmap has to be created when we do an "up" takeover */ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); } @@ -2808,23 +2837,6 @@ static int rs_prepare_reshape(struct raid_set *rs) return 0; } -/* Get reshape sectors from data_offsets or raid set */ -static sector_t _get_reshape_sectors(struct raid_set *rs) -{ - struct md_rdev *rdev; - sector_t reshape_sectors = 0; - - rdev_for_each(rdev, &rs->md) - if (!test_bit(Journal, &rdev->flags)) { - reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? - rdev->data_offset - rdev->new_data_offset : - rdev->new_data_offset - rdev->data_offset; - break; - } - - return max(reshape_sectors, (sector_t) rs->data_offset); -} - /* * Reshape: * - change raid layout @@ -2855,7 +2867,7 @@ static int rs_setup_reshape(struct raid_set *rs) * * - in case of adding disk(s), array size has * to grow after the disk adding reshape, - * which'll hapen in the event handler; + * which'll happen in the event handler; * reshape will happen forward, so space has to * be available at the beginning of each disk * @@ -3148,7 +3160,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) * If a takeover is needed, userspace sets any additional * devices to rebuild and we can check for a valid request here. * - * If acceptible, set the level to the new requested + * If acceptable, set the level to the new requested * one, prohibit requesting recovery, allow the raid * set to run and store superblocks during resume. */ @@ -3183,12 +3195,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Out-of-place space has to be available to allow for a reshape unless raid1! */ if (reshape_sectors || rs_is_raid1(rs)) { /* - * We can only prepare for a reshape here, because the - * raid set needs to run to provide the repective reshape - * check functions via its MD personality instance. - * - * So do the reshape check after md_run() succeeded. - */ + * We can only prepare for a reshape here, because the + * raid set needs to run to provide the respective reshape + * check functions via its MD personality instance. + * + * So do the reshape check after md_run() succeeded. + */ r = rs_prepare_reshape(rs); if (r) goto bad; @@ -3213,7 +3225,7 @@ size_check: if (r) goto bad; - rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors); + rs_setup_recovery(rs, rs->md.resync_offset < rs->md.dev_sectors ? rs->md.resync_offset : rs->md.dev_sectors); } else { /* This is no size change or it is shrinking, update size and record in superblocks */ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); @@ -3235,14 +3247,15 @@ size_check: rs_reset_inconclusive_reshape(rs); /* Start raid set read-only and assumed clean to change in raid_resume() */ - rs->md.ro = 1; + rs->md.ro = MD_RDONLY; rs->md.in_sync = 1; + /* Has to be held on running the array */ + mddev_suspend_and_lock_nointr(&rs->md); + /* Keep array frozen until resume. */ - set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); + md_frozen_sync_thread(&rs->md); - /* Has to be held on running the array */ - mddev_lock_nointr(&rs->md); r = md_run(&rs->md); rs->md.in_sync = 0; /* Assume already marked dirty */ if (r) { @@ -3254,8 +3267,7 @@ size_check: r = md_start(&rs->md); if (r) { ti->error = "Failed to start raid array"; - mddev_unlock(&rs->md); - goto bad_md_start; + goto bad_unlock; } /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */ @@ -3263,26 +3275,24 @@ size_check: r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode); if (r) { ti->error = "Failed to set raid4/5/6 journal mode"; - mddev_unlock(&rs->md); - goto bad_journal_mode_set; + goto bad_unlock; } } - mddev_suspend(&rs->md); set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags); /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ if (rs_is_raid456(rs)) { r = rs_set_raid456_stripe_cache(rs); if (r) - goto bad_stripe_cache; + goto bad_unlock; } /* Now do an early reshape check */ if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { r = rs_check_reshape(rs); if (r) - goto bad_check_reshape; + goto bad_unlock; /* Restore new, ctr requested layout to perform check */ rs_config_restore(rs, &rs_layout); @@ -3291,22 +3301,21 @@ size_check: r = rs->md.pers->check_reshape(&rs->md); if (r) { ti->error = "Reshape check failed"; - goto bad_check_reshape; + goto bad_unlock; } } } /* Disable/enable discard support on raid set. */ configure_discard_support(rs); + rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table)); mddev_unlock(&rs->md); return 0; -bad_md_start: -bad_journal_mode_set: -bad_stripe_cache: -bad_check_reshape: +bad_unlock: md_stop(&rs->md); + mddev_unlock(&rs->md); bad: raid_set_free(rs); @@ -3317,7 +3326,13 @@ static void raid_dtr(struct dm_target *ti) { struct raid_set *rs = ti->private; + mddev_lock_nointr(&rs->md); md_stop(&rs->md); + rs->md.dm_gendisk = NULL; + mddev_unlock(&rs->md); + + if (work_pending(&rs->md.event_work)) + flush_work(&rs->md.event_work); raid_set_free(rs); } @@ -3327,17 +3342,18 @@ static int raid_map(struct dm_target *ti, struct bio *bio) struct mddev *mddev = &rs->md; /* - * If we're reshaping to add disk(s)), ti->len and + * If we're reshaping to add disk(s), ti->len and * mddev->array_sectors will differ during the process * (ti->len > mddev->array_sectors), so we have to requeue * bios with addresses > mddev->array_sectors here or * there will occur accesses past EOD of the component * data images thus erroring the raid set. */ - if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) + if (unlikely(bio_has_data(bio) && bio_end_sector(bio) > mddev->array_sectors)) return DM_MAPIO_REQUEUE; - md_handle_request(mddev, bio); + if (unlikely(!md_handle_request(mddev, bio))) + return DM_MAPIO_REQUEUE; return DM_MAPIO_SUBMITTED; } @@ -3369,7 +3385,7 @@ static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long r /* The MD sync thread can be done with io or be interrupted but still be running */ if (!test_bit(MD_RECOVERY_DONE, &recovery) && (test_bit(MD_RECOVERY_RUNNING, &recovery) || - (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { + (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) return st_reshape; @@ -3433,7 +3449,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, } else { if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery)) - r = mddev->recovery_cp; + r = mddev->resync_offset; else r = mddev->curr_resync_completed; @@ -3537,7 +3553,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, recovery = rs->md.recovery; state = decipher_sync_action(mddev, recovery); progress = rs_get_progress(rs, recovery, state, resync_max_sectors); - resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? + resync_mismatches = mddev->last_sync_action == ACTION_CHECK ? atomic64_read(&mddev->resync_mismatches) : 0; /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ @@ -3712,26 +3728,37 @@ static void raid_status(struct dm_target *ti, status_type_t type, } static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, - char *result, unsigned maxlen) + char *result, unsigned int maxlen) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; + int ret = 0; if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (!strcasecmp(argv[0], "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) || + test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags)) + return -EBUSY; - if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_unregister_thread(&mddev->sync_thread); - md_reap_sync_thread(mddev); - } - } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle) + if (!strcasecmp(argv[0], "frozen")) { + ret = mddev_lock(mddev); + if (ret) + return ret; + + md_frozen_sync_thread(mddev); + mddev_unlock(mddev); + } else if (!strcasecmp(argv[0], "idle")) { + ret = mddev_lock(mddev); + if (ret) + return ret; + + md_idle_sync_thread(mddev); + mddev_unlock(mddev); + } + + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (decipher_sync_action(mddev, mddev->recovery) != st_idle) return -EBUSY; else if (!strcasecmp(argv[0], "resync")) ; /* MD_RECOVERY_NEEDED set below */ @@ -3748,16 +3775,16 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, } else return -EINVAL; } - if (mddev->ro == 2) { + if (mddev->ro == MD_AUTO_READ) { /* A write to sync_action is enough to justify * canceling read-auto mode */ - mddev->ro = 0; - if (!mddev->suspended && mddev->sync_thread) + mddev->ro = MD_RDWR; + if (!mddev->suspended) md_wakeup_thread(mddev->sync_thread); } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - if (!mddev->suspended && mddev->thread) + if (!mddev->suspended) md_wakeup_thread(mddev->thread); return 0; @@ -3786,8 +3813,40 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) struct raid_set *rs = ti->private; unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors); - blk_limits_io_min(limits, chunk_size_bytes); - blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); + if (chunk_size_bytes) { + limits->io_min = chunk_size_bytes; + limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs); + } +} + +static void raid_presuspend(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + /* + * From now on, disallow raid_message() to change sync_thread until + * resume, raid_postsuspend() is too late. + */ + set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); + + if (!reshape_interrupted(mddev)) + return; + + /* + * For raid456, if reshape is interrupted, IO across reshape position + * will never make progress, while caller will wait for IO to be done. + * Inform raid456 to handle those IO to prevent deadlock. + */ + if (mddev->pers && mddev->pers->prepare_suspend) + mddev->pers->prepare_suspend(mddev); +} + +static void raid_presuspend_undo(struct dm_target *ti) +{ + struct raid_set *rs = ti->private; + + clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); } static void raid_postsuspend(struct dm_target *ti) @@ -3795,13 +3854,13 @@ static void raid_postsuspend(struct dm_target *ti) struct raid_set *rs = ti->private; if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { - /* Writes have to be stopped before suspending to avoid deadlocks. */ - if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery)) - md_stop_writes(&rs->md); - - mddev_lock_nointr(&rs->md); - mddev_suspend(&rs->md); - mddev_unlock(&rs->md); + /* + * sync_thread must be stopped during suspend, and writes have + * to be stopped before suspending to avoid deadlocks. + */ + md_stop_writes(&rs->md); + mddev_suspend(&rs->md, false); + rs->md.ro = MD_RDONLY; } } @@ -3895,9 +3954,13 @@ static int __load_dirty_region_bitmap(struct raid_set *rs) /* Try loading the bitmap unless "raid0", which does not have one */ if (!rs_is_raid0(rs) && !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) { - r = md_bitmap_load(&rs->md); - if (r) - DMERR("Failed to load bitmap"); + struct mddev *mddev = &rs->md; + + if (md_bitmap_enabled(mddev, false)) { + r = mddev->bitmap_ops->load(mddev); + if (r) + DMERR("Failed to load bitmap"); + } } return r; @@ -3910,7 +3973,7 @@ static void rs_update_sbs(struct raid_set *rs) int ro = mddev->ro; set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); - mddev->ro = 0; + mddev->ro = MD_RDWR; md_update_sb(mddev, 1); mddev->ro = ro; } @@ -3978,6 +4041,11 @@ static int raid_preresume(struct dm_target *ti) if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags)) return 0; + /* If different and no explicit grow request, expose MD array size as of superblock. */ + if (!test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) && + rs->array_sectors != mddev->array_sectors) + rs_set_capacity(rs); + /* * The superblocks need to be updated on disk if the * array is new or new devices got added (thus zeroed @@ -4001,23 +4069,24 @@ static int raid_preresume(struct dm_target *ti) } /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */ - if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) || (rs->requested_bitmap_chunk_sectors && mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize; - r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0); - if (r) - DMERR("Failed to resize bitmap"); + if (md_bitmap_enabled(mddev, false)) { + r = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors, + chunksize); + if (r) + DMERR("Failed to resize bitmap"); + } } /* Check for any resize/reshape on @rs and adjust/initiate */ - /* Be prepared for mddev_resume() in raid_resume() */ - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset && mddev->resync_offset < MaxSector) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - mddev->resync_min = mddev->recovery_cp; + mddev->resync_min = mddev->resync_offset; if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) mddev->resync_max_sectors = mddev->dev_sectors; } @@ -4048,7 +4117,9 @@ static void raid_resume(struct dm_target *ti) * Take this opportunity to check whether any failed * devices are reachable again. */ + mddev_lock_nointr(mddev); attempt_restore_of_faulty_devices(rs); + mddev_unlock(mddev); } if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) { @@ -4057,11 +4128,14 @@ static void raid_resume(struct dm_target *ti) rs_set_capacity(rs); mddev_lock_nointr(mddev); - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - mddev->ro = 0; + WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)); + WARN_ON_ONCE(rcu_dereference_protected(mddev->sync_thread, + lockdep_is_held(&mddev->reconfig_mutex))); + clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); + mddev->ro = MD_RDWR; mddev->in_sync = 0; - mddev_resume(mddev); - mddev_unlock(mddev); + md_unfrozen_sync_thread(mddev); + mddev_unlock_and_resume(mddev); } } @@ -4076,27 +4150,13 @@ static struct target_type raid_target = { .message = raid_message, .iterate_devices = raid_iterate_devices, .io_hints = raid_io_hints, + .presuspend = raid_presuspend, + .presuspend_undo = raid_presuspend_undo, .postsuspend = raid_postsuspend, .preresume = raid_preresume, .resume = raid_resume, }; - -static int __init dm_raid_init(void) -{ - DMINFO("Loading target version %u.%u.%u", - raid_target.version[0], - raid_target.version[1], - raid_target.version[2]); - return dm_register_target(&raid_target); -} - -static void __exit dm_raid_exit(void) -{ - dm_unregister_target(&raid_target); -} - -module_init(dm_raid_init); -module_exit(dm_raid_exit); +module_dm(raid); module_param(devices_handle_discard_safely, bool, 0644); MODULE_PARM_DESC(devices_handle_discard_safely, @@ -4109,6 +4169,6 @@ MODULE_ALIAS("dm-raid10"); MODULE_ALIAS("dm-raid4"); MODULE_ALIAS("dm-raid5"); MODULE_ALIAS("dm-raid6"); -MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>"); -MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>"); +MODULE_AUTHOR("Neil Brown <dm-devel@lists.linux.dev>"); +MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@lists.linux.dev>"); MODULE_LICENSE("GPL"); |
