diff options
Diffstat (limited to 'drivers/md/dm-raid.c')
| -rw-r--r-- | drivers/md/dm-raid.c | 159 |
1 files changed, 93 insertions, 66 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index abe88d1e6735..c6f7129e43d3 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -438,7 +438,7 @@ static bool rs_is_reshapable(struct raid_set *rs) /* Return true, if raid set in @rs is recovering */ static bool rs_is_recovering(struct raid_set *rs) { - return rs->md.recovery_cp < rs->md.dev_sectors; + return rs->md.resync_offset < rs->md.dev_sectors; } /* Return true, if raid set in @rs is reshaping */ @@ -768,7 +768,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; for (i = 0; i < raid_devs; i++) md_rdev_init(&rs->dev[i].rdev); @@ -912,7 +912,7 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as) rs->md.external = 0; rs->md.persistent = 1; rs->md.major_version = 2; - } else if (rebuild && !rs->md.recovery_cp) { + } else if (rebuild && !rs->md.resync_offset) { /* * Without metadata, we will not be able to tell if the array * is in-sync or not - we must assume it is not. Therefore, @@ -1355,11 +1355,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, return -EINVAL; } - /* - * In device-mapper, we specify things in sectors, but - * MD records this value in kB - */ - if (value < 0 || value / 2 > COUNTER_MAX) { + if (value < 0) { rs->ti->error = "Max write-behind limit out of range"; return -EINVAL; } @@ -1626,6 +1622,23 @@ static int _check_data_dev_sectors(struct raid_set *rs) return 0; } +/* Get reshape sectors from data_offsets or raid set */ +static sector_t _get_reshape_sectors(struct raid_set *rs) +{ + struct md_rdev *rdev; + sector_t reshape_sectors = 0; + + rdev_for_each(rdev, &rs->md) + if (!test_bit(Journal, &rdev->flags)) { + reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? + rdev->data_offset - rdev->new_data_offset : + rdev->new_data_offset - rdev->data_offset; + break; + } + + return max(reshape_sectors, (sector_t) rs->data_offset); +} + /* Calculate the sectors per device and per array used for @rs */ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev) { @@ -1656,7 +1669,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b if (sector_div(dev_sectors, data_stripes)) goto bad; - array_sectors = (data_stripes + delta_disks) * dev_sectors; + array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs)); if (sector_div(array_sectors, rs->raid10_copies)) goto bad; @@ -1665,7 +1678,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, b else /* Striped layouts */ - array_sectors = (data_stripes + delta_disks) * dev_sectors; + array_sectors = (data_stripes + delta_disks) * (dev_sectors - _get_reshape_sectors(rs)); mddev->array_sectors = array_sectors; mddev->dev_sectors = dev_sectors; @@ -1682,20 +1695,20 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) { /* raid0 does not recover */ if (rs_is_raid0(rs)) - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; /* * A raid6 set has to be recovered either * completely or for the grown part to * ensure proper parity and Q-Syndrome */ else if (rs_is_raid6(rs)) - rs->md.recovery_cp = dev_sectors; + rs->md.resync_offset = dev_sectors; /* * Other raid set types may skip recovery * depending on the 'nosync' flag. */ else - rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) + rs->md.resync_offset = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) ? MaxSector : dev_sectors; } @@ -1704,11 +1717,20 @@ static void do_table_event(struct work_struct *ws) struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); smp_rmb(); /* Make sure we access most actual mddev properties */ - if (!rs_is_reshaping(rs)) { + + /* Only grow size resulting from added stripe(s) after reshape ended. */ + if (!rs_is_reshaping(rs) && + rs->array_sectors > rs->md.array_sectors && + !rs->md.delta_disks && + rs->md.raid_disks == rs->raid_disks) { + /* The raid10 personality doesn't provide proper device sizes -> correct. */ if (rs_is_raid10(rs)) rs_set_rdev_sectors(rs); + + rs->md.array_sectors = rs->array_sectors; rs_set_capacity(rs); } + dm_table_event(rs->ti->table); } @@ -2121,7 +2143,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->events = cpu_to_le64(mddev->events); sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); + sb->array_resync_offset = cpu_to_le64(mddev->resync_offset); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -2312,18 +2334,18 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) } if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); + mddev->resync_offset = le64_to_cpu(sb->array_resync_offset); /* * During load, we set FirstUse if a new superblock was written. * There are two reasons we might not have a superblock: * 1) The raid set is brand new - in which case, all of the * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. + * resync_offset must be 0, unless forced. * 2) This is a new device being added to an old raid set * and the new device needs to be rebuilt - in which * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. + * resync_offset must be MaxSector. * 3) This is/are a new device(s) being added to an old * raid set during takeover to a higher raid level * to provide capacity for redundancy or during reshape @@ -2368,8 +2390,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) new_devs > 1 ? "s" : ""); return -EINVAL; } else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) { - DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)", - (unsigned long long) mddev->recovery_cp); + DMERR("'rebuild' specified while raid set is not in-sync (resync_offset=%llu)", + (unsigned long long) mddev->resync_offset); return -EINVAL; } else if (rs_is_reshaping(rs)) { DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)", @@ -2384,7 +2406,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) */ sb_retrieve_failed_devices(sb, failed_devices); rdev_for_each(r, mddev) { - if (test_bit(Journal, &rdev->flags) || + if (test_bit(Journal, &r->flags) || !r->sb_page) continue; sb2 = page_address(r->sb_page); @@ -2493,7 +2515,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) rdev->saved_raid_disk = rdev->raid_disk; } - /* Reshape support -> restore repective data offsets */ + /* Reshape support -> restore respective data offsets */ rdev->data_offset = le64_to_cpu(sb->data_offset); rdev->new_data_offset = le64_to_cpu(sb->new_data_offset); @@ -2509,6 +2531,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; + /* Respect resynchronization requested with "sync" argument. */ + if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + freshest = NULL; rdev_for_each(rdev, mddev) { if (test_bit(Journal, &rdev->flags)) @@ -2674,11 +2700,11 @@ static int rs_adjust_data_offsets(struct raid_set *rs) } out: /* - * Raise recovery_cp in case data_offset != 0 to + * Raise resync_offset in case data_offset != 0 to * avoid false recovery positives in the constructor. */ - if (rs->md.recovery_cp < rs->md.dev_sectors) - rs->md.recovery_cp += rs->dev[0].rdev.data_offset; + if (rs->md.resync_offset < rs->md.dev_sectors) + rs->md.resync_offset += rs->dev[0].rdev.data_offset; /* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */ rdev_for_each(rdev, &rs->md) { @@ -2733,7 +2759,7 @@ static int rs_setup_takeover(struct raid_set *rs) } clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; while (d--) { rdev = &rs->dev[d].rdev; @@ -2741,7 +2767,7 @@ static int rs_setup_takeover(struct raid_set *rs) if (test_bit(d, (void *) rs->rebuild_disks)) { clear_bit(In_sync, &rdev->flags); clear_bit(Faulty, &rdev->flags); - mddev->recovery_cp = rdev->recovery_offset = 0; + mddev->resync_offset = rdev->recovery_offset = 0; /* Bitmap has to be created when we do an "up" takeover */ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); } @@ -2811,23 +2837,6 @@ static int rs_prepare_reshape(struct raid_set *rs) return 0; } -/* Get reshape sectors from data_offsets or raid set */ -static sector_t _get_reshape_sectors(struct raid_set *rs) -{ - struct md_rdev *rdev; - sector_t reshape_sectors = 0; - - rdev_for_each(rdev, &rs->md) - if (!test_bit(Journal, &rdev->flags)) { - reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? - rdev->data_offset - rdev->new_data_offset : - rdev->new_data_offset - rdev->data_offset; - break; - } - - return max(reshape_sectors, (sector_t) rs->data_offset); -} - /* * Reshape: * - change raid layout @@ -3187,7 +3196,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (reshape_sectors || rs_is_raid1(rs)) { /* * We can only prepare for a reshape here, because the - * raid set needs to run to provide the repective reshape + * raid set needs to run to provide the respective reshape * check functions via its MD personality instance. * * So do the reshape check after md_run() succeeded. @@ -3216,7 +3225,7 @@ size_check: if (r) goto bad; - rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors); + rs_setup_recovery(rs, rs->md.resync_offset < rs->md.dev_sectors ? rs->md.resync_offset : rs->md.dev_sectors); } else { /* This is no size change or it is shrinking, update size and record in superblocks */ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); @@ -3238,7 +3247,7 @@ size_check: rs_reset_inconclusive_reshape(rs); /* Start raid set read-only and assumed clean to change in raid_resume() */ - rs->md.ro = 1; + rs->md.ro = MD_RDONLY; rs->md.in_sync = 1; /* Has to be held on running the array */ @@ -3299,6 +3308,7 @@ size_check: /* Disable/enable discard support on raid set. */ configure_discard_support(rs); + rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table)); mddev_unlock(&rs->md); return 0; @@ -3318,6 +3328,7 @@ static void raid_dtr(struct dm_target *ti) mddev_lock_nointr(&rs->md); md_stop(&rs->md); + rs->md.dm_gendisk = NULL; mddev_unlock(&rs->md); if (work_pending(&rs->md.event_work)) @@ -3374,7 +3385,7 @@ static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long r /* The MD sync thread can be done with io or be interrupted but still be running */ if (!test_bit(MD_RECOVERY_DONE, &recovery) && (test_bit(MD_RECOVERY_RUNNING, &recovery) || - (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { + (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) return st_reshape; @@ -3438,7 +3449,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, } else { if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery)) - r = mddev->recovery_cp; + r = mddev->resync_offset; else r = mddev->curr_resync_completed; @@ -3542,7 +3553,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, recovery = rs->md.recovery; state = decipher_sync_action(mddev, recovery); progress = rs_get_progress(rs, recovery, state, resync_max_sectors); - resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? + resync_mismatches = mddev->last_sync_action == ACTION_CHECK ? atomic64_read(&mddev->resync_mismatches) : 0; /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ @@ -3764,11 +3775,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, } else return -EINVAL; } - if (mddev->ro == 2) { + if (mddev->ro == MD_AUTO_READ) { /* A write to sync_action is enough to justify * canceling read-auto mode */ - mddev->ro = 0; + mddev->ro = MD_RDWR; if (!mddev->suspended) md_wakeup_thread(mddev->sync_thread); } @@ -3802,8 +3813,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) struct raid_set *rs = ti->private; unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors); - blk_limits_io_min(limits, chunk_size_bytes); - blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); + if (chunk_size_bytes) { + limits->io_min = chunk_size_bytes; + limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs); + } } static void raid_presuspend(struct dm_target *ti) @@ -3847,6 +3860,7 @@ static void raid_postsuspend(struct dm_target *ti) */ md_stop_writes(&rs->md); mddev_suspend(&rs->md, false); + rs->md.ro = MD_RDONLY; } } @@ -3940,9 +3954,13 @@ static int __load_dirty_region_bitmap(struct raid_set *rs) /* Try loading the bitmap unless "raid0", which does not have one */ if (!rs_is_raid0(rs) && !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) { - r = md_bitmap_load(&rs->md); - if (r) - DMERR("Failed to load bitmap"); + struct mddev *mddev = &rs->md; + + if (md_bitmap_enabled(mddev, false)) { + r = mddev->bitmap_ops->load(mddev); + if (r) + DMERR("Failed to load bitmap"); + } } return r; @@ -3955,7 +3973,7 @@ static void rs_update_sbs(struct raid_set *rs) int ro = mddev->ro; set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); - mddev->ro = 0; + mddev->ro = MD_RDWR; md_update_sb(mddev, 1); mddev->ro = ro; } @@ -4023,6 +4041,11 @@ static int raid_preresume(struct dm_target *ti) if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags)) return 0; + /* If different and no explicit grow request, expose MD array size as of superblock. */ + if (!test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) && + rs->array_sectors != mddev->array_sectors) + rs_set_capacity(rs); + /* * The superblocks need to be updated on disk if the * array is new or new devices got added (thus zeroed @@ -4052,15 +4075,18 @@ static int raid_preresume(struct dm_target *ti) mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize; - r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0); - if (r) - DMERR("Failed to resize bitmap"); + if (md_bitmap_enabled(mddev, false)) { + r = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors, + chunksize); + if (r) + DMERR("Failed to resize bitmap"); + } } /* Check for any resize/reshape on @rs and adjust/initiate */ - if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset && mddev->resync_offset < MaxSector) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - mddev->resync_min = mddev->recovery_cp; + mddev->resync_min = mddev->resync_offset; if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) mddev->resync_max_sectors = mddev->dev_sectors; } @@ -4101,11 +4127,12 @@ static void raid_resume(struct dm_target *ti) if (mddev->delta_disks < 0) rs_set_capacity(rs); + mddev_lock_nointr(mddev); WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)); - WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + WARN_ON_ONCE(rcu_dereference_protected(mddev->sync_thread, + lockdep_is_held(&mddev->reconfig_mutex))); clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags); - mddev_lock_nointr(mddev); - mddev->ro = 0; + mddev->ro = MD_RDWR; mddev->in_sync = 0; md_unfrozen_sync_thread(mddev); mddev_unlock_and_resume(mddev); |
