From 506c9e44a85f6a79fc0643f2d2498ab6cda3d3f8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:26 +1100 Subject: md: allow non-privileged uses to GET_*_INFO about raid arrays. The info is already available in /proc/mdstat and /sys/block in an accessible form so there is no point in putting a road-block in the ioctl for information gathering. Signed-off-by: NeilBrown --- drivers/md/md.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index f47f1f8ac44b..004d8e5b2a57 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6054,8 +6054,15 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, struct mddev *mddev = NULL; int ro; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + switch (cmd) { + case RAID_VERSION: + case GET_ARRAY_INFO: + case GET_DISK_INFO: + break; + default: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } /* * Commands dealing with the RAID driver but not any -- cgit From 476a7abb9b00adfe6bf70e82800367319ab8078b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:51 +1100 Subject: md: remove test for duplicate device when setting slot number. When setting the slot number on a device in an active array we currently check that the number is not already in use. We then call into the personality's hot_add_disk function which performs the same test and returns the same error. Thus the common test is not needed. As we will shortly be changing some personalities to allow duplicates in some cases (to support hot-replace), the common test will become inconvenient. So remove the common test. Reviewed-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 004d8e5b2a57..d51c688d9c94 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2697,7 +2697,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); } else if (rdev->mddev->pers) { - struct md_rdev *rdev2; /* Activating a spare .. or possibly reactivating * if we ever get bitmaps working here. */ @@ -2711,10 +2710,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->mddev->pers->hot_add_disk == NULL) return -EINVAL; - list_for_each_entry(rdev2, &rdev->mddev->disks, same_set) - if (rdev2->raid_disk == slot) - return -EEXIST; - if (slot >= rdev->mddev->raid_disks && slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks) return -ENOSPC; -- cgit From b8321b68d1445f308324517e45fb0a5c2b48e271 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:51 +1100 Subject: md: change hot_remove_disk to take an rdev rather than a number. Soon an array will be able to have multiple devices with the same raid_disk number (an original and a replacement). So removing a device based on the number won't work. So pass the actual device handle instead. Reviewed-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index d51c688d9c94..0e2288824938 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2689,7 +2689,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->mddev->pers->hot_remove_disk == NULL) return -EINVAL; err = rdev->mddev->pers-> - hot_remove_disk(rdev->mddev, rdev->raid_disk); + hot_remove_disk(rdev->mddev, rdev); if (err) return err; sysfs_unlink_rdev(rdev->mddev, rdev); @@ -7340,7 +7340,7 @@ static int remove_and_add_spares(struct mddev *mddev) ! test_bit(In_sync, &rdev->flags)) && atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( - mddev, rdev->raid_disk)==0) { + mddev, rdev) == 0) { sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } @@ -7477,7 +7477,7 @@ void md_check_recovery(struct mddev *mddev) test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( - mddev, rdev->raid_disk)==0) { + mddev, rdev) == 0) { sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } -- cgit From 2d78f8c451785f030ac1676a18691896b59c69d8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:51 +1100 Subject: md: create externally visible flags for supporting hot-replace. hot-replace is a feature being added to md which will allow a device to be replaced without removing it from the array first. With hot-replace a spare can be activated and recovery can start while the original device is still in place, thus allowing a transition from an unreliable device to a reliable device without leaving the array degraded during the transition. It can also be use when the original device is still reliable but it not wanted for some reason. This will eventually be supported in RAID4/5/6 and RAID10. This patch adds a super-block flag to distinguish the replacement device. If an old kernel sees this flag it will reject the device. It also adds two per-device flags which are viewable and settable via sysfs. "want_replacement" can be set to request that a device be replaced. "replacement" is set to show that this device is replacing another device. The "rd%d" links in /sys/block/mdXx/md only apply to the original device, not the replacement. We currently don't make links for the replacement - there doesn't seem to be a need. Signed-off-by: NeilBrown --- drivers/md/md.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0e2288824938..be569eb41a93 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1714,6 +1714,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } if (sb->devflags & WriteMostly1) set_bit(WriteMostly, &rdev->flags); + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) + set_bit(Replacement, &rdev->flags); } else /* MULTIPATH are always insync */ set_bit(In_sync, &rdev->flags); @@ -1767,6 +1769,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->recovery_offset = cpu_to_le64(rdev->recovery_offset); } + if (test_bit(Replacement, &rdev->flags)) + sb->feature_map |= + cpu_to_le32(MD_FEATURE_REPLACEMENT); if (mddev->reshape_position != MaxSector) { sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); @@ -2560,6 +2565,15 @@ state_show(struct md_rdev *rdev, char *page) len += sprintf(page+len, "%swrite_error", sep); sep = ","; } + if (test_bit(WantReplacement, &rdev->flags)) { + len += sprintf(page+len, "%swant_replacement", sep); + sep = ","; + } + if (test_bit(Replacement, &rdev->flags)) { + len += sprintf(page+len, "%sreplacement", sep); + sep = ","; + } + return len+sprintf(page+len, "\n"); } @@ -2628,6 +2642,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "-write_error")) { clear_bit(WriteErrorSeen, &rdev->flags); err = 0; + } else if (cmd_match(buf, "want_replacement")) { + /* Any non-spare device that is not a replacement can + * become want_replacement at any time, but we then need to + * check if recovery is needed. + */ + if (rdev->raid_disk >= 0 && + !test_bit(Replacement, &rdev->flags)) + set_bit(WantReplacement, &rdev->flags); + set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); + md_wakeup_thread(rdev->mddev->thread); + err = 0; + } else if (cmd_match(buf, "-want_replacement")) { + /* Clearing 'want_replacement' is always allowed. + * Once replacements starts it is too late though. + */ + err = 0; + clear_bit(WantReplacement, &rdev->flags); + } else if (cmd_match(buf, "replacement")) { + /* Can only set a device as a replacement when array has not + * yet been started. Once running, replacement is automatic + * from spares, or by assigning 'slot'. + */ + if (rdev->mddev->pers) + err = -EBUSY; + else { + set_bit(Replacement, &rdev->flags); + err = 0; + } + } else if (cmd_match(buf, "-replacement")) { + /* Similarly, can only clear Replacement before start */ + if (rdev->mddev->pers) + err = -EBUSY; + else { + clear_bit(Replacement, &rdev->flags); + err = 0; + } } if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -6717,8 +6767,11 @@ static int md_seq_show(struct seq_file *seq, void *v) if (test_bit(Faulty, &rdev->flags)) { seq_printf(seq, "(F)"); continue; - } else if (rdev->raid_disk < 0) + } + if (rdev->raid_disk < 0) seq_printf(seq, "(S)"); /* spare */ + if (test_bit(Replacement, &rdev->flags)) + seq_printf(seq, "(R)"); sectors += rdev->sectors; } -- cgit From 7bfec5f35c68121e7b1849f3f4166dd96c8da5b3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 23 Dec 2011 10:17:53 +1100 Subject: md/raid5: If there is a spare and a want_replacement device, start replacement. When attempting to add a spare to a RAID[456] array, also consider adding it as a replacement for a want_replacement device. This requires that common md code attempt hot_add even when the array is not formally degraded. Reviewed-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index be569eb41a93..1c1c562f63dc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7399,23 +7399,21 @@ static int remove_and_add_spares(struct mddev *mddev) } } - if (mddev->degraded) { - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (rdev->raid_disk >= 0 && - !test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags)) + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk >= 0 && + !test_bit(In_sync, &rdev->flags) && + !test_bit(Faulty, &rdev->flags)) + spares++; + if (rdev->raid_disk < 0 + && !test_bit(Faulty, &rdev->flags)) { + rdev->recovery_offset = 0; + if (mddev->pers-> + hot_add_disk(mddev, rdev) == 0) { + if (sysfs_link_rdev(mddev, rdev)) + /* failure here is OK */; spares++; - if (rdev->raid_disk < 0 - && !test_bit(Faulty, &rdev->flags)) { - rdev->recovery_offset = 0; - if (mddev->pers-> - hot_add_disk(mddev, rdev) == 0) { - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; - spares++; - md_new_event(mddev); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - } + md_new_event(mddev); + set_bit(MD_CHANGE_DEVS, &mddev->flags); } } } -- cgit