summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/md/md-cluster.txt2
-rw-r--r--drivers/md/md-cluster.c76
-rw-r--r--drivers/md/md-cluster.h1
-rw-r--r--drivers/md/md.c21
4 files changed, 94 insertions, 6 deletions
diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt
index 38883276d31c..2663d49dd8a0 100644
--- a/Documentation/md/md-cluster.txt
+++ b/Documentation/md/md-cluster.txt
@@ -321,4 +321,4 @@ The algorithm is:
There are somethings which are not supported by cluster MD yet.
-- update size and change array_sectors.
+- change array_sectors.
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 92c3c51ede4d..b21ef58819f6 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1151,6 +1151,81 @@ int cluster_check_sync_size(struct mddev *mddev)
return (my_sync_size == sync_size) ? 0 : -1;
}
+/*
+ * Updating the size of a clustered raid is a little more complex; the
+ * initiator node performs it in these steps:
+ * 1. Update the superblock (md_update_sb) and take the lock via lock_comm().
+ * 2. Send a METADATA_UPDATED msg to the other nodes.
+ * 3. Check the bitmaps' sync_size with cluster_check_sync_size(). If it
+ * returns 0, set the capacity locally and send CHANGE_CAPACITY so the other
+ * nodes can do the same. Otherwise resize back to old_dev_sectors and
+ * resend the METADATA_UPDATED msg.
+ */
+static void update_size(struct mddev *mddev, sector_t old_dev_sectors)
+{
+ struct md_cluster_info *cinfo = mddev->cluster_info;
+ struct cluster_msg cmsg;
+ struct md_rdev *rdev;
+ int ret = 0;
+ int raid_slot = -1;
+
+ md_update_sb(mddev, 1);
+ lock_comm(cinfo, 1);
+
+ memset(&cmsg, 0, sizeof(cmsg));
+ cmsg.type = cpu_to_le32(METADATA_UPDATED);
+ rdev_for_each(rdev, mddev)
+ if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags)) {
+ raid_slot = rdev->desc_nr;
+ break;
+ }
+ if (raid_slot >= 0) {
+ cmsg.raid_slot = cpu_to_le32(raid_slot);
+ /*
+ * We can only change capacity after all the nodes can do it,
+ * so we need to wait until the other nodes have received the
+ * msg and handled the change.
+ */
+ ret = __sendmsg(cinfo, &cmsg);
+ if (ret) {
+ pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
+ __func__, __LINE__);
+ unlock_comm(cinfo);
+ return;
+ }
+ } else {
+ pr_err("md-cluster: No good device id found to send\n");
+ unlock_comm(cinfo);
+ return;
+ }
+
+ /*
+ * Check the sync_size from the other nodes' bitmaps; if sync_size
+ * has already been updated in the other nodes as expected, send an
+ * otherwise-empty CHANGE_CAPACITY msg to permit the change of capacity.
+ */
+ if (cluster_check_sync_size(mddev) == 0) {
+ memset(&cmsg, 0, sizeof(cmsg));
+ cmsg.type = cpu_to_le32(CHANGE_CAPACITY);
+ ret = __sendmsg(cinfo, &cmsg);
+ if (ret)
+ pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n",
+ __func__, __LINE__);
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ } else {
+ /* revert to previous sectors; cmsg still holds the METADATA_UPDATED msg */
+ ret = mddev->pers->resize(mddev, old_dev_sectors);
+ if (!ret)
+ revalidate_disk(mddev->gendisk);
+ ret = __sendmsg(cinfo, &cmsg);
+ if (ret)
+ pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
+ __func__, __LINE__);
+ }
+ unlock_comm(cinfo);
+}
+
static int resync_start(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1396,6 +1471,7 @@ static struct md_cluster_operations cluster_ops = {
.gather_bitmaps = gather_bitmaps,
.lock_all_bitmaps = lock_all_bitmaps,
.unlock_all_bitmaps = unlock_all_bitmaps,
+ .update_size = update_size,
};
static int __init cluster_init(void)
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index e765499ba591..274016177983 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -27,6 +27,7 @@ struct md_cluster_operations {
int (*gather_bitmaps)(struct md_rdev *rdev);
int (*lock_all_bitmaps)(struct mddev *mddev);
void (*unlock_all_bitmaps)(struct mddev *mddev);
+ void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
};
#endif /* _MD_CLUSTER_H */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83f325077dc8..72ef3f18ac9a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6493,10 +6493,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
struct md_rdev *rdev;
int rv;
int fit = (num_sectors == 0);
-
- /* cluster raid doesn't support update size */
- if (mddev_is_clustered(mddev))
- return -EINVAL;
+ sector_t old_dev_sectors = mddev->dev_sectors;
if (mddev->pers->resize == NULL)
return -EINVAL;
@@ -6525,7 +6522,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
}
rv = mddev->pers->resize(mddev, num_sectors);
if (!rv) {
- if (mddev->queue) {
+ if (mddev_is_clustered(mddev))
+ md_cluster_ops->update_size(mddev, old_dev_sectors);
+ else if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
@@ -8743,6 +8742,18 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
int role, ret;
char b[BDEVNAME_SIZE];
+ /*
+ * If size is changed in another node then we need to
+ * do resize as well.
+ */
+ if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
+ ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
+ if (ret)
+ pr_info("md-cluster: resize failed\n");
+ else
+ bitmap_update_sb(mddev->bitmap);
+ }
+
/* Check for change of roles in the active devices */
rdev_for_each(rdev2, mddev) {
if (test_bit(Faulty, &rdev2->flags))