summaryrefslogtreecommitdiff
path: root/drivers/md/raid0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid0.c')
-rw-r--r--drivers/md/raid0.c331
1 files changed, 194 insertions, 137 deletions
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 62c8b6adac70..985c377356eb 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -37,7 +37,6 @@ static void dump_zones(struct mddev *mddev)
int j, k;
sector_t zone_size = 0;
sector_t zone_start = 0;
- char b[BDEVNAME_SIZE];
struct r0conf *conf = mddev->private;
int raid_disks = conf->strip_zone[0].nb_dev;
pr_debug("md: RAID0 configuration for %s - %d zone%s\n",
@@ -48,9 +47,8 @@ static void dump_zones(struct mddev *mddev)
int len = 0;
for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
- len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
- bdevname(conf->devlist[j*raid_disks
- + k]->bdev, b));
+ len += scnprintf(line+len, 200-len, "%s%pg", k?"/":"",
+ conf->devlist[j * raid_disks + k]->bdev);
pr_debug("md: zone%d=[%s]\n", j, line);
zone_size = conf->strip_zone[j].zone_end - zone_start;
@@ -69,18 +67,19 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
struct md_rdev *smallest, *rdev1, *rdev2, *rdev, **dev;
struct strip_zone *zone;
int cnt;
- char b[BDEVNAME_SIZE];
- char b2[BDEVNAME_SIZE];
struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
- unsigned blksize = 512;
+ unsigned int blksize = 512;
+
+ if (!mddev_is_dm(mddev))
+ blksize = queue_logical_block_size(mddev->gendisk->queue);
*private_conf = ERR_PTR(-ENOMEM);
if (!conf)
return -ENOMEM;
rdev_for_each(rdev1, mddev) {
- pr_debug("md/raid0:%s: looking at %s\n",
+ pr_debug("md/raid0:%s: looking at %pg\n",
mdname(mddev),
- bdevname(rdev1->bdev, b));
+ rdev1->bdev);
c = 0;
/* round size to chunk_size */
@@ -88,16 +87,17 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
sector_div(sectors, mddev->chunk_sectors);
rdev1->sectors = sectors * mddev->chunk_sectors;
- blksize = max(blksize, queue_logical_block_size(
+ if (mddev_is_dm(mddev))
+ blksize = max(blksize, queue_logical_block_size(
rdev1->bdev->bd_disk->queue));
rdev_for_each(rdev2, mddev) {
- pr_debug("md/raid0:%s: comparing %s(%llu)"
- " with %s(%llu)\n",
+ pr_debug("md/raid0:%s: comparing %pg(%llu)"
+ " with %pg(%llu)\n",
mdname(mddev),
- bdevname(rdev1->bdev,b),
+ rdev1->bdev,
(unsigned long long)rdev1->sectors,
- bdevname(rdev2->bdev,b2),
+ rdev2->bdev,
(unsigned long long)rdev2->sectors);
if (rdev2 == rdev1) {
pr_debug("md/raid0:%s: END\n",
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);
- if (conf->nr_strip_zones == 1) {
- conf->layout = RAID0_ORIG_LAYOUT;
- } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
- mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
- conf->layout = mddev->layout;
- } else if (default_layout == RAID0_ORIG_LAYOUT ||
- default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
- conf->layout = default_layout;
- } else {
- pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
- mdname(mddev));
- pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
- err = -ENOTSUPP;
- goto abort;
- }
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
@@ -240,15 +225,15 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
for (j=0; j<cnt; j++) {
rdev = conf->devlist[j];
if (rdev->sectors <= zone->dev_start) {
- pr_debug("md/raid0:%s: checking %s ... nope\n",
+ pr_debug("md/raid0:%s: checking %pg ... nope\n",
mdname(mddev),
- bdevname(rdev->bdev, b));
+ rdev->bdev);
continue;
}
- pr_debug("md/raid0:%s: checking %s ..."
+ pr_debug("md/raid0:%s: checking %pg ..."
" contained as device %d\n",
mdname(mddev),
- bdevname(rdev->bdev, b), c);
+ rdev->bdev, c);
dev[c] = rdev;
c++;
if (!smallest || rdev->sectors < smallest->sectors) {
@@ -273,6 +258,34 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
(unsigned long long)smallest->sectors);
}
+ if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
+ conf->layout = RAID0_ORIG_LAYOUT;
+ } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+ mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = mddev->layout;
+ } else if (default_layout == RAID0_ORIG_LAYOUT ||
+ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = default_layout;
+ } else {
+ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+ mdname(mddev));
+ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+ err = -EOPNOTSUPP;
+ goto abort;
+ }
+
+ if (conf->layout == RAID0_ORIG_LAYOUT) {
+ for (i = 1; i < conf->nr_strip_zones; i++) {
+ sector_t first_sector = conf->strip_zone[i-1].zone_end;
+
+ sector_div(first_sector, mddev->chunk_sectors);
+ zone = conf->strip_zone + i;
+ /* disk_shift is first disk index used in the zone */
+ zone->disk_shift = sector_div(first_sector,
+ zone->nb_dev);
+ }
+ }
+
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
*private_conf = conf;
@@ -356,7 +369,34 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors;
}
-static void raid0_free(struct mddev *mddev, void *priv);
+static void raid0_free(struct mddev *mddev, void *priv)
+{
+ struct r0conf *conf = priv;
+
+ kfree(conf->strip_zone);
+ kfree(conf->devlist);
+ kfree(conf);
+}
+
+static int raid0_set_limits(struct mddev *mddev)
+{
+ struct queue_limits lim;
+ int err;
+
+ md_init_stacking_limits(&lim);
+ lim.max_hw_sectors = mddev->chunk_sectors;
+ lim.max_write_zeroes_sectors = mddev->chunk_sectors;
+ lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors;
+ lim.logical_block_size = mddev->logical_block_size;
+ lim.io_min = mddev->chunk_sectors << 9;
+ lim.io_opt = lim.io_min * mddev->raid_disks;
+ lim.chunk_sectors = mddev->chunk_sectors;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err)
+ return err;
+ return queue_limits_set(mddev->gendisk->queue, &lim);
+}
static int raid0_run(struct mddev *mddev)
{
@@ -370,6 +410,12 @@ static int raid0_run(struct mddev *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
+ if (!mddev_is_dm(mddev)) {
+ ret = raid0_set_limits(mddev);
+ if (ret)
+ return ret;
+ }
+
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
ret = create_strip_zones(mddev, &conf);
@@ -378,30 +424,6 @@ static int raid0_run(struct mddev *mddev)
mddev->private = conf;
}
conf = mddev->private;
- if (mddev->queue) {
- struct md_rdev *rdev;
- bool discard_supported = false;
-
- blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
- blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
- blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
- blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
-
- blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
- blk_queue_io_opt(mddev->queue,
- (mddev->chunk_sectors << 9) * mddev->raid_disks);
-
- rdev_for_each(rdev, mddev) {
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
- discard_supported = true;
- }
- if (!discard_supported)
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
- else
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
- }
/* calculate array device size */
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
@@ -412,18 +434,21 @@ static int raid0_run(struct mddev *mddev)
dump_zones(mddev);
- ret = md_integrity_register(mddev);
-
- return ret;
+ return md_integrity_register(mddev);
}
-static void raid0_free(struct mddev *mddev, void *priv)
+/*
+ * Convert disk_index to the disk order in which it is read/written.
+ * For example, if we have 4 disks, they are numbered 0,1,2,3. If we
+ * write the disks starting at disk 3, then the read/write order would
+ * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift()
+ * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map
+ * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in
+ * that 'output' space to understand the read/write disk ordering.
+ */
+static int map_disk_shift(int disk_index, int num_disks, int disk_shift)
{
- struct r0conf *conf = priv;
-
- kfree(conf->strip_zone);
- kfree(conf->devlist);
- kfree(conf);
+ return ((disk_index + num_disks - disk_shift) % num_disks);
}
static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
@@ -439,20 +464,24 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
sector_t end_disk_offset;
unsigned int end_disk_index;
unsigned int disk;
+ sector_t orig_start, orig_end;
+ orig_start = start;
zone = find_zone(conf, &start);
if (bio_end_sector(bio) > zone->zone_end) {
- struct bio *split = bio_split(bio,
- zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
- &mddev->bio_set);
- bio_chain(split, bio);
- submit_bio_noacct(bio);
- bio = split;
+ bio = bio_submit_split_bioset(bio,
+ zone->zone_end - bio->bi_iter.bi_sector,
+ &mddev->bio_set);
+ if (!bio)
+ return;
+
end = zone->zone_end;
- } else
+ } else {
end = bio_end_sector(bio);
+ }
+ orig_end = end;
if (zone != conf->strip_zone)
end = end - zone[-1].zone_end;
@@ -464,13 +493,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
last_stripe_index = end;
sector_div(last_stripe_index, stripe_size);
- start_disk_index = (int)(start - first_stripe_index * stripe_size) /
- mddev->chunk_sectors;
+ /* In the first zone the original and alternate layouts are the same */
+ if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) {
+ sector_div(orig_start, mddev->chunk_sectors);
+ start_disk_index = sector_div(orig_start, zone->nb_dev);
+ start_disk_index = map_disk_shift(start_disk_index,
+ zone->nb_dev,
+ zone->disk_shift);
+ sector_div(orig_end, mddev->chunk_sectors);
+ end_disk_index = sector_div(orig_end, zone->nb_dev);
+ end_disk_index = map_disk_shift(end_disk_index,
+ zone->nb_dev, zone->disk_shift);
+ } else {
+ start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+ mddev->chunk_sectors;
+ end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+ mddev->chunk_sectors;
+ }
start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
mddev->chunk_sectors) +
first_stripe_index * mddev->chunk_sectors;
- end_disk_index = (int)(end - last_stripe_index * stripe_size) /
- mddev->chunk_sectors;
end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
mddev->chunk_sectors) +
last_stripe_index * mddev->chunk_sectors;
@@ -478,18 +520,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
for (disk = 0; disk < zone->nb_dev; disk++) {
sector_t dev_start, dev_end;
struct md_rdev *rdev;
+ int compare_disk;
- if (disk < start_disk_index)
+ compare_disk = map_disk_shift(disk, zone->nb_dev,
+ zone->disk_shift);
+
+ if (compare_disk < start_disk_index)
dev_start = (first_stripe_index + 1) *
mddev->chunk_sectors;
- else if (disk > start_disk_index)
+ else if (compare_disk > start_disk_index)
dev_start = first_stripe_index * mddev->chunk_sectors;
else
dev_start = start_disk_offset;
- if (disk < end_disk_index)
+ if (compare_disk < end_disk_index)
dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
- else if (disk > end_disk_index)
+ else if (compare_disk > end_disk_index)
dev_end = last_stripe_index * mddev->chunk_sectors;
else
dev_end = end_disk_offset;
@@ -506,14 +552,47 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
bio_endio(bio);
}
-static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
+static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
{
struct r0conf *conf = mddev->private;
struct strip_zone *zone;
struct md_rdev *tmp_dev;
- sector_t bio_sector;
+ sector_t bio_sector = bio->bi_iter.bi_sector;
+ sector_t sector = bio_sector;
+
+ md_account_bio(mddev, &bio);
+
+ zone = find_zone(mddev->private, &sector);
+ switch (conf->layout) {
+ case RAID0_ORIG_LAYOUT:
+ tmp_dev = map_sector(mddev, zone, bio_sector, &sector);
+ break;
+ case RAID0_ALT_MULTIZONE_LAYOUT:
+ tmp_dev = map_sector(mddev, zone, sector, &sector);
+ break;
+ default:
+ WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
+ bio_io_error(bio);
+ return;
+ }
+
+ if (unlikely(is_rdev_broken(tmp_dev))) {
+ bio_io_error(bio);
+ md_error(mddev, tmp_dev);
+ return;
+ }
+
+ bio_set_dev(bio, tmp_dev->bdev);
+ bio->bi_iter.bi_sector = sector + zone->dev_start +
+ tmp_dev->data_offset;
+ mddev_trace_remap(mddev, bio, bio_sector);
+ mddev_check_write_zeroes(mddev, bio);
+ submit_bio_noacct(bio);
+}
+
+static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
+{
sector_t sector;
- sector_t orig_sector;
unsigned chunk_sects;
unsigned sectors;
@@ -526,8 +605,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
return true;
}
- bio_sector = bio->bi_iter.bi_sector;
- sector = bio_sector;
+ sector = bio->bi_iter.bi_sector;
chunk_sects = mddev->chunk_sectors;
sectors = chunk_sects -
@@ -535,50 +613,14 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
? (sector & (chunk_sects-1))
: sector_div(sector, chunk_sects));
- /* Restore due to sector_div */
- sector = bio_sector;
-
if (sectors < bio_sectors(bio)) {
- struct bio *split = bio_split(bio, sectors, GFP_NOIO,
+ bio = bio_submit_split_bioset(bio, sectors,
&mddev->bio_set);
- bio_chain(split, bio);
- submit_bio_noacct(bio);
- bio = split;
+ if (!bio)
+ return true;
}
- if (bio->bi_pool != &mddev->bio_set)
- md_account_bio(mddev, &bio);
-
- orig_sector = sector;
- zone = find_zone(mddev->private, &sector);
- switch (conf->layout) {
- case RAID0_ORIG_LAYOUT:
- tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
- break;
- case RAID0_ALT_MULTIZONE_LAYOUT:
- tmp_dev = map_sector(mddev, zone, sector, &sector);
- break;
- default:
- WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
- bio_io_error(bio);
- return true;
- }
-
- if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
- bio_io_error(bio);
- return true;
- }
-
- bio_set_dev(bio, tmp_dev->bdev);
- bio->bi_iter.bi_sector = sector + zone->dev_start +
- tmp_dev->data_offset;
-
- if (mddev->gendisk)
- trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
- bio_sector);
- mddev_check_writesame(mddev, bio);
- mddev_check_write_zeroes(mddev, bio);
- submit_bio_noacct(bio);
+ raid0_map_submit_bio(mddev, bio);
return true;
}
@@ -588,6 +630,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
return;
}
+static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+ if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+ char *md_name = mdname(mddev);
+
+ pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n",
+ md_name, rdev->bdev);
+ }
+}
+
static void *raid0_takeover_raid45(struct mddev *mddev)
{
struct md_rdev *rdev;
@@ -617,7 +669,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
mddev->raid_disks--;
mddev->delta_disks = -1;
/* make sure it will be not marked as dirty */
- mddev->recovery_cp = MaxSector;
+ mddev->resync_offset = MaxSector;
mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
create_strip_zones(mddev, &priv_conf);
@@ -660,7 +712,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
mddev->raid_disks += mddev->delta_disks;
mddev->degraded = 0;
/* make sure it will be not marked as dirty */
- mddev->recovery_cp = MaxSector;
+ mddev->resync_offset = MaxSector;
mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
create_strip_zones(mddev, &priv_conf);
@@ -703,7 +755,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
mddev->delta_disks = 1 - mddev->raid_disks;
mddev->raid_disks = 1;
/* make sure it will be not marked as dirty */
- mddev->recovery_cp = MaxSector;
+ mddev->resync_offset = MaxSector;
mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);
create_strip_zones(mddev, &priv_conf);
@@ -753,9 +805,13 @@ static void raid0_quiesce(struct mddev *mddev, int quiesce)
static struct md_personality raid0_personality=
{
- .name = "raid0",
- .level = 0,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID0,
+ .name = "raid0",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid0_make_request,
.run = raid0_run,
.free = raid0_free,
@@ -763,16 +819,17 @@ static struct md_personality raid0_personality=
.size = raid0_size,
.takeover = raid0_takeover,
.quiesce = raid0_quiesce,
+ .error_handler = raid0_error,
};
-static int __init raid0_init (void)
+static int __init raid0_init(void)
{
- return register_md_personality (&raid0_personality);
+ return register_md_submodule(&raid0_personality.head);
}
-static void raid0_exit (void)
+static void __exit raid0_exit(void)
{
- unregister_md_personality (&raid0_personality);
+ unregister_md_submodule(&raid0_personality.head);
}
module_init(raid0_init);