summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-09-13 15:02:04 -0600
committerJens Axboe <axboe@kernel.dk>2019-09-13 15:02:04 -0600
commit99e5381d548db60ce05f3f39d1ca554caa85db6d (patch)
treeef55a627de233ea52d6deac581d9c32220936c1d
parent21fa1004ff5d749c90cef77525b73a49ef5583dc (diff)
parent067df25c83902e2950ef24fed713f0fa38282f34 (diff)
Merge branch 'md-next' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.4/block
Pull MD fixes from Song. * 'md-next' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md: raid5: use bio_end_sector in r5_next_bio raid5: remove STRIPE_OPS_REQ_PENDING md: add feature flag MD_FEATURE_RAID0_LAYOUT md/raid0: avoid RAID0 data corruption due to layout confusion. raid5: don't set STRIPE_HANDLE to stripe which is in batch list raid5: don't increment read_errors on EILSEQ return
-rw-r--r--drivers/md/md.c13
-rw-r--r--drivers/md/raid0.c35
-rw-r--r--drivers/md/raid0.h14
-rw-r--r--drivers/md/raid5.c7
-rw-r--r--drivers/md/raid5.h5
-rw-r--r--include/uapi/linux/raid/md_p.h2
6 files changed, 68 insertions, 8 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 73d5a1b04022..1be7abeb24fd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1237,6 +1237,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_layout = mddev->layout;
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
+ if (mddev->level == 0)
+ mddev->layout = -1;
if (sb->state & (1<<MD_SB_CLEAN))
mddev->recovery_cp = MaxSector;
@@ -1652,6 +1654,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
}
+ if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
+ sb->level != 0)
+ return -EINVAL;
+
if (!refdev) {
ret = 1;
} else {
@@ -1762,6 +1768,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_chunk_sectors = mddev->chunk_sectors;
}
+ if (mddev->level == 0 &&
+ !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
+ mddev->layout = -1;
+
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags);
@@ -6898,6 +6908,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->external = 0;
mddev->layout = info->layout;
+ if (mddev->level == 0)
+ /* Cannot trust RAID0 layout info here */
+ mddev->layout = -1;
mddev->chunk_sectors = info->chunk_size >> 9;
if (mddev->persistent) {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index bc422eae2c95..f61693e59684 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -19,6 +19,9 @@
#include "raid0.h"
#include "raid5.h"
+static int default_layout = 0;
+module_param(default_layout, int, 0644);
+
#define UNSUPPORTED_MDDEV_FLAGS \
((1L << MD_HAS_JOURNAL) | \
(1L << MD_JOURNAL_CLEAN) | \
@@ -139,6 +142,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
}
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);
+
+ if (conf->nr_strip_zones == 1) {
+ conf->layout = RAID0_ORIG_LAYOUT;
+ } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+ mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = mddev->layout;
+ } else if (default_layout == RAID0_ORIG_LAYOUT ||
+ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = default_layout;
+ } else {
+ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+ mdname(mddev));
+ pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
+ err = -ENOTSUPP;
+ goto abort;
+ }
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
@@ -547,10 +566,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
{
+ struct r0conf *conf = mddev->private;
struct strip_zone *zone;
struct md_rdev *tmp_dev;
sector_t bio_sector;
sector_t sector;
+ sector_t orig_sector;
unsigned chunk_sects;
unsigned sectors;
@@ -584,8 +605,20 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
bio = split;
}
+ orig_sector = sector;
zone = find_zone(mddev->private, &sector);
- tmp_dev = map_sector(mddev, zone, sector, &sector);
+ switch (conf->layout) {
+ case RAID0_ORIG_LAYOUT:
+ tmp_dev = map_sector(mddev, zone, orig_sector, &sector);
+ break;
+ case RAID0_ALT_MULTIZONE_LAYOUT:
+ tmp_dev = map_sector(mddev, zone, sector, &sector);
+ break;
+ default:
+ WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
+ bio_io_error(bio);
+ return true;
+ }
if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
bio_io_error(bio);
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
index 540e65d92642..3816e5477db1 100644
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -8,11 +8,25 @@ struct strip_zone {
int nb_dev; /* # of devices attached to the zone */
};
+/* Linux 3.14 (20d0189b101) made an unintended change to
+ * the RAID0 layout for multi-zone arrays (where devices aren't all
+ * the same size.
+ * RAID0_ORIG_LAYOUT restores the original layout
+ * RAID0_ALT_MULTIZONE_LAYOUT uses the altered layout
+ * The layouts are identical when there is only one zone (all
+ * devices the same size).
+ */
+
+enum r0layout {
+ RAID0_ORIG_LAYOUT = 1,
+ RAID0_ALT_MULTIZONE_LAYOUT = 2,
+};
struct r0conf {
struct strip_zone *strip_zone;
struct md_rdev **devlist; /* lists of rdevs, pointed to
* by strip_zone->dev */
int nr_strip_zones;
+ enum r0layout layout;
};
#endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index da6a86e28318..223e97ab27e6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2526,7 +2526,8 @@ static void raid5_end_read_request(struct bio * bi)
int set_bad = 0;
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
- atomic_inc(&rdev->read_errors);
+ if (!(bi->bi_status == BLK_STS_PROTECTION))
+ atomic_inc(&rdev->read_errors);
if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
pr_warn_ratelimited(
"md/raid:%s: read error on replacement device (sector %llu on %s).\n",
@@ -4620,7 +4621,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
(1 << STRIPE_FULL_WRITE) |
(1 << STRIPE_BIOFILL_RUN) |
(1 << STRIPE_COMPUTE_RUN) |
- (1 << STRIPE_OPS_REQ_PENDING) |
(1 << STRIPE_DISCARD) |
(1 << STRIPE_BATCH_READY) |
(1 << STRIPE_BATCH_ERR) |
@@ -5726,7 +5726,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
do_flush = false;
}
- set_bit(STRIPE_HANDLE, &sh->state);
+ if (!sh->batch_head)
+ set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
if ((!sh->batch_head || sh == sh->batch_head) &&
(bi->bi_opf & REQ_SYNC) &&
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index cf991f13403e..f90e0704bed9 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -357,7 +357,6 @@ enum {
STRIPE_FULL_WRITE, /* all blocks are set to be overwritten */
STRIPE_BIOFILL_RUN,
STRIPE_COMPUTE_RUN,
- STRIPE_OPS_REQ_PENDING,
STRIPE_ON_UNPLUG_LIST,
STRIPE_DISCARD,
STRIPE_ON_RELEASE_LIST,
@@ -493,9 +492,7 @@ struct disk_info {
*/
static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
{
- int sectors = bio_sectors(bio);
-
- if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
+ if (bio_end_sector(bio) < sector + STRIPE_SECTORS)
return bio->bi_next;
else
return NULL;
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index b0d15c73f6d7..1f2d8c81f0e0 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -329,6 +329,7 @@ struct mdp_superblock_1 {
#define MD_FEATURE_JOURNAL 512 /* support write cache */
#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_MULTIPLE_PPLS 2048 /* support for multiple PPLs */
+#define MD_FEATURE_RAID0_LAYOUT 4096 /* layout is meaningful for RAID0 */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -341,6 +342,7 @@ struct mdp_superblock_1 {
|MD_FEATURE_JOURNAL \
|MD_FEATURE_PPL \
|MD_FEATURE_MULTIPLE_PPLS \
+ |MD_FEATURE_RAID0_LAYOUT \
)
struct r5l_payload_header {