summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-bufio.c12
-rw-r--r--drivers/md/dm-cache-background-tracker.c25
-rw-r--r--drivers/md/dm-cache-background-tracker.h8
-rw-r--r--drivers/md/dm-cache-target.c88
-rw-r--r--drivers/md/dm-clone-target.c4
-rw-r--r--drivers/md/dm-thin.c2
-rw-r--r--drivers/md/dm-unstripe.c4
-rw-r--r--drivers/md/dm-verity-target.c9
-rw-r--r--drivers/md/dm-verity.h1
-rw-r--r--drivers/md/dm-zone.c4
-rw-r--r--drivers/md/dm.c4
-rw-r--r--drivers/md/md-bitmap.c1
-rw-r--r--drivers/md/md.c39
-rw-r--r--drivers/md/md.h24
-rw-r--r--drivers/md/raid0.c12
-rw-r--r--drivers/md/raid1.c108
-rw-r--r--drivers/md/raid10.c94
-rw-r--r--drivers/md/raid5-ppl.c2
-rw-r--r--drivers/md/raid5.c17
-rw-r--r--drivers/md/raid5.h2
20 files changed, 314 insertions, 146 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index d478aafa02c9..23e0b71b991e 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -2471,7 +2471,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
int r;
unsigned int num_locks;
struct dm_bufio_client *c;
- char slab_name[27];
+ char slab_name[64];
+ static atomic_t seqno = ATOMIC_INIT(0);
if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
DMERR("%s: block size not specified or is not multiple of 512b", __func__);
@@ -2522,7 +2523,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
(block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
unsigned int align = min(1U << __ffs(block_size), (unsigned int)PAGE_SIZE);
- snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u", block_size);
+ snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u-%u",
+ block_size, atomic_inc_return(&seqno));
c->slab_cache = kmem_cache_create(slab_name, block_size, align,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_cache) {
@@ -2531,9 +2533,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
}
}
if (aux_size)
- snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", aux_size);
+ snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u-%u",
+ aux_size, atomic_inc_return(&seqno));
else
- snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer");
+ snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u",
+ atomic_inc_return(&seqno));
c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
0, SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_buffer) {
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
index 9c5308298cf1..f3051bd7d2df 100644
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -11,12 +11,6 @@
#define DM_MSG_PREFIX "dm-background-tracker"
-struct bt_work {
- struct list_head list;
- struct rb_node node;
- struct policy_work work;
-};
-
struct background_tracker {
unsigned int max_work;
atomic_t pending_promotes;
@@ -26,10 +20,10 @@ struct background_tracker {
struct list_head issued;
struct list_head queued;
struct rb_root pending;
-
- struct kmem_cache *work_cache;
};
+struct kmem_cache *btracker_work_cache = NULL;
+
struct background_tracker *btracker_create(unsigned int max_work)
{
struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
@@ -48,12 +42,6 @@ struct background_tracker *btracker_create(unsigned int max_work)
INIT_LIST_HEAD(&b->queued);
b->pending = RB_ROOT;
- b->work_cache = KMEM_CACHE(bt_work, 0);
- if (!b->work_cache) {
- DMERR("couldn't create mempool for background work items");
- kfree(b);
- b = NULL;
- }
return b;
}
@@ -66,10 +54,9 @@ void btracker_destroy(struct background_tracker *b)
BUG_ON(!list_empty(&b->issued));
list_for_each_entry_safe (w, tmp, &b->queued, list) {
list_del(&w->list);
- kmem_cache_free(b->work_cache, w);
+ kmem_cache_free(btracker_work_cache, w);
}
- kmem_cache_destroy(b->work_cache);
kfree(b);
}
EXPORT_SYMBOL_GPL(btracker_destroy);
@@ -180,7 +167,7 @@ static struct bt_work *alloc_work(struct background_tracker *b)
if (max_work_reached(b))
return NULL;
- return kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
+ return kmem_cache_alloc(btracker_work_cache, GFP_NOWAIT);
}
int btracker_queue(struct background_tracker *b,
@@ -203,7 +190,7 @@ int btracker_queue(struct background_tracker *b,
* There was a race, we'll just ignore this second
* bit of work for the same oblock.
*/
- kmem_cache_free(b->work_cache, w);
+ kmem_cache_free(btracker_work_cache, w);
return -EINVAL;
}
@@ -244,7 +231,7 @@ void btracker_complete(struct background_tracker *b,
update_stats(b, &w->work, -1);
rb_erase(&w->node, &b->pending);
list_del(&w->list);
- kmem_cache_free(b->work_cache, w);
+ kmem_cache_free(btracker_work_cache, w);
}
EXPORT_SYMBOL_GPL(btracker_complete);
diff --git a/drivers/md/dm-cache-background-tracker.h b/drivers/md/dm-cache-background-tracker.h
index 5b8f5c667b81..09c8fc59f7bb 100644
--- a/drivers/md/dm-cache-background-tracker.h
+++ b/drivers/md/dm-cache-background-tracker.h
@@ -26,6 +26,14 @@
* protected with a spinlock.
*/
+struct bt_work {
+ struct list_head list;
+ struct rb_node node;
+ struct policy_work work;
+};
+
+extern struct kmem_cache *btracker_work_cache;
+
struct background_work;
struct background_tracker;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index aaeeabfab09b..9cb797a561d6 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -10,6 +10,7 @@
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"
#include "dm-io-tracker.h"
+#include "dm-cache-background-tracker.h"
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
@@ -1905,16 +1906,13 @@ static void check_migrations(struct work_struct *ws)
* This function gets called on the error paths of the constructor, so we
* have to cope with a partially initialised struct.
*/
-static void destroy(struct cache *cache)
+static void __destroy(struct cache *cache)
{
- unsigned int i;
-
mempool_exit(&cache->migration_pool);
if (cache->prison)
dm_bio_prison_destroy_v2(cache->prison);
- cancel_delayed_work_sync(&cache->waker);
if (cache->wq)
destroy_workqueue(cache->wq);
@@ -1942,13 +1940,22 @@ static void destroy(struct cache *cache)
if (cache->policy)
dm_cache_policy_destroy(cache->policy);
+ bioset_exit(&cache->bs);
+
+ kfree(cache);
+}
+
+static void destroy(struct cache *cache)
+{
+ unsigned int i;
+
+ cancel_delayed_work_sync(&cache->waker);
+
for (i = 0; i < cache->nr_ctr_args ; i++)
kfree(cache->ctr_args[i]);
kfree(cache->ctr_args);
- bioset_exit(&cache->bs);
-
- kfree(cache);
+ __destroy(cache);
}
static void cache_dtr(struct dm_target *ti)
@@ -2003,7 +2010,6 @@ struct cache_args {
sector_t cache_sectors;
struct dm_dev *origin_dev;
- sector_t origin_sectors;
uint32_t block_size;
@@ -2084,6 +2090,7 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
char **error)
{
+ sector_t origin_sectors;
int r;
if (!at_least_one_arg(as, error))
@@ -2096,8 +2103,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
return r;
}
- ca->origin_sectors = get_dev_size(ca->origin_dev);
- if (ca->ti->len > ca->origin_sectors) {
+ origin_sectors = get_dev_size(ca->origin_dev);
+ if (ca->ti->len > origin_sectors) {
*error = "Device size larger than cached device";
return -EINVAL;
}
@@ -2257,7 +2264,7 @@ static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
/*----------------------------------------------------------------*/
-static struct kmem_cache *migration_cache;
+static struct kmem_cache *migration_cache = NULL;
#define NOT_CORE_OPTION 1
@@ -2407,7 +2414,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
- origin_blocks = cache->origin_sectors = ca->origin_sectors;
+ origin_blocks = cache->origin_sectors = ti->len;
origin_blocks = block_div(origin_blocks, ca->block_size);
cache->origin_blocks = to_oblock(origin_blocks);
@@ -2561,7 +2568,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
*result = cache;
return 0;
bad:
- destroy(cache);
+ __destroy(cache);
return r;
}
@@ -2612,7 +2619,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
if (r) {
- destroy(cache);
+ __destroy(cache);
goto out;
}
@@ -2895,19 +2902,19 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache)
static bool can_resize(struct cache *cache, dm_cblock_t new_size)
{
if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
- if (cache->sized) {
- DMERR("%s: unable to extend cache due to missing cache table reload",
- cache_device_name(cache));
- return false;
- }
+ DMERR("%s: unable to extend cache due to missing cache table reload",
+ cache_device_name(cache));
+ return false;
}
/*
* We can't drop a dirty block when shrinking the cache.
*/
- while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
- new_size = to_cblock(from_cblock(new_size) + 1);
- if (is_dirty(cache, new_size)) {
+ if (cache->loaded_mappings) {
+ new_size = to_cblock(find_next_bit(cache->dirty_bitset,
+ from_cblock(cache->cache_size),
+ from_cblock(new_size)));
+ if (new_size != cache->cache_size) {
DMERR("%s: unable to shrink cache; cache block %llu is dirty",
cache_device_name(cache),
(unsigned long long) from_cblock(new_size));
@@ -2943,20 +2950,15 @@ static int cache_preresume(struct dm_target *ti)
/*
* Check to see if the cache has resized.
*/
- if (!cache->sized) {
- r = resize_cache_dev(cache, csize);
- if (r)
- return r;
-
- cache->sized = true;
-
- } else if (csize != cache->cache_size) {
+ if (!cache->sized || csize != cache->cache_size) {
if (!can_resize(cache, csize))
return -EINVAL;
r = resize_cache_dev(cache, csize);
if (r)
return r;
+
+ cache->sized = true;
}
if (!cache->loaded_mappings) {
@@ -3360,7 +3362,7 @@ static int cache_iterate_devices(struct dm_target *ti,
static void disable_passdown_if_not_supported(struct cache *cache)
{
struct block_device *origin_bdev = cache->origin_dev->bdev;
- struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+ struct queue_limits *origin_limits = bdev_limits(origin_bdev);
const char *reason = NULL;
if (!cache->features.discard_passdown)
@@ -3382,7 +3384,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
struct block_device *origin_bdev = cache->origin_dev->bdev;
- struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+ struct queue_limits *origin_limits = bdev_limits(origin_bdev);
if (!cache->features.discard_passdown) {
/* No passdown is done so setting own virtual limits */
@@ -3444,22 +3446,36 @@ static int __init dm_cache_init(void)
int r;
migration_cache = KMEM_CACHE(dm_cache_migration, 0);
- if (!migration_cache)
- return -ENOMEM;
+ if (!migration_cache) {
+ r = -ENOMEM;
+ goto err;
+ }
+
+ btracker_work_cache = kmem_cache_create("dm_cache_bt_work",
+ sizeof(struct bt_work), __alignof__(struct bt_work), 0, NULL);
+ if (!btracker_work_cache) {
+ r = -ENOMEM;
+ goto err;
+ }
r = dm_register_target(&cache_target);
if (r) {
- kmem_cache_destroy(migration_cache);
- return r;
+ goto err;
}
return 0;
+
+err:
+ kmem_cache_destroy(migration_cache);
+ kmem_cache_destroy(btracker_work_cache);
+ return r;
}
static void __exit dm_cache_exit(void)
{
dm_unregister_target(&cache_target);
kmem_cache_destroy(migration_cache);
+ kmem_cache_destroy(btracker_work_cache);
}
module_init(dm_cache_init);
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 12bbe487a4c8..e956d980672c 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2020,7 +2020,7 @@ static void clone_resume(struct dm_target *ti)
static void disable_passdown_if_not_supported(struct clone *clone)
{
struct block_device *dest_dev = clone->dest_dev->bdev;
- struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
+ struct queue_limits *dest_limits = bdev_limits(dest_dev);
const char *reason = NULL;
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
@@ -2041,7 +2041,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
{
struct block_device *dest_bdev = clone->dest_dev->bdev;
- struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits;
+ struct queue_limits *dest_limits = bdev_limits(dest_bdev);
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
/* No passdown is done so we set our own virtual limits */
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 89632ce97760..9095f19a84f3 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2842,7 +2842,7 @@ static void disable_discard_passdown_if_not_supported(struct pool_c *pt)
{
struct pool *pool = pt->pool;
struct block_device *data_bdev = pt->data_dev->bdev;
- struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
+ struct queue_limits *data_limits = bdev_limits(data_bdev);
const char *reason = NULL;
if (!pt->adjusted_pf.discard_passdown)
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 48587c16c445..e8a9432057dc 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -85,8 +85,8 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
uc->physical_start = start;
- uc->unstripe_offset = uc->unstripe * uc->chunk_size;
- uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size;
+ uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size;
+ uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size;
uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0;
tmp_len = ti->len;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 7d4d90b4395a..c142ec5458b7 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -356,9 +356,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
else if (verity_handle_err(v,
DM_VERITY_BLOCK_TYPE_METADATA,
hash_block)) {
- struct bio *bio =
- dm_bio_from_per_bio_data(io,
- v->ti->per_io_data_size);
+ struct bio *bio;
+ io->had_mismatch = true;
+ bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
dm_audit_log_bio(DM_MSG_PREFIX, "verify-metadata", bio,
block, 0);
r = -EIO;
@@ -482,6 +482,7 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
return -EIO; /* Error correction failed; Just return error */
if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) {
+ io->had_mismatch = true;
dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0);
return -EIO;
}
@@ -606,6 +607,7 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
if (unlikely(status != BLK_STS_OK) &&
unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+ !io->had_mismatch &&
!verity_is_system_shutting_down()) {
if (v->error_mode == DM_VERITY_MODE_PANIC) {
panic("dm-verity device has I/O error");
@@ -779,6 +781,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
io->orig_bi_end_io = bio->bi_end_io;
io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
+ io->had_mismatch = false;
bio->bi_end_io = verity_end_io;
bio->bi_private = io;
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 6b75159bf835..c996140bda94 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -92,6 +92,7 @@ struct dm_verity_io {
sector_t block;
unsigned int n_blocks;
bool in_bh;
+ bool had_mismatch;
struct work_struct work;
struct work_struct bh_work;
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index c0d41c36e06e..20edd3fabbab 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -344,7 +344,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
} else {
set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- lim->max_zone_append_sectors = 0;
+ lim->max_hw_zone_append_sectors = 0;
}
/*
@@ -379,7 +379,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
if (!zlim.mapped_nr_seq_zones) {
lim->max_open_zones = 0;
lim->max_active_zones = 0;
- lim->max_zone_append_sectors = 0;
+ lim->max_hw_zone_append_sectors = 0;
lim->zone_write_granularity = 0;
lim->chunk_sectors = 0;
lim->features &= ~BLK_FEAT_ZONED;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ff4a6b570b76..19230404d8c2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2290,8 +2290,10 @@ static struct mapped_device *alloc_dev(int minor)
* override accordingly.
*/
md->disk = blk_alloc_disk(NULL, md->numa_node_id);
- if (IS_ERR(md->disk))
+ if (IS_ERR(md->disk)) {
+ md->disk = NULL;
goto bad;
+ }
md->queue = md->disk->queue;
init_waitqueue_head(&md->wait);
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 29da10e6f703..c3a42dd66ce5 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1285,6 +1285,7 @@ static void bitmap_unplug_async(struct bitmap *bitmap)
queue_work(md_bitmap_wq, &unplug_work.work);
wait_for_completion(&done);
+ destroy_work_on_stack(&unplug_work.work);
}
static void bitmap_unplug(struct mddev *mddev, bool sync)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 179ee4afe937..aebe12b0ee27 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -546,6 +546,26 @@ static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_n
return 0;
}
+/*
+ * The only difference from bio_chain_endio() is that the current
+ * bi_status of bio does not affect the bi_status of parent.
+ */
+static void md_end_flush(struct bio *bio)
+{
+ struct bio *parent = bio->bi_private;
+
+ /*
+ * If any flush io error before the power failure,
+ * disk data may be lost.
+ */
+ if (bio->bi_status)
+ pr_err("md: %pg flush io error %d\n", bio->bi_bdev,
+ blk_status_to_errno(bio->bi_status));
+
+ bio_put(bio);
+ bio_endio(parent);
+}
+
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
struct md_rdev *rdev;
@@ -565,7 +585,9 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
new = bio_alloc_bioset(rdev->bdev, 0,
REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO,
&mddev->bio_set);
- bio_chain(new, bio);
+ new->bi_private = bio;
+ new->bi_end_io = md_end_flush;
+ bio_inc_remaining(bio);
submit_bio(new);
}
@@ -9762,9 +9784,7 @@ EXPORT_SYMBOL(md_reap_sync_thread);
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
sysfs_notify_dirent_safe(rdev->sysfs_state);
- wait_event_timeout(rdev->blocked_wait,
- !test_bit(Blocked, &rdev->flags) &&
- !test_bit(BlockedBadBlocks, &rdev->flags),
+ wait_event_timeout(rdev->blocked_wait, !rdev_blocked(rdev),
msecs_to_jiffies(5000));
rdev_dec_pending(rdev, mddev);
}
@@ -9793,6 +9813,17 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
{
struct mddev *mddev = rdev->mddev;
int rv;
+
+ /*
+ * Recording new badblocks for faulty rdev will force unnecessary
+ * super block updating. This is fragile for external management because
+ * userspace daemon may trying to remove this device and deadlock may
+ * occur. This will be probably solved in the mdadm, but it is safer to
+ * avoid it.
+ */
+ if (test_bit(Faulty, &rdev->flags))
+ return 1;
+
if (is_new)
s += rdev->new_data_offset;
else
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5d2e6bd58e4d..4ba93af36126 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
}
+static inline bool rdev_blocked(struct md_rdev *rdev)
+{
+ /*
+ * Blocked will be set by error handler and cleared by daemon after
+ * updating superblock, meanwhile write IO should be blocked to prevent
+ * reading old data after power failure.
+ */
+ if (test_bit(Blocked, &rdev->flags))
+ return true;
+
+ /*
+ * Faulty device should not be accessed anymore, there is no need to
+ * wait for bad block to be acknowledged.
+ */
+ if (test_bit(Faulty, &rdev->flags))
+ return false;
+
+ /* rdev is blocked by badblocks. */
+ if (test_bit(BlockedBadBlocks, &rdev->flags))
+ return true;
+
+ return false;
+}
+
#define mddev_add_trace_msg(mddev, fmt, args...) \
do { \
if (!mddev_is_dm(mddev)) \
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 32d587524778..baaf5f8b80ae 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -466,6 +466,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
struct bio *split = bio_split(bio,
zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
&mddev->bio_set);
+
+ if (IS_ERR(split)) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+ bio_endio(bio);
+ return;
+ }
bio_chain(split, bio);
submit_bio_noacct(bio);
bio = split;
@@ -608,6 +614,12 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
if (sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, sectors, GFP_NOIO,
&mddev->bio_set);
+
+ if (IS_ERR(split)) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+ bio_endio(bio);
+ return true;
+ }
bio_chain(split, bio);
raid0_map_submit_bio(mddev, bio);
bio = split;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6c9d24203f39..a5adf08ee174 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1322,7 +1322,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
const enum req_op op = bio_op(bio);
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
- int rdisk;
+ int rdisk, error;
bool r1bio_existed = !!r1_bio;
/*
@@ -1383,6 +1383,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
gfp, &conf->bio_split);
+
+ if (IS_ERR(split)) {
+ error = PTR_ERR(split);
+ goto err_handle;
+ }
bio_chain(split, bio);
submit_bio_noacct(bio);
bio = split;
@@ -1410,6 +1415,47 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r1_bio;
mddev_trace_remap(mddev, read_bio, r1_bio->sector);
submit_bio_noacct(read_bio);
+ return;
+
+err_handle:
+ atomic_dec(&mirror->rdev->nr_pending);
+ bio->bi_status = errno_to_blk_status(error);
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
+ raid_end_bio_io(r1_bio);
+}
+
+static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
+{
+ struct r1conf *conf = mddev->private;
+ int disks = conf->raid_disks * 2;
+ int i;
+
+retry:
+ for (i = 0; i < disks; i++) {
+ struct md_rdev *rdev = conf->mirrors[i].rdev;
+
+ if (!rdev)
+ continue;
+
+ /* don't write here until the bad block is acknowledged */
+ if (test_bit(WriteErrorSeen, &rdev->flags) &&
+ rdev_has_badblock(rdev, bio->bi_iter.bi_sector,
+ bio_sectors(bio)) < 0)
+ set_bit(BlockedBadBlocks, &rdev->flags);
+
+ if (rdev_blocked(rdev)) {
+ if (bio->bi_opf & REQ_NOWAIT)
+ return false;
+
+ mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
+ rdev->raid_disk);
+ atomic_inc(&rdev->nr_pending);
+ md_wait_for_blocked_rdev(rdev, rdev->mddev);
+ goto retry;
+ }
+ }
+
+ return true;
}
static void raid1_write_request(struct mddev *mddev, struct bio *bio,
@@ -1417,9 +1463,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
{
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
- int i, disks;
+ int i, disks, k, error;
unsigned long flags;
- struct md_rdev *blocked_rdev;
int first_clone;
int max_sectors;
bool write_behind = false;
@@ -1457,7 +1502,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
return;
}
- retry_write:
+ if (!wait_blocked_rdev(mddev, bio)) {
+ bio_wouldblock_error(bio);
+ return;
+ }
+
r1_bio = alloc_r1bio(mddev, bio);
r1_bio->sectors = max_write_sectors;
@@ -1473,7 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
*/
disks = conf->raid_disks * 2;
- blocked_rdev = NULL;
max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
struct md_rdev *rdev = conf->mirrors[i].rdev;
@@ -1486,11 +1534,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
write_behind = true;
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- atomic_inc(&rdev->nr_pending);
- blocked_rdev = rdev;
- break;
- }
r1_bio->bios[i] = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
if (i < conf->raid_disks)
@@ -1506,13 +1549,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
&first_bad, &bad_sectors);
- if (is_bad < 0) {
- /* mustn't write here until the bad block is
- * acknowledged*/
- set_bit(BlockedBadBlocks, &rdev->flags);
- blocked_rdev = rdev;
- break;
- }
if (is_bad && first_bad <= r1_bio->sector) {
/* Cannot write here at all */
bad_sectors -= (r1_bio->sector - first_bad);
@@ -1543,27 +1579,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio->bios[i] = bio;
}
- if (unlikely(blocked_rdev)) {
- /* Wait for this device to become unblocked */
- int j;
-
- for (j = 0; j < i; j++)
- if (r1_bio->bios[j])
- rdev_dec_pending(conf->mirrors[j].rdev, mddev);
- mempool_free(r1_bio, &conf->r1bio_pool);
- allow_barrier(conf, bio->bi_iter.bi_sector);
-
- if (bio->bi_opf & REQ_NOWAIT) {
- bio_wouldblock_error(bio);
- return;
- }
- mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
- blocked_rdev->raid_disk);
- md_wait_for_blocked_rdev(blocked_rdev, mddev);
- wait_barrier(conf, bio->bi_iter.bi_sector, false);
- goto retry_write;
- }
-
/*
* When using a bitmap, we may call alloc_behind_master_bio below.
* alloc_behind_master_bio allocates a copy of the data payload a page
@@ -1576,6 +1591,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
GFP_NOIO, &conf->bio_split);
+
+ if (IS_ERR(split)) {
+ error = PTR_ERR(split);
+ goto err_handle;
+ }
bio_chain(split, bio);
submit_bio_noacct(bio);
bio = split;
@@ -1660,6 +1680,18 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
/* In case raid1d snuck in to freeze_array */
wake_up_barrier(conf);
+ return;
+err_handle:
+ for (k = 0; k < i; k++) {
+ if (r1_bio->bios[k]) {
+ rdev_dec_pending(conf->mirrors[k].rdev, mddev);
+ r1_bio->bios[k] = NULL;
+ }
+ }
+
+ bio->bi_status = errno_to_blk_status(error);
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
+ raid_end_bio_io(r1_bio);
}
static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f3bf1116794a..18989231791a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
int slot = r10_bio->read_slot;
struct md_rdev *err_rdev = NULL;
gfp_t gfp = GFP_NOIO;
+ int error;
if (slot >= 0 && r10_bio->devs[slot].rdev) {
/*
@@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
gfp, &conf->bio_split);
+ if (IS_ERR(split)) {
+ error = PTR_ERR(split);
+ goto err_handle;
+ }
bio_chain(split, bio);
allow_barrier(conf);
submit_bio_noacct(bio);
@@ -1236,6 +1241,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
mddev_trace_remap(mddev, read_bio, r10_bio->sector);
submit_bio_noacct(read_bio);
return;
+err_handle:
+ atomic_dec(&rdev->nr_pending);
+ bio->bi_status = errno_to_blk_status(error);
+ set_bit(R10BIO_Uptodate, &r10_bio->state);
+ raid_end_bio_io(r10_bio);
}
static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
@@ -1285,9 +1295,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
{
- int i;
struct r10conf *conf = mddev->private;
struct md_rdev *blocked_rdev;
+ int i;
retry_wait:
blocked_rdev = NULL;
@@ -1295,40 +1305,36 @@ retry_wait:
struct md_rdev *rdev, *rrdev;
rdev = conf->mirrors[i].rdev;
- rrdev = conf->mirrors[i].replacement;
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- atomic_inc(&rdev->nr_pending);
- blocked_rdev = rdev;
- break;
- }
- if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
- atomic_inc(&rrdev->nr_pending);
- blocked_rdev = rrdev;
- break;
- }
-
- if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
+ if (rdev) {
sector_t dev_sector = r10_bio->devs[i].addr;
/*
* Discard request doesn't care the write result
* so it doesn't need to wait blocked disk here.
*/
- if (!r10_bio->sectors)
- continue;
-
- if (rdev_has_badblock(rdev, dev_sector,
- r10_bio->sectors) < 0) {
+ if (test_bit(WriteErrorSeen, &rdev->flags) &&
+ r10_bio->sectors &&
+ rdev_has_badblock(rdev, dev_sector,
+ r10_bio->sectors) < 0)
/*
- * Mustn't write here until the bad block
- * is acknowledged
+ * Mustn't write here until the bad
+ * block is acknowledged
*/
- atomic_inc(&rdev->nr_pending);
set_bit(BlockedBadBlocks, &rdev->flags);
+
+ if (rdev_blocked(rdev)) {
blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
break;
}
}
+
+ rrdev = conf->mirrors[i].replacement;
+ if (rrdev && rdev_blocked(rrdev)) {
+ atomic_inc(&rrdev->nr_pending);
+ blocked_rdev = rrdev;
+ break;
+ }
}
if (unlikely(blocked_rdev)) {
@@ -1347,9 +1353,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
- int i;
+ int i, k;
sector_t sectors;
int max_sectors;
+ int error;
if ((mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1482,6 +1489,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
if (r10_bio->sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, r10_bio->sectors,
GFP_NOIO, &conf->bio_split);
+ if (IS_ERR(split)) {
+ error = PTR_ERR(split);
+ goto err_handle;
+ }
bio_chain(split, bio);
allow_barrier(conf);
submit_bio_noacct(bio);
@@ -1503,6 +1514,26 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
raid10_write_one_disk(mddev, r10_bio, bio, true, i);
}
one_write_done(r10_bio);
+ return;
+err_handle:
+ for (k = 0; k < i; k++) {
+ int d = r10_bio->devs[k].devnum;
+ struct md_rdev *rdev = conf->mirrors[d].rdev;
+ struct md_rdev *rrdev = conf->mirrors[d].replacement;
+
+ if (r10_bio->devs[k].bio) {
+ rdev_dec_pending(rdev, mddev);
+ r10_bio->devs[k].bio = NULL;
+ }
+ if (r10_bio->devs[k].repl_bio) {
+ rdev_dec_pending(rrdev, mddev);
+ r10_bio->devs[k].repl_bio = NULL;
+ }
+ }
+
+ bio->bi_status = errno_to_blk_status(error);
+ set_bit(R10BIO_Uptodate, &r10_bio->state);
+ raid_end_bio_io(r10_bio);
}
static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
@@ -1644,6 +1675,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (remainder) {
split_size = stripe_size - remainder;
split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+ if (IS_ERR(split)) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+ bio_endio(bio);
+ return 0;
+ }
bio_chain(split, bio);
allow_barrier(conf);
/* Resend the fist split part */
@@ -1654,6 +1690,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (remainder) {
split_size = bio_sectors(bio) - remainder;
split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+ if (IS_ERR(split)) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+ bio_endio(bio);
+ return 0;
+ }
bio_chain(split, bio);
allow_barrier(conf);
/* Resend the second split part */
@@ -4061,9 +4102,12 @@ static int raid10_run(struct mddev *mddev)
}
if (!mddev_is_dm(conf->mddev)) {
- ret = raid10_set_queue_limits(mddev);
- if (ret)
+ int err = raid10_set_queue_limits(mddev);
+
+ if (err) {
+ ret = err;
goto out_free_conf;
+ }
}
/* need to check that every block has at least one working mirror */
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index a70cbec12ed0..37c4da5311ca 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -258,7 +258,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
pplhdr->signature = cpu_to_le32(ppl_conf->signature);
- io->seq = atomic64_add_return(1, &ppl_conf->seq);
+ io->seq = atomic64_inc_return(&ppl_conf->seq);
pplhdr->generation = cpu_to_le64(io->seq);
return io;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc2ea636d173..f09e7677ee9f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4724,14 +4724,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
if (rdev) {
is_bad = rdev_has_badblock(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf));
- if (s->blocked_rdev == NULL
- && (test_bit(Blocked, &rdev->flags)
- || is_bad < 0)) {
+ if (s->blocked_rdev == NULL) {
if (is_bad < 0)
- set_bit(BlockedBadBlocks,
- &rdev->flags);
- s->blocked_rdev = rdev;
- atomic_inc(&rdev->nr_pending);
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ if (rdev_blocked(rdev)) {
+ s->blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ }
}
}
clear_bit(R5_Insync, &dev->flags);
@@ -7177,6 +7176,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
err = mddev_suspend_and_lock(mddev);
if (err)
return err;
+ raid5_quiesce(mddev, true);
+
conf = mddev->private;
if (!conf)
err = -ENODEV;
@@ -7198,6 +7199,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
kfree(old_groups);
}
}
+
+ raid5_quiesce(mddev, false);
mddev_unlock_and_resume(mddev);
return err ?: len;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 896ecfc4afa6..d174e586698f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -633,7 +633,7 @@ struct r5conf {
* two caches.
*/
int active_name;
- char cache_name[2][32];
+ char cache_name[2][48];
struct kmem_cache *slab_cache; /* for allocating stripes */
struct mutex cache_size_mutex; /* Protect changes to cache size */