Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-bufio.c | 12
-rw-r--r-- | drivers/md/dm-cache-background-tracker.c | 25
-rw-r--r-- | drivers/md/dm-cache-background-tracker.h | 8
-rw-r--r-- | drivers/md/dm-cache-target.c | 88
-rw-r--r-- | drivers/md/dm-clone-target.c | 4
-rw-r--r-- | drivers/md/dm-thin.c | 2
-rw-r--r-- | drivers/md/dm-unstripe.c | 4
-rw-r--r-- | drivers/md/dm-verity-target.c | 9
-rw-r--r-- | drivers/md/dm-verity.h | 1
-rw-r--r-- | drivers/md/dm-zone.c | 4
-rw-r--r-- | drivers/md/dm.c | 4
-rw-r--r-- | drivers/md/md-bitmap.c | 1
-rw-r--r-- | drivers/md/md.c | 39
-rw-r--r-- | drivers/md/md.h | 24
-rw-r--r-- | drivers/md/raid0.c | 12
-rw-r--r-- | drivers/md/raid1.c | 108
-rw-r--r-- | drivers/md/raid10.c | 94
-rw-r--r-- | drivers/md/raid5-ppl.c | 2
-rw-r--r-- | drivers/md/raid5.c | 17
-rw-r--r-- | drivers/md/raid5.h | 2
20 files changed, 314 insertions, 146 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index d478aafa02c9..23e0b71b991e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -2471,7 +2471,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign int r; unsigned int num_locks; struct dm_bufio_client *c; - char slab_name[27]; + char slab_name[64]; + static atomic_t seqno = ATOMIC_INIT(0); if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) { DMERR("%s: block size not specified or is not multiple of 512b", __func__); @@ -2522,7 +2523,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign (block_size < PAGE_SIZE || !is_power_of_2(block_size))) { unsigned int align = min(1U << __ffs(block_size), (unsigned int)PAGE_SIZE); - snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u", block_size); + snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u-%u", + block_size, atomic_inc_return(&seqno)); c->slab_cache = kmem_cache_create(slab_name, block_size, align, SLAB_RECLAIM_ACCOUNT, NULL); if (!c->slab_cache) { @@ -2531,9 +2533,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign } } if (aux_size) - snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", aux_size); + snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u-%u", + aux_size, atomic_inc_return(&seqno)); else - snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer"); + snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", + atomic_inc_return(&seqno)); c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size, 0, SLAB_RECLAIM_ACCOUNT, NULL); if (!c->slab_buffer) { diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 9c5308298cf1..f3051bd7d2df 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -11,12 +11,6 @@ #define DM_MSG_PREFIX "dm-background-tracker" -struct bt_work { - struct list_head list; - struct rb_node node; - struct policy_work work; -}; - struct background_tracker { unsigned int max_work; atomic_t pending_promotes; @@ -26,10 +20,10 @@ struct background_tracker { struct list_head issued; struct list_head queued; struct rb_root pending; - - struct kmem_cache *work_cache; }; +struct kmem_cache *btracker_work_cache = NULL; + struct background_tracker *btracker_create(unsigned int max_work) { struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL); @@ -48,12 +42,6 @@ struct background_tracker *btracker_create(unsigned int max_work) INIT_LIST_HEAD(&b->queued); b->pending = RB_ROOT; - b->work_cache = KMEM_CACHE(bt_work, 0); - if (!b->work_cache) { - DMERR("couldn't create mempool for background work items"); - kfree(b); - b = NULL; - } return b; } @@ -66,10 +54,9 @@ void btracker_destroy(struct background_tracker *b) BUG_ON(!list_empty(&b->issued)); list_for_each_entry_safe (w, tmp, &b->queued, list) { list_del(&w->list); - kmem_cache_free(b->work_cache, w); + kmem_cache_free(btracker_work_cache, w); } - kmem_cache_destroy(b->work_cache); kfree(b); } EXPORT_SYMBOL_GPL(btracker_destroy); @@ -180,7 +167,7 @@ static struct bt_work *alloc_work(struct background_tracker *b) if (max_work_reached(b)) return NULL; - return kmem_cache_alloc(b->work_cache, GFP_NOWAIT); + return kmem_cache_alloc(btracker_work_cache, GFP_NOWAIT); } int btracker_queue(struct background_tracker *b, @@ -203,7 +190,7 @@ int btracker_queue(struct background_tracker *b, * There was a race, we'll just ignore this second * bit of 
work for the same oblock. */ - kmem_cache_free(b->work_cache, w); + kmem_cache_free(btracker_work_cache, w); return -EINVAL; } @@ -244,7 +231,7 @@ void btracker_complete(struct background_tracker *b, update_stats(b, &w->work, -1); rb_erase(&w->node, &b->pending); list_del(&w->list); - kmem_cache_free(b->work_cache, w); + kmem_cache_free(btracker_work_cache, w); } EXPORT_SYMBOL_GPL(btracker_complete); diff --git a/drivers/md/dm-cache-background-tracker.h b/drivers/md/dm-cache-background-tracker.h index 5b8f5c667b81..09c8fc59f7bb 100644 --- a/drivers/md/dm-cache-background-tracker.h +++ b/drivers/md/dm-cache-background-tracker.h @@ -26,6 +26,14 @@ * protected with a spinlock. */ +struct bt_work { + struct list_head list; + struct rb_node node; + struct policy_work work; +}; + +extern struct kmem_cache *btracker_work_cache; + struct background_work; struct background_tracker; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index aaeeabfab09b..9cb797a561d6 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -10,6 +10,7 @@ #include "dm-bio-record.h" #include "dm-cache-metadata.h" #include "dm-io-tracker.h" +#include "dm-cache-background-tracker.h" #include <linux/dm-io.h> #include <linux/dm-kcopyd.h> @@ -1905,16 +1906,13 @@ static void check_migrations(struct work_struct *ws) * This function gets called on the error paths of the constructor, so we * have to cope with a partially initialised struct. */ -static void destroy(struct cache *cache) +static void __destroy(struct cache *cache) { - unsigned int i; - mempool_exit(&cache->migration_pool); if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); - cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); @@ -1942,13 +1940,22 @@ static void destroy(struct cache *cache) if (cache->policy) dm_cache_policy_destroy(cache->policy); + bioset_exit(&cache->bs); + + kfree(cache); +} + +static void destroy(struct cache *cache) +{ + unsigned int i; + + cancel_delayed_work_sync(&cache->waker); + for (i = 0; i < cache->nr_ctr_args ; i++) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); - bioset_exit(&cache->bs); - - kfree(cache); + __destroy(cache); } static void cache_dtr(struct dm_target *ti) @@ -2003,7 +2010,6 @@ struct cache_args { sector_t cache_sectors; struct dm_dev *origin_dev; - sector_t origin_sectors; uint32_t block_size; @@ -2084,6 +2090,7 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, char **error) { + sector_t origin_sectors; int r; if (!at_least_one_arg(as, error)) @@ -2096,8 +2103,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, return r; } - ca->origin_sectors = get_dev_size(ca->origin_dev); - if (ca->ti->len > ca->origin_sectors) { + origin_sectors = get_dev_size(ca->origin_dev); + if (ca->ti->len > origin_sectors) { *error = "Device size larger than cached device"; return -EINVAL; } @@ -2257,7 +2264,7 @@ static int parse_cache_args(struct cache_args *ca, int argc, char **argv, /*----------------------------------------------------------------*/ -static struct kmem_cache *migration_cache; +static struct kmem_cache *migration_cache = NULL; #define NOT_CORE_OPTION 1 @@ -2407,7 +2414,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; - origin_blocks = cache->origin_sectors = ca->origin_sectors; + origin_blocks = cache->origin_sectors = 
ti->len; origin_blocks = block_div(origin_blocks, ca->block_size); cache->origin_blocks = to_oblock(origin_blocks); @@ -2561,7 +2568,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) *result = cache; return 0; bad: - destroy(cache); + __destroy(cache); return r; } @@ -2612,7 +2619,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv) r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); if (r) { - destroy(cache); + __destroy(cache); goto out; } @@ -2895,19 +2902,19 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { if (from_cblock(new_size) > from_cblock(cache->cache_size)) { - if (cache->sized) { - DMERR("%s: unable to extend cache due to missing cache table reload", - cache_device_name(cache)); - return false; - } + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; } /* * We can't drop a dirty block when shrinking the cache. */ - while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - new_size = to_cblock(from_cblock(new_size) + 1); - if (is_dirty(cache, new_size)) { + if (cache->loaded_mappings) { + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { DMERR("%s: unable to shrink cache; cache block %llu is dirty", cache_device_name(cache), (unsigned long long) from_cblock(new_size)); @@ -2943,20 +2950,15 @@ static int cache_preresume(struct dm_target *ti) /* * Check to see if the cache has resized. */ - if (!cache->sized) { - r = resize_cache_dev(cache, csize); - if (r) - return r; - - cache->sized = true; - - } else if (csize != cache->cache_size) { + if (!cache->sized || csize != cache->cache_size) { if (!can_resize(cache, csize)) return -EINVAL; r = resize_cache_dev(cache, csize); if (r) return r; + + cache->sized = true; } if (!cache->loaded_mappings) { @@ -3360,7 +3362,7 @@ static int cache_iterate_devices(struct dm_target *ti, static void disable_passdown_if_not_supported(struct cache *cache) { struct block_device *origin_bdev = cache->origin_dev->bdev; - struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + struct queue_limits *origin_limits = bdev_limits(origin_bdev); const char *reason = NULL; if (!cache->features.discard_passdown) @@ -3382,7 +3384,7 @@ static void disable_passdown_if_not_supported(struct cache *cache) static void set_discard_limits(struct cache *cache, struct queue_limits *limits) { struct block_device *origin_bdev = cache->origin_dev->bdev; - struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + struct queue_limits *origin_limits = bdev_limits(origin_bdev); if (!cache->features.discard_passdown) { /* No passdown is done so setting own virtual limits */ @@ -3444,22 +3446,36 @@ static int __init dm_cache_init(void) int r; migration_cache = KMEM_CACHE(dm_cache_migration, 0); - if (!migration_cache) - return -ENOMEM; + if (!migration_cache) { + r = -ENOMEM; + goto err; + } + + btracker_work_cache = kmem_cache_create("dm_cache_bt_work", + sizeof(struct bt_work), __alignof__(struct bt_work), 0, NULL); + if (!btracker_work_cache) { + r = -ENOMEM; + goto err; + } r = dm_register_target(&cache_target); if (r) { - kmem_cache_destroy(migration_cache); - return r; + goto err; } return 0; + +err: + kmem_cache_destroy(migration_cache); + kmem_cache_destroy(btracker_work_cache); + return r; } static void __exit 
dm_cache_exit(void) { dm_unregister_target(&cache_target); kmem_cache_destroy(migration_cache); + kmem_cache_destroy(btracker_work_cache); } module_init(dm_cache_init); diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 12bbe487a4c8..e956d980672c 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -2020,7 +2020,7 @@ static void clone_resume(struct dm_target *ti) static void disable_passdown_if_not_supported(struct clone *clone) { struct block_device *dest_dev = clone->dest_dev->bdev; - struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits; + struct queue_limits *dest_limits = bdev_limits(dest_dev); const char *reason = NULL; if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) @@ -2041,7 +2041,7 @@ static void disable_passdown_if_not_supported(struct clone *clone) static void set_discard_limits(struct clone *clone, struct queue_limits *limits) { struct block_device *dest_bdev = clone->dest_dev->bdev; - struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits; + struct queue_limits *dest_limits = bdev_limits(dest_bdev); if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) { /* No passdown is done so we set our own virtual limits */ diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 89632ce97760..9095f19a84f3 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2842,7 +2842,7 @@ static void disable_discard_passdown_if_not_supported(struct pool_c *pt) { struct pool *pool = pt->pool; struct block_device *data_bdev = pt->data_dev->bdev; - struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; + struct queue_limits *data_limits = bdev_limits(data_bdev); const char *reason = NULL; if (!pt->adjusted_pf.discard_passdown) diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 48587c16c445..e8a9432057dc 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -85,8 +85,8 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } uc->physical_start = start; - uc->unstripe_offset = uc->unstripe * uc->chunk_size; - uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size; + uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size; + uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size; uc->chunk_shift = is_power_of_2(uc->chunk_size) ? 
fls(uc->chunk_size) - 1 : 0; tmp_len = ti->len; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 7d4d90b4395a..c142ec5458b7 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -356,9 +356,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, hash_block)) { - struct bio *bio = - dm_bio_from_per_bio_data(io, - v->ti->per_io_data_size); + struct bio *bio; + io->had_mismatch = true; + bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); dm_audit_log_bio(DM_MSG_PREFIX, "verify-metadata", bio, block, 0); r = -EIO; @@ -482,6 +482,7 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v, return -EIO; /* Error correction failed; Just return error */ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) { + io->had_mismatch = true; dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0); return -EIO; } @@ -606,6 +607,7 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (unlikely(status != BLK_STS_OK) && unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !io->had_mismatch && !verity_is_system_shutting_down()) { if (v->error_mode == DM_VERITY_MODE_PANIC) { panic("dm-verity device has I/O error"); @@ -779,6 +781,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) io->orig_bi_end_io = bio->bi_end_io; io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits; + io->had_mismatch = false; bio->bi_end_io = verity_end_io; bio->bi_private = io; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 6b75159bf835..c996140bda94 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -92,6 +92,7 @@ struct dm_verity_io { sector_t block; unsigned int n_blocks; bool in_bh; + bool had_mismatch; struct work_struct work; struct work_struct bh_work; diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index c0d41c36e06e..20edd3fabbab 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -344,7 +344,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); } else { set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - lim->max_zone_append_sectors = 0; + lim->max_hw_zone_append_sectors = 0; } /* @@ -379,7 +379,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, if (!zlim.mapped_nr_seq_zones) { lim->max_open_zones = 0; lim->max_active_zones = 0; - lim->max_zone_append_sectors = 0; + lim->max_hw_zone_append_sectors = 0; lim->zone_write_granularity = 0; lim->chunk_sectors = 0; lim->features &= ~BLK_FEAT_ZONED; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ff4a6b570b76..19230404d8c2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2290,8 +2290,10 @@ static struct mapped_device *alloc_dev(int minor) * override accordingly. 
*/ md->disk = blk_alloc_disk(NULL, md->numa_node_id); - if (IS_ERR(md->disk)) + if (IS_ERR(md->disk)) { + md->disk = NULL; goto bad; + } md->queue = md->disk->queue; init_waitqueue_head(&md->wait); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 29da10e6f703..c3a42dd66ce5 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1285,6 +1285,7 @@ static void bitmap_unplug_async(struct bitmap *bitmap) queue_work(md_bitmap_wq, &unplug_work.work); wait_for_completion(&done); + destroy_work_on_stack(&unplug_work.work); } static void bitmap_unplug(struct mddev *mddev, bool sync) diff --git a/drivers/md/md.c b/drivers/md/md.c index 179ee4afe937..aebe12b0ee27 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -546,6 +546,26 @@ static int mddev_set_closing_and_sync_blockdev(struct mddev *mddev, int opener_n return 0; } +/* + * The only difference from bio_chain_endio() is that the current + * bi_status of bio does not affect the bi_status of parent. + */ +static void md_end_flush(struct bio *bio) +{ + struct bio *parent = bio->bi_private; + + /* + * If any flush io error before the power failure, + * disk data may be lost. + */ + if (bio->bi_status) + pr_err("md: %pg flush io error %d\n", bio->bi_bdev, + blk_status_to_errno(bio->bi_status)); + + bio_put(bio); + bio_endio(parent); +} + bool md_flush_request(struct mddev *mddev, struct bio *bio) { struct md_rdev *rdev; @@ -565,7 +585,9 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio) new = bio_alloc_bioset(rdev->bdev, 0, REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO, &mddev->bio_set); - bio_chain(new, bio); + new->bi_private = bio; + new->bi_end_io = md_end_flush; + bio_inc_remaining(bio); submit_bio(new); } @@ -9762,9 +9784,7 @@ EXPORT_SYMBOL(md_reap_sync_thread); void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) { sysfs_notify_dirent_safe(rdev->sysfs_state); - wait_event_timeout(rdev->blocked_wait, - !test_bit(Blocked, &rdev->flags) && - !test_bit(BlockedBadBlocks, &rdev->flags), + wait_event_timeout(rdev->blocked_wait, !rdev_blocked(rdev), msecs_to_jiffies(5000)); rdev_dec_pending(rdev, mddev); } @@ -9793,6 +9813,17 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, { struct mddev *mddev = rdev->mddev; int rv; + + /* + * Recording new badblocks for faulty rdev will force unnecessary + * super block updating. This is fragile for external management because + * userspace daemon may trying to remove this device and deadlock may + * occur. This will be probably solved in the mdadm, but it is safer to + * avoid it. + */ + if (test_bit(Faulty, &rdev->flags)) + return 1; + if (is_new) s += rdev->new_data_offset; else diff --git a/drivers/md/md.h b/drivers/md/md.h index 5d2e6bd58e4d..4ba93af36126 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector); } +static inline bool rdev_blocked(struct md_rdev *rdev) +{ + /* + * Blocked will be set by error handler and cleared by daemon after + * updating superblock, meanwhile write IO should be blocked to prevent + * reading old data after power failure. + */ + if (test_bit(Blocked, &rdev->flags)) + return true; + + /* + * Faulty device should not be accessed anymore, there is no need to + * wait for bad block to be acknowledged. + */ + if (test_bit(Faulty, &rdev->flags)) + return false; + + /* rdev is blocked by badblocks. 
*/ + if (test_bit(BlockedBadBlocks, &rdev->flags)) + return true; + + return false; +} + #define mddev_add_trace_msg(mddev, fmt, args...) \ do { \ if (!mddev_is_dm(mddev)) \ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 32d587524778..baaf5f8b80ae 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -466,6 +466,12 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) struct bio *split = bio_split(bio, zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO, &mddev->bio_set); + + if (IS_ERR(split)) { + bio->bi_status = errno_to_blk_status(PTR_ERR(split)); + bio_endio(bio); + return; + } bio_chain(split, bio); submit_bio_noacct(bio); bio = split; @@ -608,6 +614,12 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) if (sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, sectors, GFP_NOIO, &mddev->bio_set); + + if (IS_ERR(split)) { + bio->bi_status = errno_to_blk_status(PTR_ERR(split)); + bio_endio(bio); + return true; + } bio_chain(split, bio); raid0_map_submit_bio(mddev, bio); bio = split; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6c9d24203f39..a5adf08ee174 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1322,7 +1322,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, const enum req_op op = bio_op(bio); const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; int max_sectors; - int rdisk; + int rdisk, error; bool r1bio_existed = !!r1_bio; /* @@ -1383,6 +1383,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, gfp, &conf->bio_split); + + if (IS_ERR(split)) { + error = PTR_ERR(split); + goto err_handle; + } bio_chain(split, bio); submit_bio_noacct(bio); bio = split; @@ -1410,6 +1415,47 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r1_bio; mddev_trace_remap(mddev, read_bio, r1_bio->sector); submit_bio_noacct(read_bio); + return; + +err_handle: + atomic_dec(&mirror->rdev->nr_pending); + bio->bi_status = errno_to_blk_status(error); + set_bit(R1BIO_Uptodate, &r1_bio->state); + raid_end_bio_io(r1_bio); +} + +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio) +{ + struct r1conf *conf = mddev->private; + int disks = conf->raid_disks * 2; + int i; + +retry: + for (i = 0; i < disks; i++) { + struct md_rdev *rdev = conf->mirrors[i].rdev; + + if (!rdev) + continue; + + /* don't write here until the bad block is acknowledged */ + if (test_bit(WriteErrorSeen, &rdev->flags) && + rdev_has_badblock(rdev, bio->bi_iter.bi_sector, + bio_sectors(bio)) < 0) + set_bit(BlockedBadBlocks, &rdev->flags); + + if (rdev_blocked(rdev)) { + if (bio->bi_opf & REQ_NOWAIT) + return false; + + mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked", + rdev->raid_disk); + atomic_inc(&rdev->nr_pending); + md_wait_for_blocked_rdev(rdev, rdev->mddev); + goto retry; + } + } + + return true; } static void raid1_write_request(struct mddev *mddev, struct bio *bio, @@ -1417,9 +1463,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, { struct r1conf *conf = mddev->private; struct r1bio *r1_bio; - int i, disks; + int i, disks, k, error; unsigned long flags; - struct md_rdev *blocked_rdev; int first_clone; int max_sectors; bool write_behind = false; @@ -1457,7 +1502,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, return; } - retry_write: + if (!wait_blocked_rdev(mddev, bio)) { + 
bio_wouldblock_error(bio); + return; + } + r1_bio = alloc_r1bio(mddev, bio); r1_bio->sectors = max_write_sectors; @@ -1473,7 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, */ disks = conf->raid_disks * 2; - blocked_rdev = NULL; max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { struct md_rdev *rdev = conf->mirrors[i].rdev; @@ -1486,11 +1534,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags)) write_behind = true; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } r1_bio->bios[i] = NULL; if (!rdev || test_bit(Faulty, &rdev->flags)) { if (i < conf->raid_disks) @@ -1506,13 +1549,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, is_bad = is_badblock(rdev, r1_bio->sector, max_sectors, &first_bad, &bad_sectors); - if (is_bad < 0) { - /* mustn't write here until the bad block is - * acknowledged*/ - set_bit(BlockedBadBlocks, &rdev->flags); - blocked_rdev = rdev; - break; - } if (is_bad && first_bad <= r1_bio->sector) { /* Cannot write here at all */ bad_sectors -= (r1_bio->sector - first_bad); @@ -1543,27 +1579,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->bios[i] = bio; } - if (unlikely(blocked_rdev)) { - /* Wait for this device to become unblocked */ - int j; - - for (j = 0; j < i; j++) - if (r1_bio->bios[j]) - rdev_dec_pending(conf->mirrors[j].rdev, mddev); - mempool_free(r1_bio, &conf->r1bio_pool); - allow_barrier(conf, bio->bi_iter.bi_sector); - - if (bio->bi_opf & REQ_NOWAIT) { - bio_wouldblock_error(bio); - return; - } - mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked", - blocked_rdev->raid_disk); - md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf, bio->bi_iter.bi_sector, false); - goto retry_write; - } - /* * When using a bitmap, we may call alloc_behind_master_bio below. 
* alloc_behind_master_bio allocates a copy of the data payload a page @@ -1576,6 +1591,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, GFP_NOIO, &conf->bio_split); + + if (IS_ERR(split)) { + error = PTR_ERR(split); + goto err_handle; + } bio_chain(split, bio); submit_bio_noacct(bio); bio = split; @@ -1660,6 +1680,18 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, /* In case raid1d snuck in to freeze_array */ wake_up_barrier(conf); + return; +err_handle: + for (k = 0; k < i; k++) { + if (r1_bio->bios[k]) { + rdev_dec_pending(conf->mirrors[k].rdev, mddev); + r1_bio->bios[k] = NULL; + } + } + + bio->bi_status = errno_to_blk_status(error); + set_bit(R1BIO_Uptodate, &r1_bio->state); + raid_end_bio_io(r1_bio); } static bool raid1_make_request(struct mddev *mddev, struct bio *bio) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f3bf1116794a..18989231791a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, int slot = r10_bio->read_slot; struct md_rdev *err_rdev = NULL; gfp_t gfp = GFP_NOIO; + int error; if (slot >= 0 && r10_bio->devs[slot].rdev) { /* @@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, gfp, &conf->bio_split); + if (IS_ERR(split)) { + error = PTR_ERR(split); + goto err_handle; + } bio_chain(split, bio); allow_barrier(conf); submit_bio_noacct(bio); @@ -1236,6 +1241,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, mddev_trace_remap(mddev, read_bio, r10_bio->sector); submit_bio_noacct(read_bio); return; +err_handle: + atomic_dec(&rdev->nr_pending); + bio->bi_status = errno_to_blk_status(error); + set_bit(R10BIO_Uptodate, &r10_bio->state); + raid_end_bio_io(r10_bio); } static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, @@ -1285,9 +1295,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) { - int i; struct r10conf *conf = mddev->private; struct md_rdev *blocked_rdev; + int i; retry_wait: blocked_rdev = NULL; @@ -1295,40 +1305,36 @@ retry_wait: struct md_rdev *rdev, *rrdev; rdev = conf->mirrors[i].rdev; - rrdev = conf->mirrors[i].replacement; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } - if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) { - atomic_inc(&rrdev->nr_pending); - blocked_rdev = rrdev; - break; - } - - if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { + if (rdev) { sector_t dev_sector = r10_bio->devs[i].addr; /* * Discard request doesn't care the write result * so it doesn't need to wait blocked disk here. 
*/ - if (!r10_bio->sectors) - continue; - - if (rdev_has_badblock(rdev, dev_sector, - r10_bio->sectors) < 0) { + if (test_bit(WriteErrorSeen, &rdev->flags) && + r10_bio->sectors && + rdev_has_badblock(rdev, dev_sector, + r10_bio->sectors) < 0) /* - * Mustn't write here until the bad block - * is acknowledged + * Mustn't write here until the bad + * block is acknowledged */ - atomic_inc(&rdev->nr_pending); set_bit(BlockedBadBlocks, &rdev->flags); + + if (rdev_blocked(rdev)) { blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); break; } } + + rrdev = conf->mirrors[i].replacement; + if (rrdev && rdev_blocked(rrdev)) { + atomic_inc(&rrdev->nr_pending); + blocked_rdev = rrdev; + break; + } } if (unlikely(blocked_rdev)) { @@ -1347,9 +1353,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, struct r10bio *r10_bio) { struct r10conf *conf = mddev->private; - int i; + int i, k; sector_t sectors; int max_sectors; + int error; if ((mddev_is_clustered(mddev) && md_cluster_ops->area_resyncing(mddev, WRITE, @@ -1482,6 +1489,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, if (r10_bio->sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, r10_bio->sectors, GFP_NOIO, &conf->bio_split); + if (IS_ERR(split)) { + error = PTR_ERR(split); + goto err_handle; + } bio_chain(split, bio); allow_barrier(conf); submit_bio_noacct(bio); @@ -1503,6 +1514,26 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, raid10_write_one_disk(mddev, r10_bio, bio, true, i); } one_write_done(r10_bio); + return; +err_handle: + for (k = 0; k < i; k++) { + int d = r10_bio->devs[k].devnum; + struct md_rdev *rdev = conf->mirrors[d].rdev; + struct md_rdev *rrdev = conf->mirrors[d].replacement; + + if (r10_bio->devs[k].bio) { + rdev_dec_pending(rdev, mddev); + r10_bio->devs[k].bio = NULL; + } + if (r10_bio->devs[k].repl_bio) { + rdev_dec_pending(rrdev, mddev); + r10_bio->devs[k].repl_bio = NULL; + } + } + + bio->bi_status = errno_to_blk_status(error); + set_bit(R10BIO_Uptodate, &r10_bio->state); + raid_end_bio_io(r10_bio); } static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) @@ -1644,6 +1675,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) if (remainder) { split_size = stripe_size - remainder; split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split); + if (IS_ERR(split)) { + bio->bi_status = errno_to_blk_status(PTR_ERR(split)); + bio_endio(bio); + return 0; + } bio_chain(split, bio); allow_barrier(conf); /* Resend the fist split part */ @@ -1654,6 +1690,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) if (remainder) { split_size = bio_sectors(bio) - remainder; split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split); + if (IS_ERR(split)) { + bio->bi_status = errno_to_blk_status(PTR_ERR(split)); + bio_endio(bio); + return 0; + } bio_chain(split, bio); allow_barrier(conf); /* Resend the second split part */ @@ -4061,9 +4102,12 @@ static int raid10_run(struct mddev *mddev) } if (!mddev_is_dm(conf->mddev)) { - ret = raid10_set_queue_limits(mddev); - if (ret) + int err = raid10_set_queue_limits(mddev); + + if (err) { + ret = err; goto out_free_conf; + } } /* need to check that every block has at least one working mirror */ diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index a70cbec12ed0..37c4da5311ca 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -258,7 +258,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, 
memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); pplhdr->signature = cpu_to_le32(ppl_conf->signature); - io->seq = atomic64_add_return(1, &ppl_conf->seq); + io->seq = atomic64_inc_return(&ppl_conf->seq); pplhdr->generation = cpu_to_le64(io->seq); return io; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dc2ea636d173..f09e7677ee9f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4724,14 +4724,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (rdev) { is_bad = rdev_has_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf)); - if (s->blocked_rdev == NULL - && (test_bit(Blocked, &rdev->flags) - || is_bad < 0)) { + if (s->blocked_rdev == NULL) { if (is_bad < 0) - set_bit(BlockedBadBlocks, - &rdev->flags); - s->blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); + set_bit(BlockedBadBlocks, &rdev->flags); + if (rdev_blocked(rdev)) { + s->blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + } } } clear_bit(R5_Insync, &dev->flags); @@ -7177,6 +7176,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) err = mddev_suspend_and_lock(mddev); if (err) return err; + raid5_quiesce(mddev, true); + conf = mddev->private; if (!conf) err = -ENODEV; @@ -7198,6 +7199,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) kfree(old_groups); } } + + raid5_quiesce(mddev, false); mddev_unlock_and_resume(mddev); return err ?: len; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896ecfc4afa6..d174e586698f 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -633,7 +633,7 @@ struct r5conf { * two caches. */ int active_name; - char cache_name[2][32]; + char cache_name[2][48]; struct kmem_cache *slab_cache; /* for allocating stripes */ struct mutex cache_size_mutex; /* Protect changes to cache size */ |
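
Note on the repeated bio_split() changes in raid0, raid1 and raid10: bio_split() can fail and return an ERR_PTR() rather than a valid bio, so every split site above now checks IS_ERR() and fails the original bio instead of dereferencing the error pointer. A minimal sketch of that pattern, assuming kernel context; the helper name split_or_fail() and its parameters are illustrative, not part of the patch:

/*
 * Split off the first 'sectors' of 'bio', or fail 'bio' if the split
 * itself fails. Mirrors the error handling added in the hunks above.
 */
static struct bio *split_or_fail(struct bio *bio, sector_t sectors,
				 struct bio_set *bs)
{
	struct bio *split = bio_split(bio, sectors, GFP_NOIO, bs);

	if (IS_ERR(split)) {
		/* Nothing was split; complete the original bio with the error. */
		bio->bi_status = errno_to_blk_status(PTR_ERR(split));
		bio_endio(bio);
		return NULL;
	}

	/* The remainder must not complete before the split head does. */
	bio_chain(split, bio);
	/* Resubmit the remainder; the caller keeps mapping the head. */
	submit_bio_noacct(bio);
	return split;
}

In the patch itself the error paths also drop the rdev references taken earlier (see the err_handle labels in raid1_read_request() and raid10_write_request()), which this simplified helper does not model.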
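Note on the md_flush_request() change: bio_chain() was dropped for the per-rdev pre-flush bios because bio_chain_endio() copies a child's bi_status into the parent, so a failed flush on one member would fail the whole request; md_end_flush() above only logs the flush error and completes the parent with its status untouched. A sketch of the manual chaining, assuming kernel context; child_end_io() and send_chained_flush() are illustrative names:

/* Child completion that deliberately does not touch parent->bi_status. */
static void child_end_io(struct bio *bio)
{
	struct bio *parent = bio->bi_private;

	if (bio->bi_status)
		pr_err("flush error %d on %pg\n",
		       blk_status_to_errno(bio->bi_status), bio->bi_bdev);

	bio_put(bio);
	bio_endio(parent);		/* drops one __bi_remaining reference */
}

static void send_chained_flush(struct bio *parent, struct block_device *bdev,
			       struct bio_set *bs)
{
	struct bio *new = bio_alloc_bioset(bdev, 0,
					   REQ_OP_WRITE | REQ_PREFLUSH,
					   GFP_NOIO, bs);

	new->bi_private = parent;
	new->bi_end_io = child_end_io;
	bio_inc_remaining(parent);	/* parent now waits for this child */
	submit_bio(new);
}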
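Note on the new rdev_blocked() helper in md.h: it centralises the "should a writer wait for this device?" decision — wait while Blocked is set or while bad blocks are unacknowledged (BlockedBadBlocks), but never wait on a Faulty device, since per the comment above a faulty device is not accessed anymore and its bad blocks need no acknowledgement. The raid1, raid10 and raid5 hunks all switch to it; a condensed sketch of the raid1-style wait loop (the helper name and parameters are illustrative):

/*
 * Wait until 'rdev' is usable for writes, or report that the bio
 * should get bio_wouldblock_error() when REQ_NOWAIT is set.
 */
static bool wait_until_unblocked(struct md_rdev *rdev, struct bio *bio)
{
	while (rdev_blocked(rdev)) {
		if (bio->bi_opf & REQ_NOWAIT)
			return false;

		/* md_wait_for_blocked_rdev() drops this reference for us. */
		atomic_inc(&rdev->nr_pending);
		md_wait_for_blocked_rdev(rdev, rdev->mddev);
	}
	return true;
}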