summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig4
-rw-r--r--drivers/md/bcache/Kconfig1
-rw-r--r--drivers/md/bcache/btree.c2
-rw-r--r--drivers/md/bcache/super.c26
-rw-r--r--drivers/md/bcache/util.h2
-rw-r--r--drivers/md/dm-ebs-target.c2
-rw-r--r--drivers/md/dm-integrity.c4
-rw-r--r--drivers/md/dm-ioctl.c4
-rw-r--r--drivers/md/dm-rq.c1
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm-writecache.c5
-rw-r--r--drivers/md/dm.c32
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/raid1.c21
-rw-r--r--drivers/md/raid10.c18
-rw-r--r--drivers/md/raid5.c4
16 files changed, 74 insertions, 58 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 0602e82a9516..f45fb372e51b 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -15,6 +15,7 @@ if MD
config BLK_DEV_MD
tristate "RAID support"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
help
This driver lets you combine several hard disk partitions into one
logical block device. This can be used to simply append one
@@ -201,6 +202,7 @@ config BLK_DEV_DM_BUILTIN
config BLK_DEV_DM
tristate "Device mapper support"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
select BLK_DEV_DM_BUILTIN
depends on DAX || DAX=n
help
@@ -340,7 +342,7 @@ config DM_WRITECACHE
config DM_EBS
tristate "Emulated block size target (EXPERIMENTAL)"
- depends on BLK_DEV_DM
+ depends on BLK_DEV_DM && !HIGHMEM
select DM_BUFIO
help
dm-ebs emulates smaller logical block size on backing devices
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index d1ca4d059c20..cf3e8096942a 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -2,6 +2,7 @@
config BCACHE
tristate "Block device as cache"
+ select BLOCK_HOLDER_DEPRECATED if SYSFS
select CRC64
help
Allows a block device to be used as cache for other devices; uses
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 183a58c89377..0595559de174 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -378,7 +378,7 @@ static void do_btree_node_write(struct btree *b)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bv, b->bio, iter_all) {
- memcpy(page_address(bv->bv_page), addr, PAGE_SIZE);
+ memcpy(bvec_virt(bv), addr, PAGE_SIZE);
addr += PAGE_SIZE;
}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 185246a0d855..f2874c77ff79 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -885,11 +885,6 @@ static void bcache_device_free(struct bcache_device *d)
bcache_device_detach(d);
if (disk) {
- bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
-
- if (disk_added)
- del_gendisk(disk);
-
blk_cleanup_disk(disk);
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
@@ -931,20 +926,20 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
if (!d->full_dirty_stripes)
- return -ENOMEM;
+ goto out_free_stripe_sectors_dirty;
idx = ida_simple_get(&bcache_device_idx, 0,
BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
if (idx < 0)
- return idx;
+ goto out_free_full_dirty_stripes;
if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
- goto err;
+ goto out_ida_remove;
d->disk = blk_alloc_disk(NUMA_NO_NODE);
if (!d->disk)
- goto err;
+ goto out_bioset_exit;
set_capacity(d->disk, sectors);
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
@@ -987,8 +982,14 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
return 0;
-err:
+out_bioset_exit:
+ bioset_exit(&d->bio_split);
+out_ida_remove:
ida_simple_remove(&bcache_device_idx, idx);
+out_free_full_dirty_stripes:
+ kvfree(d->full_dirty_stripes);
+out_free_stripe_sectors_dirty:
+ kvfree(d->stripe_sectors_dirty);
return -ENOMEM;
}
@@ -1365,8 +1366,10 @@ static void cached_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock);
- if (atomic_read(&dc->running))
+ if (atomic_read(&dc->running)) {
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
+ del_gendisk(dc->disk.disk);
+ }
bcache_device_free(&dc->disk);
list_del(&dc->list);
@@ -1512,6 +1515,7 @@ static void flash_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock);
atomic_long_sub(bcache_dev_sectors_dirty(d),
&d->c->flash_dev_dirty_sectors);
+ del_gendisk(d->disk);
bcache_device_free(d);
mutex_unlock(&bch_register_lock);
kobject_put(&d->kobj);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index bca4a7c97da7..b64460a76267 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
#include "closure.h"
-#define PAGE_SECTORS (PAGE_SIZE / 512)
-
struct closure;
#ifdef CONFIG_BCACHE_DEBUG
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 71475a2410be..0c509dae0ff8 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -74,7 +74,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
if (unlikely(!bv->bv_page || !bv_len))
return -EIO;
- pa = page_address(bv->bv_page) + bv->bv_offset;
+ pa = bvec_virt(bv);
/* Handle overlapping page <-> blocks */
while (bv_len) {
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 20f2510db1f6..a9ea361769a7 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1819,7 +1819,7 @@ again:
unsigned this_len;
BUG_ON(PageHighMem(biv.bv_page));
- tag = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+ tag = bvec_virt(&biv);
this_len = min(biv.bv_len, data_to_process);
r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
@@ -2006,7 +2006,7 @@ retry_kmap:
unsigned tag_now = min(biv.bv_len, tag_todo);
char *tag_addr;
BUG_ON(PageHighMem(biv.bv_page));
- tag_addr = lowmem_page_address(biv.bv_page) + biv.bv_offset;
+ tag_addr = bvec_virt(&biv);
if (likely(dio->op == REQ_OP_WRITE))
memcpy(tag_ptr, tag_addr, tag_now);
else
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 2209cbcd84db..2575074a2204 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1436,9 +1436,6 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
}
if (dm_get_md_type(md) == DM_TYPE_NONE) {
- /* Initial table load: acquire type of table. */
- dm_set_md_type(md, dm_table_get_type(t));
-
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
@@ -2187,7 +2184,6 @@ int __init dm_early_create(struct dm_ioctl *dmi,
if (r)
goto err_destroy_table;
- md->type = dm_table_get_type(t);
/* setup md->queue to reflect md's type (may block) */
r = dm_setup_md_queue(md, t);
if (r) {
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 0dbd48cbdff9..5b95eea517d1 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -559,7 +559,6 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
if (err)
goto out_tag_set;
- elevator_init_mq(md->queue);
return 0;
out_tag_set:
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0543cdf89e92..b03eabc1ed7c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2076,7 +2076,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
dm_update_keyslot_manager(q, t);
- blk_queue_update_readahead(q);
+ disk_update_readahead(t->md->disk);
return 0;
}
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index e21e29e81bbf..3d2cf811ec3e 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1214,14 +1214,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
{
void *buf;
- unsigned long flags;
unsigned size;
int rw = bio_data_dir(bio);
unsigned remaining_size = wc->block_size;
do {
struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
- buf = bvec_kmap_irq(&bv, &flags);
+ buf = bvec_kmap_local(&bv);
size = bv.bv_len;
if (unlikely(size > remaining_size))
size = remaining_size;
@@ -1239,7 +1238,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
memcpy_flushcache_optimized(data, buf, size);
}
- bvec_kunmap_irq(buf, &flags);
+ kunmap_local(buf);
data = (char *)data + size;
remaining_size -= size;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2c5f9e585211..7981b7287628 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1693,14 +1693,13 @@ static void cleanup_mapped_device(struct mapped_device *md)
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
- del_gendisk(md->disk);
- }
-
- if (md->queue)
+ if (dm_get_md_type(md) != DM_TYPE_NONE) {
+ dm_sysfs_exit(md);
+ del_gendisk(md->disk);
+ }
dm_queue_destroy_keyslot_manager(md->queue);
-
- if (md->disk)
blk_cleanup_disk(md->disk);
+ }
cleanup_srcu_struct(&md->io_barrier);
@@ -1792,7 +1791,6 @@ static struct mapped_device *alloc_dev(int minor)
goto bad;
}
- add_disk_no_queue_reg(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
@@ -1993,19 +1991,12 @@ static struct dm_table *__unbind(struct mapped_device *md)
*/
int dm_create(int minor, struct mapped_device **result)
{
- int r;
struct mapped_device *md;
md = alloc_dev(minor);
if (!md)
return -ENXIO;
- r = dm_sysfs_init(md);
- if (r) {
- free_dev(md);
- return r;
- }
-
*result = md;
return 0;
}
@@ -2056,9 +2047,9 @@ EXPORT_SYMBOL_GPL(dm_get_queue_limits);
*/
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
- int r;
+ enum dm_queue_mode type = dm_table_get_type(t);
struct queue_limits limits;
- enum dm_queue_mode type = dm_get_md_type(md);
+ int r;
switch (type) {
case DM_TYPE_REQUEST_BASED:
@@ -2086,8 +2077,14 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
if (r)
return r;
- blk_register_queue(md->disk);
+ add_disk(md->disk);
+ r = dm_sysfs_init(md);
+ if (r) {
+ del_gendisk(md->disk);
+ return r;
+ }
+ md->type = type;
return 0;
}
@@ -2193,7 +2190,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
dm_device_name(md), atomic_read(&md->holders));
- dm_sysfs_exit(md);
dm_table_destroy(__unbind(md));
free_dev(md);
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 832547cf038f..4c96c36bd01a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -764,9 +764,7 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
- int flags = rdev->bdev->bd_disk->flags;
-
- if (!(flags & GENHD_FL_UP)) {
+ if (!disk_live(rdev->bdev->bd_disk)) {
if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
pr_warn("md: %s: %s array has a missing/failed member\n",
mdname(rdev->mddev), md_type);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 51f2547c2007..19598bd38939 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -474,8 +474,6 @@ static void raid1_end_write_request(struct bio *bio)
/*
* When the device is faulty, it is not necessary to
* handle write error.
- * For failfast, this is the only remaining device,
- * We need to retry the write without FailFast.
*/
if (!test_bit(Faulty, &rdev->flags))
set_bit(R1BIO_WriteError, &r1_bio->state);
@@ -1331,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct raid1_plug_cb *plug = NULL;
int first_clone;
int max_sectors;
+ bool write_behind = false;
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1383,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+
+ /*
+ * The write-behind io is only attempted on drives marked as
+ * write-mostly, which means we could allocate write behind
+ * bio later.
+ */
+ if (rdev && test_bit(WriteMostly, &rdev->flags))
+ write_behind = true;
+
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
@@ -1456,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
goto retry_write;
}
+ /*
+ * When using a bitmap, we may call alloc_behind_master_bio below.
+ * alloc_behind_master_bio allocates a copy of the data payload a page
+ * at a time and thus needs a new bio that can fit the whole payload
+ * this bio in page sized chunks.
+ */
+ if (write_behind && bitmap)
+ max_sectors = min_t(int, max_sectors,
+ BIO_MAX_VECS * (PAGE_SIZE >> 9));
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
GFP_NOIO, &conf->bio_split);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 16977e8e075d..aa2636582841 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -471,12 +471,12 @@ static void raid10_end_write_request(struct bio *bio)
/*
* When the device is faulty, it is not necessary to
* handle write error.
- * For failfast, this is the only remaining device,
- * We need to retry the write without FailFast.
*/
if (!test_bit(Faulty, &rdev->flags))
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
+ /* Fail the request */
+ set_bit(R10BIO_Degraded, &r10_bio->state);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
@@ -1712,6 +1712,11 @@ retry_discard:
} else
r10_bio->master_bio = (struct bio *)first_r10bio;
+ /*
+ * first select target devices under rcu_lock and
+ * inc refcount on their rdev. Record them by setting
+ * bios[x] to bio
+ */
rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1743,9 +1748,6 @@ retry_discard:
for (disk = 0; disk < geo->raid_disks; disk++) {
sector_t dev_start, dev_end;
struct bio *mbio, *rbio = NULL;
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[disk].replacement);
/*
* Now start to calculate the start and end address for each disk.
@@ -1775,9 +1777,12 @@ retry_discard:
/*
* It only handles discard bio which size is >= stripe size, so
- * dev_end > dev_start all the time
+ * dev_end > dev_start all the time.
+ * It doesn't need to use rcu lock to get rdev here. We already
+ * add rdev->nr_pending in the first loop.
*/
if (r10_bio->devs[disk].bio) {
+ struct md_rdev *rdev = conf->mirrors[disk].rdev;
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
mbio->bi_end_io = raid10_end_discard_request;
mbio->bi_private = r10_bio;
@@ -1790,6 +1795,7 @@ retry_discard:
bio_endio(mbio);
}
if (r10_bio->devs[disk].repl_bio) {
+ struct md_rdev *rrdev = conf->mirrors[disk].replacement;
rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
rbio->bi_end_io = raid10_end_discard_request;
rbio->bi_private = r10_bio;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b8436e4930ed..02ed53b20654 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2437,7 +2437,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
conf->scribble_sectors >= new_sectors)
return 0;
mddev_suspend(conf->mddev);
- get_online_cpus();
+ cpus_read_lock();
for_each_present_cpu(cpu) {
struct raid5_percpu *percpu;
@@ -2449,7 +2449,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
break;
}
- put_online_cpus();
+ cpus_read_unlock();
mddev_resume(conf->mddev);
if (!err) {
conf->scribble_disks = new_disks;