From 568c73a355e0b845dc983aa59c8a8dc69294b275 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Jan 2019 14:10:37 -0500 Subject: dm: update dm_process_bio() to split bio if in ->make_request_fn() Must call blk_queue_split() otherwise queue_limits for abnormal requests (e.g. discard, writesame, etc) won't be imposed. In addition, add dm_queue_split() to simplify DM specific splitting that is needed for targets that impose ti->max_io_len. Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 93 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 27 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 515e6af9bed2..7a774fcd0194 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1533,6 +1533,22 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false); } +static bool is_abnormal_io(struct bio *bio) +{ + bool r = false; + + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE_ZEROES: + r = true; + break; + } + + return r; +} + static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, int *result) { @@ -1565,7 +1581,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (!dm_target_is_valid(ti)) return -EIO; - if (unlikely(__process_abnormal_io(ci, ti, &r))) + if (__process_abnormal_io(ci, ti, &r)) return r; len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); @@ -1601,13 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, blk_qc_t ret = BLK_QC_T_NONE; int error = 0; - if (unlikely(!map)) { - bio_io_error(bio); - return ret; - } - - blk_queue_split(md->queue, &bio); - init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { @@ -1675,18 +1684,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * Optimized 
variant of __split_and_process_bio that leverages the * fact that targets that use it do _not_ have a need to split bios. */ -static blk_qc_t __process_bio(struct mapped_device *md, - struct dm_table *map, struct bio *bio) +static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, + struct bio *bio, struct dm_target *ti) { struct clone_info ci; blk_qc_t ret = BLK_QC_T_NONE; int error = 0; - if (unlikely(!map)) { - bio_io_error(bio); - return ret; - } - init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { @@ -1704,21 +1708,11 @@ static blk_qc_t __process_bio(struct mapped_device *md, error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ } else { - struct dm_target *ti = md->immutable_target; struct dm_target_io *tio; - /* - * Defend against IO still getting in during teardown - * - as was seen for a time with nvme-fcloop - */ - if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) { - error = -EIO; - goto out; - } - ci.bio = bio; ci.sector_count = bio_sectors(bio); - if (unlikely(__process_abnormal_io(&ci, ti, &error))) + if (__process_abnormal_io(&ci, ti, &error)) goto out; tio = alloc_tio(&ci, ti, 0, GFP_NOIO); @@ -1730,11 +1724,56 @@ out: return ret; } +static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio) +{ + unsigned len, sector_count; + + sector_count = bio_sectors(*bio); + len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count); + + if (sector_count > len) { + struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split); + + bio_chain(split, *bio); + trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector); + generic_make_request(*bio); + *bio = split; + } +} + static blk_qc_t dm_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { + blk_qc_t ret = BLK_QC_T_NONE; + struct dm_target *ti = md->immutable_target; + + if (unlikely(!map)) { + bio_io_error(bio); + return ret; + } + + if (!ti) 
{ + ti = dm_table_find_target(map, bio->bi_iter.bi_sector); + if (unlikely(!ti || !dm_target_is_valid(ti))) { + bio_io_error(bio); + return ret; + } + } + + /* + * If in ->make_request_fn we need to use blk_queue_split(), otherwise + * queue_limits for abnormal requests (e.g. discard, writesame, etc) + * won't be imposed. + */ + if (current->bio_list) { + if (is_abnormal_io(bio)) + blk_queue_split(md->queue, &bio); + else + dm_queue_split(md, ti, &bio); + } + if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) - return __process_bio(md, map, bio); + return __process_bio(md, map, bio, ti); else return __split_and_process_bio(md, map, bio); } -- cgit From 61697a6abd24acba941359c6268a94f4afe4a53d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Jan 2019 14:19:26 -0500 Subject: dm: eliminate 'split_discard_bios' flag from DM target interface There is no need to have DM core split discards on behalf of a DM target now that blk_queue_split() handles splitting discards based on the queue_limits. A DM target just needs to set max_discard_sectors, discard_granularity, etc, in queue_limits. 
Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 1 - drivers/md/dm-raid.c | 14 +++++++++----- drivers/md/dm-thin.c | 1 - drivers/md/dm-zoned-target.c | 1 - drivers/md/dm.c | 25 ++++++------------------- 5 files changed, 15 insertions(+), 27 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b29a8327eed1..adc529f12b6b 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2496,7 +2496,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; - ti->split_discard_bios = false; ti->per_io_data_size = sizeof(struct per_bio_data); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index adcfe8ae10aa..9fdef6897316 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs) } } - /* - * RAID1 and RAID10 personalities require bio splitting, - * RAID0/4/5/6 don't and process large discard bios properly. - */ - ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs)); ti->num_discard_bios = 1; } @@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs)); + + /* + * RAID1 and RAID10 personalities require bio splitting, + * RAID0/4/5/6 don't and process large discard bios properly. 
+ */ + if (rs_is_raid1(rs) || rs_is_raid10(rs)) { + limits->discard_granularity = chunk_size; + limits->max_discard_sectors = chunk_size; + } } static void raid_postsuspend(struct dm_target *ti) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e83b63608262..0d9ded0f5e50 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4227,7 +4227,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; - ti->split_discard_bios = false; } mutex_unlock(&dm_thin_pool_table.mutex); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 6af5babe6837..8865c1709e16 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->per_io_data_size = sizeof(struct dmz_bioctx); ti->flush_supported = true; ti->discards_supported = true; - ti->split_discard_bios = true; /* The exposed capacity is the number of chunks that can be mapped */ ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7a774fcd0194..55f12df3589d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1478,17 +1478,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti) return ti->num_write_zeroes_bios; } -typedef bool (*is_split_required_fn)(struct dm_target *ti); - -static bool is_split_required_for_discard(struct dm_target *ti) -{ - return ti->split_discard_bios; -} - static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, bool is_split_required) + unsigned num_bios) { - unsigned len; + unsigned len = ci->sector_count; /* * Even though the device advertised support for this type of @@ -1499,11 +1492,6 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * if (!num_bios) return 
-EOPNOTSUPP; - if (!is_split_required) - len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); - else - len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; @@ -1514,23 +1502,22 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * static int __send_discard(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti), - is_split_required_for_discard(ti)); + return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti)); } static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti)); } static int __send_write_same(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti)); } static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) { - return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false); + return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti)); } static bool is_abnormal_io(struct bio *bio) -- cgit From e689fbab3ddd92557134ef92c40a780a33299d05 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 20 Feb 2019 15:37:44 -0500 Subject: dm: remove unused _rq_tio_cache and _rq_cache Also move dm_rq_target_io structure definition from dm-rq.h to dm-rq.c Fixes: 6a23e05c2fe3c6 ("dm: remove legacy request-based IO path") Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 16 ++++++++++++++++ drivers/md/dm-rq.h | 16 ---------------- drivers/md/dm.c | 22 ++-------------------- 3 files changed, 18 insertions(+), 36 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-rq.c 
b/drivers/md/dm-rq.c index a20531e5f3b4..9428cd951e3b 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -12,6 +12,22 @@ #define DM_MSG_PREFIX "core-rq" +/* + * One of these is allocated per request. + */ +struct dm_rq_target_io { + struct mapped_device *md; + struct dm_target *ti; + struct request *orig, *clone; + struct kthread_work work; + blk_status_t error; + union map_info info; + struct dm_stats_aux stats_aux; + unsigned long duration_jiffies; + unsigned n_sectors; + unsigned completed; +}; + #define DM_MQ_NR_HW_QUEUES 1 #define DM_MQ_QUEUE_DEPTH 2048 static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES; diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index b39245545229..1eea0da641db 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -16,22 +16,6 @@ struct mapped_device; -/* - * One of these is allocated per request. - */ -struct dm_rq_target_io { - struct mapped_device *md; - struct dm_target *ti; - struct request *orig, *clone; - struct kthread_work work; - blk_status_t error; - union map_info info; - struct dm_stats_aux stats_aux; - unsigned long duration_jiffies; - unsigned n_sectors; - unsigned completed; -}; - /* * For request-based dm - the bio clones we allocate are embedded in these * structs. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 55f12df3589d..d8a844c522e6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -158,9 +158,6 @@ struct table_device { struct dm_dev dm_dev; }; -static struct kmem_cache *_rq_tio_cache; -static struct kmem_cache *_rq_cache; - /* * Bio-based DM's mempools' reserved IOs set by the user. 
*/ @@ -222,20 +219,11 @@ static unsigned dm_get_numa_node(void) static int __init local_init(void) { - int r = -ENOMEM; - - _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); - if (!_rq_tio_cache) - return r; - - _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request), - __alignof__(struct request), 0, NULL); - if (!_rq_cache) - goto out_free_rq_tio_cache; + int r; r = dm_uevent_init(); if (r) - goto out_free_rq_cache; + return r; deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); if (!deferred_remove_workqueue) { @@ -257,10 +245,6 @@ out_free_workqueue: destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); -out_free_rq_cache: - kmem_cache_destroy(_rq_cache); -out_free_rq_tio_cache: - kmem_cache_destroy(_rq_tio_cache); return r; } @@ -270,8 +254,6 @@ static void local_exit(void) flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); - kmem_cache_destroy(_rq_cache); - kmem_cache_destroy(_rq_tio_cache); unregister_blkdev(_major, _name); dm_uevent_exit(); -- cgit From d2832376b69e1e02cae0de660ab7c03223f09341 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Jan 2019 10:02:33 -0600 Subject: dm switch: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Mike Snitzer --- drivers/md/dm-switch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index fae35caf3672..8a0f057b8122 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -61,8 +61,7 @@ static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_pat { struct switch_ctx *sctx; - sctx = kzalloc(sizeof(struct switch_ctx) + nr_paths * sizeof(struct switch_path), - GFP_KERNEL); + sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL); if (!sctx) return NULL; -- cgit From effd58c95f277744f75d6e08819ac859dbcbd351 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 22 Feb 2019 09:52:02 -0500 Subject: dm: always call blk_queue_split() in dm_process_bio() Do not just call blk_queue_split() if the bio is_abnormal_io(). Fixes: 568c73a355e ("dm: update dm_process_bio() to split bio if in ->make_request_fn()") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d8a844c522e6..68d24056d0b1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1735,9 +1735,8 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, * won't be imposed. */ if (current->bio_list) { - if (is_abnormal_io(bio)) - blk_queue_split(md->queue, &bio); - else + blk_queue_split(md->queue, &bio); + if (!is_abnormal_io(bio)) dm_queue_split(md, ti, &bio); } -- cgit From 5e3d0e37062e2fc28187db403b11ce7764d843a5 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 13 Feb 2019 13:46:56 +0800 Subject: dm integrity: remove redundant unlikely annotation unlikely has already included in IS_ERR(), so just remove redundant unlikely annotation. 
Signed-off-by: Chengguang Xu Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 457200ca6287..f1ab81b19de9 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1122,7 +1122,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se return r; data = dm_bufio_read(ic->bufio, *metadata_block, &b); - if (unlikely(IS_ERR(data))) + if (IS_ERR(data)) return PTR_ERR(data); to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size); -- cgit From 821b40da4d91839005166b808abdc6ca4f5235aa Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 13 Feb 2019 13:46:57 +0800 Subject: dm verity fec: remove redundant unlikely annotation unlikely has already included in IS_ERR(), so just remove redundant unlikely annotation. Signed-off-by: Chengguang Xu Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-fec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0ce04e5b4afb..b634fa23f4c4 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -73,7 +73,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, *offset = (unsigned)(position - (block << v->data_dev_block_bits)); res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); - if (unlikely(IS_ERR(res))) { + if (IS_ERR(res)) { DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", v->data_dev->name, (unsigned long long)rsb, (unsigned long long)(v->fec->start + block), @@ -163,7 +163,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); - if (unlikely(IS_ERR(par))) + if (IS_ERR(par)) return PTR_ERR(par); } } @@ -253,7 +253,7 @@ 
static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, } bbuf = dm_bufio_read(bufio, block, &buf); - if (unlikely(IS_ERR(bbuf))) { + if (IS_ERR(bbuf)) { DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", v->data_dev->name, (unsigned long long)rsb, -- cgit From 5941c621dc9eb61e3b22f934675577d559d1fa27 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 13 Feb 2019 13:46:58 +0800 Subject: dm block manager: remove redundant unlikely annotation unlikely has already included in IS_ERR(), so just remove redundant unlikely annotation. Signed-off-by: Chengguang Xu Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-block-manager.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 492a3f8ac119..3972232b8037 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, int r; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); aux = dm_bufio_get_aux_data(to_buffer(*result)); @@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, int r; p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); if (unlikely(!p)) return -EWOULDBLOCK; @@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return -EPERM; p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) return PTR_ERR(p); memset(p, 0, 
dm_bm_block_size(bm)); -- cgit From 70de2cbda8a5d788284469e755f8b097d339c240 Mon Sep 17 00:00:00 2001 From: "Jason Cai (Xiang Feng)" Date: Sun, 20 Jan 2019 22:39:13 +0800 Subject: dm thin: add sanity checks to thin-pool and external snapshot creation Invoking dm_get_device() twice on the same device path with different modes is dangerous. Because in that case, upgrade_mode() will alloc a new 'dm_dev' and free the old one, which may be referenced by a previous caller. Dereferencing the dangling pointer will trigger kernel NULL pointer dereference. The following two cases can reproduce this issue. Actually, they are invalid setups that must be disallowed, e.g.: 1. Creating a thin-pool with read_only mode, and the same device as both metadata and data. dmsetup create thinp --table \ "0 41943040 thin-pool /dev/vdb /dev/vdb 128 0 1 read_only" BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 ... Call Trace: new_read+0xfb/0x110 [dm_bufio] dm_bm_read_lock+0x43/0x190 [dm_persistent_data] ? kmem_cache_alloc_trace+0x15c/0x1e0 __create_persistent_data_objects+0x65/0x3e0 [dm_thin_pool] dm_pool_metadata_open+0x8c/0xf0 [dm_thin_pool] pool_ctr.cold.79+0x213/0x913 [dm_thin_pool] ? realloc_argv+0x50/0x70 [dm_mod] dm_table_add_target+0x14e/0x330 [dm_mod] table_load+0x122/0x2e0 [dm_mod] ? dev_status+0x40/0x40 [dm_mod] ctl_ioctl+0x1aa/0x3e0 [dm_mod] dm_ctl_ioctl+0xa/0x10 [dm_mod] do_vfs_ioctl+0xa2/0x600 ? handle_mm_fault+0xda/0x200 ? __do_page_fault+0x26c/0x4f0 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x55/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 2. Creating a external snapshot using the same thin-pool device. dmsetup create thinp --table \ "0 41943040 thin-pool /dev/vdc /dev/vdb 128 0 2 ignore_discard" dmsetup message /dev/mapper/thinp 0 "create_thin 0" dmsetup create snap --table \ "0 204800 thin /dev/mapper/thinp 0 /dev/mapper/thinp" BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... Call Trace: ? 
__alloc_pages_nodemask+0x13c/0x2e0 retrieve_status+0xa5/0x1f0 [dm_mod] ? dm_get_live_or_inactive_table.isra.7+0x20/0x20 [dm_mod] table_status+0x61/0xa0 [dm_mod] ctl_ioctl+0x1aa/0x3e0 [dm_mod] dm_ctl_ioctl+0xa/0x10 [dm_mod] do_vfs_ioctl+0xa2/0x600 ksys_ioctl+0x60/0x90 ? ksys_write+0x4f/0xb0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x55/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Jason Cai (Xiang Feng) Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 0d9ded0f5e50..fcd887703f95 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3283,6 +3283,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; + /* make sure metadata and data are different devices */ + if (!strcmp(argv[0], argv[1])) { + ti->error = "Error setting metadata or data device"; + r = -EINVAL; + goto out_unlock; + } + /* * Set default pool features. */ @@ -4167,6 +4174,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) tc->sort_bio_list = RB_ROOT; if (argc == 3) { + if (!strcmp(argv[0], argv[2])) { + ti->error = "Error setting origin device"; + r = -EINVAL; + goto bad_origin_dev; + } + r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); if (r) { ti->error = "Error opening origin device"; -- cgit From 6bbc923dfcf57d6b97388819a7393835664c7a8e Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Thu, 21 Feb 2019 17:33:34 -0300 Subject: dm: add support to directly boot to a mapped device Add a "create" module parameter, which allows device-mapper targets to be configured at boot time. This enables early use of DM targets in the boot process (as the root device or otherwise) without the need of an initramfs. 
The syntax used in the boot param is based on the concise format from the dmsetup tool to follow the rule of least surprise: dmsetup table --concise /dev/mapper/lroot Which is: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+] Where, <name> ::= The device name. <uuid> ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | "" <minor> ::= The device minor number | "" <flags> ::= "ro" | "rw" <table> ::= <start_sector> <num_sectors> <target_type> <target_args> <target_type> ::= "verity" | "linear" | ... For example, the following could be added in the boot parameters: dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0 Only the targets that were tested are allowed and the ones that don't change any block device when the device is created as read-only. For example, mirror and cache targets are not allowed. The rationale behind this is that if the user makes a mistake, choosing the wrong device to be the mirror or the cache can corrupt data. The only targets initially allowed are: * crypt * delay * linear * snapshot-origin * striped * verity Co-developed-by: Will Drewry Co-developed-by: Kees Cook Co-developed-by: Enric Balletbo i Serra Signed-off-by: Helen Koike Reviewed-by: Kees Cook Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 12 ++ drivers/md/Makefile | 4 + drivers/md/dm-init.c | 303 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-ioctl.c | 103 +++++++++++++++++ 4 files changed, 422 insertions(+) create mode 100644 drivers/md/dm-init.c (limited to 'drivers/md') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 3db222509e44..2557f198e175 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -436,6 +436,18 @@ config DM_DELAY If unsure, say N. +config DM_INIT + bool "DM \"dm-mod.create=\" parameter support" + depends on BLK_DEV_DM=y + ---help--- + Enable "dm-mod.create=" parameter to create mapped devices at init time. + This option is useful to allow mounting rootfs without requiring an + initramfs. + See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." + format. + + If unsure, say N. 
+ config DM_UEVENT bool "DM uevents" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 822f4e8753bc..a52b703e588e 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -69,6 +69,10 @@ obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o obj-$(CONFIG_DM_ZONED) += dm-zoned.o obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o +ifeq ($(CONFIG_DM_INIT),y) +dm-mod-objs += dm-init.o +endif + ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o endif diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c new file mode 100644 index 000000000000..b53f30f16b4d --- /dev/null +++ b/drivers/md/dm-init.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * dm-init.c + * Copyright (C) 2017 The Chromium OS Authors + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include + +#define DM_MSG_PREFIX "init" +#define DM_MAX_DEVICES 256 +#define DM_MAX_TARGETS 256 +#define DM_MAX_STR_SIZE 4096 + +static char *create; + +/* + * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+] + * Table format: <start_sector> <num_sectors> <target_type> <target_args> + * + * See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." format + * details. + */ + +struct dm_device { + struct dm_ioctl dmi; + struct dm_target_spec *table[DM_MAX_TARGETS]; + char *target_args_array[DM_MAX_TARGETS]; + struct list_head list; +}; + +const char *dm_allowed_targets[] __initconst = { + "crypt", + "delay", + "linear", + "snapshot-origin", + "striped", + "verity", +}; + +static int __init dm_verify_target_type(const char *target) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dm_allowed_targets); i++) { + if (!strcmp(dm_allowed_targets[i], target)) + return 0; + } + return -EINVAL; +} + +static void __init dm_setup_cleanup(struct list_head *devices) +{ + struct dm_device *dev, *tmp; + unsigned int i; + + list_for_each_entry_safe(dev, tmp, devices, list) { + list_del(&dev->list); + for (i = 0; i < dev->dmi.target_count; i++) { + kfree(dev->table[i]); + kfree(dev->target_args_array[i]); + } + kfree(dev); + } +} + +/** + * str_field_delimit - delimit a string based on a separator char. + * @str: the pointer to the string to delimit. + * @separator: char that delimits the field + * + * Find a @separator and replace it by '\0'. + * Remove leading and trailing spaces. + * Return the remainder string after the @separator. + */ +static char __init *str_field_delimit(char **str, char separator) +{ + char *s; + + /* TODO: add support for escaped characters */ + *str = skip_spaces(*str); + s = strchr(*str, separator); + /* Delimit the field and remove trailing spaces */ + if (s) + *s = '\0'; + *str = strim(*str); + return s ? ++s : NULL; +} + +/** + * dm_parse_table_entry - parse a table entry + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * <start_sector> <num_sectors> <target_type> <target_args>[, ...] + * + * Return the remainder string after the table entry, i.e, after the comma which + * delimits the entry or NULL if reached the end of the string. 
+ */ +static char __init *dm_parse_table_entry(struct dm_device *dev, char *str) +{ + const unsigned int n = dev->dmi.target_count - 1; + struct dm_target_spec *sp; + unsigned int i; + /* fields: <start_sector> <num_sectors> <target_type> <target_args> */ + char *field[4]; + char *next; + + field[0] = str; + /* Delimit first 3 fields that are separated by space */ + for (i = 0; i < ARRAY_SIZE(field) - 1; i++) { + field[i + 1] = str_field_delimit(&field[i], ' '); + if (!field[i + 1]) + return ERR_PTR(-EINVAL); + } + /* Delimit last field that can be terminated by comma */ + next = str_field_delimit(&field[i], ','); + + sp = kzalloc(sizeof(*sp), GFP_KERNEL); + if (!sp) + return ERR_PTR(-ENOMEM); + dev->table[n] = sp; + + /* start_sector */ + if (kstrtoull(field[0], 0, &sp->sector_start)) + return ERR_PTR(-EINVAL); + /* num_sector */ + if (kstrtoull(field[1], 0, &sp->length)) + return ERR_PTR(-EINVAL); + /* target_type */ + strscpy(sp->target_type, field[2], sizeof(sp->target_type)); + if (dm_verify_target_type(sp->target_type)) { + DMERR("invalid type \"%s\"", sp->target_type); + return ERR_PTR(-EINVAL); + } + /* target_args */ + dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL, + DM_MAX_STR_SIZE); + if (!dev->target_args_array[n]) + return ERR_PTR(-ENOMEM); + + return next; +} + +/** + * dm_parse_table - parse "dm-mod.create=" table field + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * <table>[,<table>+] + */ +static int __init dm_parse_table(struct dm_device *dev, char *str) +{ + char *table_entry = str; + + while (table_entry) { + DMDEBUG("parsing table \"%s\"", str); + if (++dev->dmi.target_count >= DM_MAX_TARGETS) { + DMERR("too many targets %u > %d", + dev->dmi.target_count, DM_MAX_TARGETS); + return -EINVAL; + } + table_entry = dm_parse_table_entry(dev, table_entry); + if (IS_ERR(table_entry)) { + DMERR("couldn't parse table"); + return PTR_ERR(table_entry); + } + } + + return 0; +} + +/** + * dm_parse_device_entry - parse a device entry + * @dev: device to store the parsed information. + * @str: the pointer to a string with the format: + * name,uuid,minor,flags,table[; ...] + * + * Return the remainder string after the table entry, i.e, after the semi-colon + * which delimits the entry or NULL if reached the end of the string. + */ +static char __init *dm_parse_device_entry(struct dm_device *dev, char *str) +{ + /* There are 5 fields: name,uuid,minor,flags,table; */ + char *field[5]; + unsigned int i; + char *next; + + field[0] = str; + /* Delimit first 4 fields that are separated by comma */ + for (i = 0; i < ARRAY_SIZE(field) - 1; i++) { + field[i+1] = str_field_delimit(&field[i], ','); + if (!field[i+1]) + return ERR_PTR(-EINVAL); + } + /* Delimit last field that can be delimited by semi-colon */ + next = str_field_delimit(&field[i], ';'); + + /* name */ + strscpy(dev->dmi.name, field[0], sizeof(dev->dmi.name)); + /* uuid */ + strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid)); + /* minor */ + if (strlen(field[2])) { + if (kstrtoull(field[2], 0, &dev->dmi.dev)) + return ERR_PTR(-EINVAL); + dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG; + } + /* flags */ + if (!strcmp(field[3], "ro")) + dev->dmi.flags |= DM_READONLY_FLAG; + else if (strcmp(field[3], "rw")) + return ERR_PTR(-EINVAL); + /* table */ + if (dm_parse_table(dev, field[4])) + return ERR_PTR(-EINVAL); + + return next; +} + +/** + * dm_parse_devices - parse "dm-mod.create=" argument + * @devices: list of struct dm_device to store the parsed information. + * @str: the pointer to a string with the format: + * <device>[;<device>+] + */ +static int __init dm_parse_devices(struct list_head *devices, char *str) +{ + unsigned long ndev = 0; + struct dm_device *dev; + char *device = str; + + DMDEBUG("parsing \"%s\"", str); + while (device) { + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + list_add_tail(&dev->list, devices); + + if (++ndev >= DM_MAX_DEVICES) { + DMERR("too many targets %u > %d", + dev->dmi.target_count, DM_MAX_TARGETS); + return -EINVAL; + } + + device = dm_parse_device_entry(dev, device); + if (IS_ERR(device)) { + DMERR("couldn't parse device"); + return PTR_ERR(device); + } + } + + return 0; +} + +/** + * dm_init_init - parse "dm-mod.create=" argument and configure drivers + */ +static int __init dm_init_init(void) +{ + struct dm_device *dev; + LIST_HEAD(devices); + char *str; + int r; + + if (!create) + return 0; + + if (strlen(create) >= DM_MAX_STR_SIZE) { + DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE); + return -EINVAL; + } + str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE); + if (!str) + return -ENOMEM; + + r = dm_parse_devices(&devices, str); + if (r) + goto out; + + DMINFO("waiting for all devices to be available before creating mapped devices\n"); + wait_for_device_probe(); + + list_for_each_entry(dev, &devices, list) { + if (dm_early_create(&dev->dmi, dev->table, + dev->target_args_array)) + break; + } +out: + kfree(str); + dm_setup_cleanup(&devices); + return r; +} + +late_initcall(dm_init_init); + +module_param(create, charp, 0); +MODULE_PARM_DESC(create, "Create a mapped device in early boot"); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index f666778ad237..c740153b4e52 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -2018,3 +2018,106 @@ out: return r; } + + +/** + * dm_early_create - create a mapped device in early boot. 
+ *
+ * @dmi: Contains main information of the device mapping to be created.
+ * @spec_array: array of pointers to struct dm_target_spec. Describes the
+ * mapping table of the device.
+ * @target_params_array: array of strings with the parameters to a specific
+ * target.
+ *
+ * Instead of having the struct dm_target_spec and the parameters for every
+ * target embedded at the end of struct dm_ioctl (as performed in a normal
+ * ioctl), pass them as arguments, so the caller doesn't need to serialize them.
+ * The size of the spec_array and target_params_array is given by
+ * @dmi->target_count.
+ * This function is supposed to be called in early boot, so locking mechanisms
+ * to protect against concurrent loads are not required.
+ *
+ * The steps are: validate the name with check_name(), create the mapped
+ * device (with the minor decoded from @dmi->dev when DM_PERSISTENT_DEV_FLAG
+ * is set, DM_ANY_MINOR otherwise), register it with dm_hash_insert(), create
+ * a table and add every target to it, complete the table, set up the queue,
+ * then suspend, swap the table in, apply the read-only flag and resume.
+ *
+ * On failure after the device was created, dm_put() and dm_destroy() are
+ * called on it; if the table was created as well it is destroyed first.
+ *
+ * Return: 0 on success, -EINVAL if @dmi->target_count is zero, otherwise the
+ * error code of the first step that failed.
+ */
+int __init dm_early_create(struct dm_ioctl *dmi,
+			   struct dm_target_spec **spec_array,
+			   char **target_params_array)
+{
+	int r, m = DM_ANY_MINOR;
+	struct dm_table *t, *old_map;
+	struct mapped_device *md;
+	unsigned int i;
+
+	if (!dmi->target_count)
+		return -EINVAL;
+
+	r = check_name(dmi->name);
+	if (r)
+		return r;
+
+	if (dmi->flags & DM_PERSISTENT_DEV_FLAG)
+		m = MINOR(huge_decode_dev(dmi->dev));
+
+	/* alloc dm device */
+	r = dm_create(m, &md);
+	if (r)
+		return r;
+
+	/* hash insert; an empty uuid string is passed as NULL */
+	r = dm_hash_insert(dmi->name, *dmi->uuid ? dmi->uuid : NULL, md);
+	if (r)
+		goto err_destroy_dm;
+
+	/*
+	 * alloc table
+	 * NOTE(review): from here on no error path visibly undoes the hash
+	 * insert above -- confirm that dm_destroy() copes with a device that
+	 * is still hashed.
+	 */
+	r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md);
+	if (r)
+		goto err_destroy_dm;
+
+	/* add targets; spec_array[i] and target_params_array[i] describe target i */
+	for (i = 0; i < dmi->target_count; i++) {
+		r = dm_table_add_target(t, spec_array[i]->target_type,
+					(sector_t) spec_array[i]->sector_start,
+					(sector_t) spec_array[i]->length,
+					target_params_array[i]);
+		if (r) {
+			DMWARN("error adding target to table");
+			goto err_destroy_table;
+		}
+	}
+
+	/* finish table */
+	r = dm_table_complete(t);
+	if (r)
+		goto err_destroy_table;
+
+	md->type = dm_table_get_type(t);
+	/* setup md->queue to reflect md's type (may block) */
+	r = dm_setup_md_queue(md, t);
+	if (r) {
+		DMWARN("unable to set up device queue for new table.");
+		goto err_destroy_table;
+	}
+
+	/*
+	 * Set new map
+	 * NOTE(review): the return value of dm_suspend() is not checked --
+	 * confirm that a failure can be ignored here.
+	 */
+	dm_suspend(md, 0);
+	old_map = dm_swap_table(md, t);
+	if (IS_ERR(old_map)) {
+		r = PTR_ERR(old_map);
+		goto err_destroy_table;
+	}
+	set_disk_ro(dm_disk(md), !!(dmi->flags & DM_READONLY_FLAG));
+
+	/*
+	 * resume device
+	 * NOTE(review): on failure t is destroyed although it was already
+	 * passed to dm_swap_table() -- confirm this is safe.
+	 */
+	r = dm_resume(md);
+	if (r)
+		goto err_destroy_table;
+
+	DMINFO("%s (%s) is ready", md->disk->disk_name, dmi->name);
+	dm_put(md);
+	return 0;
+
+err_destroy_table:
+	dm_table_destroy(t);
+err_destroy_dm:
+	dm_put(md);
+	dm_destroy(md);
+	return r;
+}
-- cgit
From f87e033b3b923d91194348c11221e1bbc92e51b2 Mon Sep 17 00:00:00 2001
From: Huaisheng Ye
Date: Thu, 21 Feb 2019 00:34:47 +0800
Subject: dm writecache: fix typo in name for writeback_wq
The workqueue's name should be "writecache-writeback" instead of "writecache-writeabck".
Signed-off-by: Huaisheng Ye Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 2b8cee35e4d5..f7822875589e 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1859,7 +1859,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - wc->writeback_wq = alloc_workqueue("writecache-writeabck", WQ_MEM_RECLAIM, 1); + wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); if (!wc->writeback_wq) { r = -ENOMEM; ti->error = "Could not allocate writeback workqueue"; -- cgit From de7180ff908b2bc0342e832dbdaa9a5f1ecaa33a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 25 Feb 2019 11:07:10 -0500 Subject: dm cache: add support for discard passdown to the origin device DM cache now defaults to passing discards down to the origin device. User may disable this using the "no_discard_passdown" feature when creating the cache device. If the cache's underlying origin device doesn't support discards then passdown is disabled (with warning). Similarly, if the underlying origin device's max_discard_sectors is less than a cache block discard passdown will be disabled (this is required because sizing of the cache internal discard bitset depends on it). 
Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 126 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 26 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index adc529f12b6b..d249cf8ac277 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -353,6 +353,7 @@ struct cache_features { enum cache_metadata_mode mode; enum cache_io_mode io_mode; unsigned metadata_version; + bool discard_passdown:1; }; struct cache_stats { @@ -1899,7 +1900,11 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio) b = to_dblock(from_dblock(b) + 1); } - bio_endio(bio); + if (cache->features.discard_passdown) { + remap_to_origin(cache, bio); + generic_make_request(bio); + } else + bio_endio(bio); return false; } @@ -2233,13 +2238,14 @@ static void init_features(struct cache_features *cf) cf->mode = CM_WRITE; cf->io_mode = CM_IO_WRITEBACK; cf->metadata_version = 1; + cf->discard_passdown = true; } static int parse_features(struct cache_args *ca, struct dm_arg_set *as, char **error) { static const struct dm_arg _args[] = { - {0, 2, "Invalid number of cache feature arguments"}, + {0, 3, "Invalid number of cache feature arguments"}, }; int r, mode_ctr = 0; @@ -2274,6 +2280,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, else if (!strcasecmp(arg, "metadata2")) cf->metadata_version = 2; + else if (!strcasecmp(arg, "no_discard_passdown")) + cf->discard_passdown = false; + else { *error = "Unrecognised cache feature requested"; return -EINVAL; @@ -3119,6 +3128,39 @@ static void cache_resume(struct dm_target *ti) do_waker(&cache->waker.work); } +static void emit_flags(struct cache *cache, char *result, + unsigned maxlen, ssize_t *sz_ptr) +{ + ssize_t sz = *sz_ptr; + struct cache_features *cf = &cache->features; + unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1; + + DMEMIT("%u ", count); + + if 
(cf->metadata_version == 2) + DMEMIT("metadata2 "); + + if (writethrough_mode(cache)) + DMEMIT("writethrough "); + + else if (passthrough_mode(cache)) + DMEMIT("passthrough "); + + else if (writeback_mode(cache)) + DMEMIT("writeback "); + + else { + DMEMIT("unknown "); + DMERR("%s: internal error: unknown io mode: %d", + cache_device_name(cache), (int) cf->io_mode); + } + + if (!cf->discard_passdown) + DMEMIT("no_discard_passdown "); + + *sz_ptr = sz; +} + /* * Status format: * @@ -3185,25 +3227,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned) atomic_read(&cache->stats.promotion), (unsigned long) atomic_read(&cache->nr_dirty)); - if (cache->features.metadata_version == 2) - DMEMIT("2 metadata2 "); - else - DMEMIT("1 "); - - if (writethrough_mode(cache)) - DMEMIT("writethrough "); - - else if (passthrough_mode(cache)) - DMEMIT("passthrough "); - - else if (writeback_mode(cache)) - DMEMIT("writeback "); - - else { - DMERR("%s: internal error: unknown io mode: %d", - cache_device_name(cache), (int) cache->features.io_mode); - goto err; - } + emit_flags(cache, result, maxlen, &sz); DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); @@ -3432,14 +3456,62 @@ static int cache_iterate_devices(struct dm_target *ti, return r; } +static bool origin_dev_supports_discard(struct block_device *origin_bdev) +{ + struct request_queue *q = bdev_get_queue(origin_bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the origin device + * supports discards. Disable discard_passdown if not. 
+ */ +static void disable_passdown_if_not_supported(struct cache *cache) +{ + struct block_device *origin_bdev = cache->origin_dev->bdev; + struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + const char *reason = NULL; + char buf[BDEVNAME_SIZE]; + + if (!cache->features.discard_passdown) + return; + + if (!origin_dev_supports_discard(origin_bdev)) + reason = "discard unsupported"; + + else if (origin_limits->max_discard_sectors < cache->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + if (reason) { + DMWARN("Origin device (%s) %s: Disabling discard passdown.", + bdevname(origin_bdev, buf), reason); + cache->features.discard_passdown = false; + } +} + static void set_discard_limits(struct cache *cache, struct queue_limits *limits) { + struct block_device *origin_bdev = cache->origin_dev->bdev; + struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; + + if (!cache->features.discard_passdown) { + /* No passdown is done so setting own virtual limits */ + limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, + cache->origin_sectors); + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; + return; + } + /* - * FIXME: these limits may be incompatible with the cache device + * cache_iterate_devices() is stacking both origin and fast device limits + * but discards aren't passed to fast device, so inherit origin's limits. 
*/ - limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, - cache->origin_sectors); - limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; + limits->max_discard_sectors = origin_limits->max_discard_sectors; + limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors; + limits->discard_granularity = origin_limits->discard_granularity; + limits->discard_alignment = origin_limits->discard_alignment; + limits->discard_misaligned = origin_limits->discard_misaligned; } static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -3456,6 +3528,8 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); } + + disable_passdown_if_not_supported(cache); set_discard_limits(cache, limits); } @@ -3463,7 +3537,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {2, 0, 0}, + .version = {2, 1, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, -- cgit From c439ca69d5c8306a14ee2f3f39e9c833b4cda3e7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 27 Feb 2019 12:02:05 -0500 Subject: dm snapshot: don't define direct_access if we don't support it Don't define a direct_access function that fails, dm_dax_direct_access already fails with -EIO if the pointer is zero; Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 36805b12661e..a168963b757d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2338,13 +2338,6 @@ static int origin_map(struct dm_target *ti, struct bio *bio) return do_origin(o->dev, bio); } -static long origin_dax_direct_access(struct 
dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) -{ - DMWARN("device does not support dax."); - return -EIO; -} - /* * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. @@ -2404,7 +2397,6 @@ static struct target_type origin_target = { .postsuspend = origin_postsuspend, .status = origin_status, .iterate_devices = origin_iterate_devices, - .direct_access = origin_dax_direct_access, }; static struct target_type snapshot_target = { -- cgit From 225557446856448039a9e495da37b72c20071ef2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 6 Mar 2019 08:29:34 -0500 Subject: dm integrity: limit the rate of error messages When using dm-integrity underneath md-raid, some tests with raid auto-correction trigger large amounts of integrity failures - and all these failures print an error message. These messages can bring the system to a halt if the system is using serial console. Fix this by limiting the rate of error messages - it improves the speed of raid recovery and avoids the hang. Fixes: 7eada909bfd7a ("dm: add integrity target") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index f1ab81b19de9..d57d997a52c8 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1368,8 +1368,8 @@ again: checksums_ptr - checksums, !dio->write ? 
TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - DMERR("Checksum failed at sector 0x%llx", - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); + DMERR_LIMIT("Checksum failed at sector 0x%llx", + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size))); r = -EILSEQ; atomic64_inc(&ic->number_of_mismatches); } @@ -1561,8 +1561,8 @@ retry_kmap: integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { - DMERR("Checksum failed when reading from journal, at sector 0x%llx", - (unsigned long long)logical_sector); + DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", + (unsigned long long)logical_sector); } } #endif -- cgit