From 4ae9944d132b160d444fa3aa875307eb0fa3eeec Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 26 May 2015 08:25:54 +0000 Subject: dm: run queue on re-queue Without kicking queue, requeued request may stay forever in the queue if there are no other I/O activities to the device. The original error had been in v2.6.39 with commit 7eaceaccab5f ("block: remove per-queue plugging"), which replaced conditional plugging by periodic runqueue. Commit 9d1deb83d489 in v4.1-rc1 removed the periodic runqueue and the problem started to manifest. Fixes: 9d1deb83d489 ("dm: don't schedule delayed run of the queue if nothing to do") Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a930b72314ac..0bf79a0bad37 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1164,6 +1164,7 @@ static void old_requeue_request(struct request *rq) spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); + blk_run_queue_async(q); spin_unlock_irqrestore(q->queue_lock, flags); } -- cgit From 3a1407559a593d4360af12dd2df5296bf8eb0d28 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Wed, 27 May 2015 04:22:07 +0000 Subject: dm: fix NULL pointer when clone_and_map_rq returns !DM_MAPIO_REMAPPED When stacking request-based DM on blk_mq device, request cloning and remapping are done in a single call to target's clone_and_map_rq(). The clone is allocated and valid only if clone_and_map_rq() returns DM_MAPIO_REMAPPED. The "IS_ERR(clone)" check in map_request() does not cover all the !DM_MAPIO_REMAPPED cases that are possible (E.g. if underlying devices are not ready or unavailable, clone_and_map_rq() may return DM_MAPIO_REQUEUE without ever having established an ERR_PTR). Fix this by explicitly checking for a return that is not DM_MAPIO_REMAPPED in map_request(). Without this fix, DM core may call setup_clone() for a NULL clone and oops like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 IP: [] blk_rq_prep_clone+0x7d/0x137 ... CPU: 2 PID: 5793 Comm: kdmwork-253:3 Not tainted 4.0.0-nm #1 ... Call Trace: [] map_tio_request+0xa9/0x258 [dm_mod] [] kthread_worker_fn+0xfd/0x150 [] ? kthread_parkme+0x24/0x24 [] ? kthread_parkme+0x24/0x24 [] kthread+0xe6/0xee [] ? put_lock_stats+0xe/0x20 [] ? __init_kthread_worker+0x5b/0x5b [] ret_from_fork+0x58/0x90 [] ? __init_kthread_worker+0x5b/0x5b Fixes: e5863d9ad ("dm: allocate requests in target when stacking on blk-mq devices") Reported-by: Bart Van Assche Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.0+ --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0bf79a0bad37..1c62ed8d09f4 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1972,8 +1972,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, dm_kill_unmapped_request(rq, r); return r; } - if (IS_ERR(clone)) - return DM_MAPIO_REQUEUE; + if (r != DM_MAPIO_REMAPPED) + return r; if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { /* -ENOMEM */ ti->type->release_clone_rq(clone); -- cgit From 45714fbed4556149d7f1730f5bae74f81d5e2cd5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 27 May 2015 15:25:27 -0400 Subject: dm: requeue from blk-mq dm_mq_queue_rq() using BLK_MQ_RQ_QUEUE_BUSY Use BLK_MQ_RQ_QUEUE_BUSY to requeue a blk-mq request directly from the DM blk-mq device's .queue_rq. This cleans up the previous convoluted handling of request requeueing that would return BLK_MQ_RQ_QUEUE_OK (even though it wasn't) and then run blk_mq_requeue_request() followed by blk_mq_kick_requeue_list(). Also, document that DM blk-mq ontop of old request_fn devices cannot fail in clone_rq() since the clone request is preallocated as part of the pdu. Reported-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1c62ed8d09f4..1badfb250a18 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2754,13 +2754,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) { /* clone request is allocated at the end of the pdu */ tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io); - if (!clone_rq(rq, md, tio, GFP_ATOMIC)) - return BLK_MQ_RQ_QUEUE_BUSY; + (void) clone_rq(rq, md, tio, GFP_ATOMIC); queue_kthread_work(&md->kworker, &tio->work); } else { /* Direct call is fine since .queue_rq allows allocations */ - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) - dm_requeue_unmapped_original_request(md, rq); + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { + /* Undo dm_start_request() before requeuing */ + rq_completed(md, rq_data_dir(rq), false); + return BLK_MQ_RQ_QUEUE_BUSY; + } } return BLK_MQ_RQ_QUEUE_OK; -- cgit From e5d8de32cc02a259e1a237ab57cba00f2930fa6a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 28 May 2015 15:12:52 -0400 Subject: dm: fix false warning in free_rq_clone() for unmapped requests When stacking request-based dm device on non blk-mq device and device-mapper target could not map the request (error target is used, multipath target with all paths down, etc), the WARN_ON_ONCE() in free_rq_clone() will trigger when it shouldn't. The warning was added by commit aa6df8d ("dm: fix free_rq_clone() NULL pointer when requeueing unmapped request"). But free_rq_clone() with clone->q == NULL is valid usage for the case where dm_kill_unmapped_request() initiates request cleanup. Fix this false warning by just removing the WARN_ON -- it only generated false positives and was never useful in catching the intended case (completing clone request not being mapped e.g. clone->q being NULL). Fixes: aa6df8d ("dm: fix free_rq_clone() NULL pointer when requeueing unmapped request") Reported-by: Bart Van Assche Reported-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1badfb250a18..e24069aaeb18 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1082,13 +1082,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone, bool must_be_mapped) +static void free_rq_clone(struct request *clone) { struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; - WARN_ON_ONCE(must_be_mapped && !clone->q); - blk_rq_unprep_clone(clone); if (md->type == DM_TYPE_MQ_REQUEST_BASED) @@ -1132,7 +1130,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - free_rq_clone(clone, true); + free_rq_clone(clone); if (!rq->q->mq_ops) blk_end_request_all(rq, error); else @@ -1151,7 +1149,7 @@ static void dm_unprep_request(struct request *rq) } if (clone) - free_rq_clone(clone, false); + free_rq_clone(clone); } /* -- cgit From 1c220c69ce0dcc0f234a9f263ad9c0864f971852 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 29 May 2015 14:52:51 +0100 Subject: dm: fix casting bug in dm_merge_bvec() dm_merge_bvec() was originally added in f6fccb ("dm: introduce merge_bvec_fn"). In that commit a value in sectors is converted to bytes using << 9, and then assigned to an int. This code made assumptions about the value of BIO_MAX_SECTORS. A later commit 148e51 ("dm: improve documentation and code clarity in dm_merge_bvec") was meant to have no functional change but it removed the use of BIO_MAX_SECTORS in favor of using queue_max_sectors(). At this point the cast from sector_t to int resulted in a zero value. The fallout being dm_merge_bvec() would only allow a single page to be added to a bio. This interim fix is minimal for the benefit of stable@ because the more comprehensive cleanup of passing a sector_t to all DM targets' merge function will impact quite a few DM targets. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.19+ --- drivers/md/dm.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e24069aaeb18..2caf492890d6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1723,8 +1723,7 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors; - int max_size = 0; + sector_t max_sectors, max_size = 0; if (unlikely(!map)) goto out; @@ -1739,8 +1738,16 @@ static int dm_merge_bvec(struct request_queue *q, max_sectors = min(max_io_len(bvm->bi_sector, ti), (sector_t) queue_max_sectors(q)); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */ - max_size = 0; + + /* + * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t + * to the targets' merge function since it holds sectors not bytes). + * Just doing this as an interim fix for stable@ because the more + * comprehensive cleanup of switching to sector_t will impact every + * DM target that implements a ->merge hook. + */ + if (max_size > INT_MAX) + max_size = INT_MAX; /* * merge_bvec_fn() returns number of bytes @@ -1748,7 +1755,7 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, max_size); + max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); /* * If the target doesn't support merge method and some of the devices * provided their merge_bvec method (we know this by looking for the -- cgit