From eb619fdb2d4cb8b3d3419e9113921e87e7daf557 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 9 Nov 2017 08:32:43 -0700
Subject: blk-mq: fix issue with shared tag queue re-running

This patch attempts to make the case of hctx re-running on driver tag
failure more robust. Without this patch, it's pretty easy to trigger a
stall condition with shared tags. An example is using null_blk like
this:

modprobe null_blk queue_mode=2 nr_devices=4 shared_tags=1 submit_queues=1 hw_queue_depth=1

which sets up 4 devices, sharing the same tag set with a depth of 1.
Running a fio job ala:

[global]
bs=4k
rw=randread
norandommap
direct=1
ioengine=libaio
iodepth=4

[nullb0]
filename=/dev/nullb0
[nullb1]
filename=/dev/nullb1
[nullb2]
filename=/dev/nullb2
[nullb3]
filename=/dev/nullb3

will inevitably end with one or more threads being stuck waiting for a
scheduler tag. That IO is then stuck forever, until someone else
triggers a run of the queue.

Ensure that we always re-run the hardware queue, if the driver tag we
were waiting for got freed before we added our leftover request entries
back on the dispatch list.

Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Tested-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux/blk-mq.h')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 674641527da7..4ae987c2352c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -35,7 +35,7 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
-	wait_queue_entry_t		dispatch_wait;
+	wait_queue_entry_t	dispatch_wait;
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
@@ -181,8 +181,7 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 	BLK_MQ_S_SCHED_RESTART	= 2,
-	BLK_MQ_S_TAG_WAITING	= 3,
-	BLK_MQ_S_START_ON_RUN	= 4,
+	BLK_MQ_S_START_ON_RUN	= 3,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
-- 
cgit