From cb8432d650fe3be58bb962bc8e602dc405510327 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:47:17 +0100 Subject: block: allocate struct hd_struct as part of struct bdev_inode Allocate hd_struct together with struct block_device to pre-load the lifetime rule changes in preparation of merging the two structures. Note that part0 was previously embedded into struct gendisk, but is a separate allocation now, and already points to the block_device instead of the hd_struct. The lifetime of struct gendisk is still controlled by the struct device embedded in the part0 hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-flush.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index e32958f0b687..fcd0a60574df 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -139,7 +139,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front) static void blk_account_io_flush(struct request *rq) { - struct hd_struct *part = &rq->rq_disk->part0; + struct hd_struct *part = rq->rq_disk->part0->bd_part; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); -- cgit From 8446fe9255be821cb38ffd306d7e8edc4b9ea662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:36:54 +0100 Subject: block: switch partition lookup to use struct block_device Use struct block_device to lookup partitions on a disk. This removes all usage of struct hd_struct from the I/O path. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-flush.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index fcd0a60574df..9507dcdd5881 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -139,7 +139,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front) static void blk_account_io_flush(struct request *rq) { - struct hd_struct *part = rq->rq_disk->part0->bd_part; + struct block_device *part = rq->rq_disk->part0; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); -- cgit From fb01a2932e81a1fb2273f87ff92dc8172b8880ee Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 3 Dec 2020 09:26:36 +0800 Subject: blk-mq: add new API of blk_mq_hctx_set_fq_lock_class flush_end_io() may be called recursively from some driver, such as nvme-loop, so lockdep may complain 'possible recursive locking'. Commit b3c6a5997541("block: Fix a lockdep complaint triggered by request queue flushing") tried to address this issue by assigning dynamically allocated per-flush-queue lock class. This solution adds synchronize_rcu() for each hctx's release handler, and causes horrible SCSI MQ probe delay(more than half an hour on megaraid sas). Add new API of blk_mq_hctx_set_fq_lock_class() for these drivers, so we just need to use driver specific lock class for avoiding the lockdep warning of 'possible recursive locking'. Tested-by: Kashyap Desai Reported-by: Qian Cai Cc: Sumit Saxena Cc: John Garry Cc: Kashyap Desai Cc: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-flush.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'block/blk-flush.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index 9507dcdd5881..bf51588762d8 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -490,3 +490,28 @@ void blk_free_flush_queue(struct blk_flush_queue *fq) kfree(fq->flush_rq); kfree(fq); } + +/* + * Allow driver to set its own lock class to fq->mq_flush_lock for + * avoiding lockdep complaint. + * + * flush_end_io() may be called recursively from some driver, such as + * nvme-loop, so lockdep may complain 'possible recursive locking' because + * all 'struct blk_flush_queue' instance share same mq_flush_lock lock class + * key. We need to assign different lock class for these driver's + * fq->mq_flush_lock for avoiding the lockdep warning. + * + * Use dynamically allocated lock class key for each 'blk_flush_queue' + * instance is over-kill, and more worse it introduces horrible boot delay + * issue because synchronize_rcu() is implied in lockdep_unregister_key which + * is called for each hctx release. SCSI probing may synchronously create and + * destroy lots of MQ request_queues for non-existent devices, and some robot + * test kernel always enable lockdep option. It is observed that more than half + * an hour is taken during SCSI MQ probe with per-fq lock class. + */ +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key) +{ + lockdep_set_class(&hctx->fq->mq_flush_lock, key); +} +EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class); -- cgit From 7aa390ec2d9db0cd6677d95d0b8f307f9c086770 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 3 Dec 2020 09:26:38 +0800 Subject: Revert "block: Fix a lockdep complaint triggered by request queue flushing" This reverts commit b3c6a59975415bde29cfd76ff1ab008edbf614a9. Now we can avoid nvme-loop lockdep warning of 'lockdep possible recursive locking' by nvme-loop's lock class, no need to apply dynamically allocated lock class key, so revert commit b3c6a5997541("block: Fix a lockdep complaint triggered by request queue flushing"). This way fixes horrible SCSI probe delay issue on megaraid_sas, and it is reported the whole probe may take more than half an hour. Tested-by: Kashyap Desai Reported-by: Qian Cai Reviewed-by: Christoph Hellwig Cc: Sumit Saxena Cc: John Garry Cc: Kashyap Desai Cc: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-flush.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'block/blk-flush.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index bf51588762d8..996d5d03dade 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -69,7 +69,6 @@ #include #include #include -#include #include "blk.h" #include "blk-mq.h" @@ -469,9 +468,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, INIT_LIST_HEAD(&fq->flush_queue[1]); INIT_LIST_HEAD(&fq->flush_data_in_flight); - lockdep_register_key(&fq->key); - lockdep_set_class(&fq->mq_flush_lock, &fq->key); - return fq; fail_rq: @@ -486,7 +482,6 @@ void blk_free_flush_queue(struct blk_flush_queue *fq) if (!fq) return; - lockdep_unregister_key(&fq->key); kfree(fq->flush_rq); kfree(fq); } -- cgit