author    Yu Kuai <yukuai3@huawei.com>    2023-04-14 16:40:08 +0800
committer Jens Axboe <axboe@kernel.dk>    2023-05-23 11:13:19 -0600
commit    a13bd91be22318768d55470cbc0b0f4488ef9edf
tree      311128f0620f3519dc92e5d509b5d176f0f72b3a /block/blk-rq-qos.c
parent    712fd23a90eed6a73ea5135a500e59d30356d4f1
block/rq_qos: protect rq_qos apis with a new lock
Commit 50e34d78815e ("block: disable the elevator int del_gendisk") moved rq_qos_exit() from disk_release() to del_gendisk(), which introduces several problems:

1) If rq_qos_add() is triggered by enabling iocost/iolatency through cgroupfs, it can run concurrently with del_gendisk(); writing 'q->rq_qos' concurrently is not safe.

2) Activating a cgroup policy that relies on rq_qos will call rq_qos_add() and blkcg_activate_policy(); if rq_qos_exit() is called in between, a null-ptr-dereference is triggered in blkcg_activate_policy().

3) blkg_conf_open_bdev() can call blkdev_get_no_open() first to find the disk; if rq_qos_exit() from del_gendisk() completes before rq_qos_add(), memory is leaked.

This patch adds a new disk-level mutex 'rq_qos_mutex':

1) The lock protects rq_qos_exit() directly.

2) For wbt, which doesn't rely on blk-cgroup, rq_qos_add() can only be called from disk initialization for now because wbt can't be destroyed until rq_qos_exit(), so it's safe not to protect wbt for now. However, in case dynamic destruction of rq_qos is supported in the future, this patch also protects rq_qos_add() from wbt_init() directly; this is enough because blk-sysfs already synchronizes writers with disk removal.

3) For iocost and iolatency, in order to synchronize disk removal and cgroup configuration, the lock is held after blkdev_get_no_open() in blkg_conf_open_bdev(), and released in blkg_conf_exit().

In order to fix the above memory leak, disk_live() is checked after holding the new lock.

Fixes: 50e34d78815e ("block: disable the elevator int del_gendisk")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230414084008.2085155-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
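As a rough illustration of the caller-side locking rule described above, the sketch below (not part of this patch) shows how a hypothetical rq_qos policy initializer would call rq_qos_add() under the new disk-level mutex. The names example_qos_init(), example_qos_exit() and example_rqos_ops are made up for illustration; the real users are wbt_init() and the iocost/iolatency cgroup configuration paths, and only rq_qos_add(), q->rq_qos_mutex and the RQ_QOS_WBT id come from the patched API.

    /*
     * Sketch only: a hypothetical rq_qos policy initializer showing the
     * locking rule introduced by this patch.
     */
    #include <linux/blkdev.h>
    #include <linux/mutex.h>
    #include <linux/slab.h>
    #include "blk-rq-qos.h"

    static void example_qos_exit(struct rq_qos *rqos)
    {
            /* called from rq_qos_exit() with rq_qos_mutex held */
            kfree(rqos);
    }

    static const struct rq_qos_ops example_rqos_ops = {
            .exit = example_qos_exit,
    };

    static int example_qos_init(struct gendisk *disk)
    {
            struct request_queue *q = disk->queue;
            struct rq_qos *rqos;
            int ret;

            rqos = kzalloc(sizeof(*rqos), GFP_KERNEL);
            if (!rqos)
                    return -ENOMEM;

            /*
             * rq_qos_add() now does lockdep_assert_held(&q->rq_qos_mutex),
             * so the caller serializes against rq_qos_exit() with the new
             * disk-level mutex instead of ->queue_lock.
             */
            mutex_lock(&q->rq_qos_mutex);
            ret = rq_qos_add(rqos, disk, RQ_QOS_WBT, &example_rqos_ops);
            mutex_unlock(&q->rq_qos_mutex);

            if (ret)
                    kfree(rqos);
            return ret;
    }

The same pattern applies to the cgroup paths: blkg_conf_open_bdev() takes the mutex after blkdev_get_no_open() and blkg_conf_exit() releases it, so a concurrent del_gendisk() cannot tear down q->rq_qos in the middle of policy activation.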
Diffstat (limited to 'block/blk-rq-qos.c')
-rw-r--r--  block/blk-rq-qos.c | 20
1 file changed, 6 insertions(+), 14 deletions(-)
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index d8cc820a365e..167be74df4ee 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -288,11 +288,13 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
void rq_qos_exit(struct request_queue *q)
{
+ mutex_lock(&q->rq_qos_mutex);
while (q->rq_qos) {
struct rq_qos *rqos = q->rq_qos;
q->rq_qos = rqos->next;
rqos->ops->exit(rqos);
}
+ mutex_unlock(&q->rq_qos_mutex);
}
int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
@@ -300,6 +302,8 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
{
struct request_queue *q = disk->queue;
+ lockdep_assert_held(&q->rq_qos_mutex);
+
rqos->disk = disk;
rqos->id = id;
rqos->ops = ops;
@@ -307,18 +311,13 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
/*
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
- *
- * Reuse ->queue_lock for protecting against other concurrent
- * rq_qos adding/deleting
*/
blk_mq_freeze_queue(q);
- spin_lock_irq(&q->queue_lock);
if (rq_qos_id(q, rqos->id))
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
- spin_unlock_irq(&q->queue_lock);
blk_mq_unfreeze_queue(q);
@@ -330,7 +329,6 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
return 0;
ebusy:
- spin_unlock_irq(&q->queue_lock);
blk_mq_unfreeze_queue(q);
return -EBUSY;
}
@@ -340,21 +338,15 @@ void rq_qos_del(struct rq_qos *rqos)
struct request_queue *q = rqos->disk->queue;
struct rq_qos **cur;
- /*
- * See comment in rq_qos_add() about freezing queue & using
- * ->queue_lock.
- */
- blk_mq_freeze_queue(q);
+ lockdep_assert_held(&q->rq_qos_mutex);
- spin_lock_irq(&q->queue_lock);
+ blk_mq_freeze_queue(q);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
- spin_unlock_irq(&q->queue_lock);
-
blk_mq_unfreeze_queue(q);
mutex_lock(&q->debugfs_mutex);