diff options
Diffstat (limited to 'block/blk-rq-qos.c')
| -rw-r--r-- | block/blk-rq-qos.c | 203 |
1 files changed, 157 insertions, 46 deletions
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index d169d7188fa6..654478dfbc20 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + #include "blk-rq-qos.h" /* @@ -8,16 +10,10 @@ static bool atomic_inc_below(atomic_t *v, unsigned int below) { unsigned int cur = atomic_read(v); - for (;;) { - unsigned int old; - + do { if (cur >= below) return false; - old = atomic_cmpxchg(v, cur, cur + 1); - if (old == cur) - break; - cur = old; - } + } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); return true; } @@ -81,6 +77,15 @@ void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) } while (rqos); } +void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) +{ + do { + if (rqos->ops->merge) + rqos->ops->merge(rqos, rq, bio); + rqos = rqos->next; + } while (rqos); +} + void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) { do { @@ -90,6 +95,15 @@ void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) } while (rqos); } +void __rq_qos_queue_depth_changed(struct rq_qos *rqos) +{ + do { + if (rqos->ops->queue_depth_changed) + rqos->ops->queue_depth_changed(rqos); + rqos = rqos->next; + } while (rqos); +} + /* * Return true, if we can't increase the depth further by scaling */ @@ -140,24 +154,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd) return ret; } -void rq_depth_scale_up(struct rq_depth *rqd) +/* Returns true on success and false if scaling up wasn't possible */ +bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) - return; + return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); + return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we - * had a latency violation. + * had a latency violation. Returns true on success and returns false if + * scaling down wasn't possible. */ -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents @@ -165,7 +182,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) * keep up. */ if (rqd->max_depth == 1) - return; + return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; @@ -174,11 +191,11 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); + return true; } struct rq_qos_wait_data { struct wait_queue_entry wq; - struct task_struct *task; struct rq_wait *rqw; acquire_inflight_cb_t *cb; void *private_data; @@ -200,16 +217,30 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, return -1; data->got_token = true; - list_del_init(&curr->entry); - wake_up_process(data->task); + /* + * autoremove_wake_function() removes the wait entry only when it + * actually changed the task state. We want the wait always removed. + * Remove explicitly and use default_wake_function(). + */ + default_wake_function(curr, mode, wake_flags, key); + /* + * Note that the order of operations is important as finish_wait() + * tests whether @curr is removed without grabbing the lock. This + * should be the last thing to do to make sure we will not have a + * UAF access to @data. And the semantics of memory barrier in it + * also make sure the waiter will see the latest @data->got_token + * once list_empty_careful() in finish_wait() returns true. + */ + list_del_init_careful(&curr->entry); return 1; } /** * rq_qos_wait - throttle on a rqw if we need to - * @private_data - caller provided specific data - * @acquire_inflight_cb - inc the rqw->inflight counter if we can - * @cleanup_cb - the callback to cleanup in case we race with a waker + * @rqw: rqw to throttle on + * @private_data: caller provided specific data + * @acquire_inflight_cb: inc the rqw->inflight counter if we can + * @cleanup_cb: the callback to cleanup in case we race with a waker * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the @@ -225,50 +256,130 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { - .wq = { - .func = rq_qos_wake_function, - .entry = LIST_HEAD_INIT(data.wq.entry), - }, - .task = current, - .rqw = rqw, - .cb = acquire_inflight_cb, - .private_data = private_data, + .rqw = rqw, + .cb = acquire_inflight_cb, + .private_data = private_data, + .got_token = false, }; - bool has_sleeper; + bool first_waiter; - has_sleeper = wq_has_sleeper(&rqw->wait); - if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) + /* + * If there are no waiters in the waiting queue, try to increase the + * inflight counter if we can. Otherwise, prepare for adding ourselves + * to the waiting queue. + */ + if (!waitqueue_active(&rqw->wait) && acquire_inflight_cb(rqw, private_data)) + return; + + init_wait_func(&data.wq, rq_qos_wake_function); + first_waiter = prepare_to_wait_exclusive(&rqw->wait, &data.wq, + TASK_UNINTERRUPTIBLE); + /* + * Make sure there is at least one inflight process; otherwise, waiters + * will never be woken up. Since there may be no inflight process before + * adding ourselves to the waiting queue above, we need to try to + * increase the inflight counter for ourselves. And it is sufficient to + * guarantee that at least the first waiter to enter the waiting queue + * will re-check the waiting condition before going to sleep, thus + * ensuring forward progress. + */ + if (!data.got_token && first_waiter && acquire_inflight_cb(rqw, private_data)) { + finish_wait(&rqw->wait, &data.wq); + /* + * We raced with rq_qos_wake_function() getting a token, + * which means we now have two. Put our local token + * and wake anyone else potentially waiting for one. + * + * Enough memory barrier in list_empty_careful() in + * finish_wait() is paired with list_del_init_careful() + * in rq_qos_wake_function() to make sure we will see + * the latest @data->got_token. + */ + if (data.got_token) + cleanup_cb(rqw, private_data); return; + } - prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); + /* we are now relying on the waker to increase our inflight counter. */ do { if (data.got_token) break; - if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { - finish_wait(&rqw->wait, &data.wq); - - /* - * We raced with wbt_wake_function() getting a token, - * which means we now have two. Put our local token - * and wake anyone else potentially waiting for one. - */ - if (data.got_token) - cleanup_cb(rqw, private_data); - break; - } io_schedule(); - has_sleeper = false; + set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } void rq_qos_exit(struct request_queue *q) { - blk_mq_debugfs_unregister_queue_rqos(q); - + mutex_lock(&q->rq_qos_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); } + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); + mutex_unlock(&q->rq_qos_mutex); +} + +int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, + const struct rq_qos_ops *ops) +{ + struct request_queue *q = disk->queue; + unsigned int memflags; + + lockdep_assert_held(&q->rq_qos_mutex); + + rqos->disk = disk; + rqos->id = id; + rqos->ops = ops; + + /* + * No IO can be in-flight when adding rqos, so freeze queue, which + * is fine since we only support rq_qos for blk-mq queue. + */ + memflags = blk_mq_freeze_queue(q); + + if (rq_qos_id(q, rqos->id)) + goto ebusy; + rqos->next = q->rq_qos; + q->rq_qos = rqos; + blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); + + blk_mq_unfreeze_queue(q, memflags); + + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); + blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + } + + return 0; +ebusy: + blk_mq_unfreeze_queue(q, memflags); + return -EBUSY; +} + +void rq_qos_del(struct rq_qos *rqos) +{ + struct request_queue *q = rqos->disk->queue; + struct rq_qos **cur; + unsigned int memflags; + + lockdep_assert_held(&q->rq_qos_mutex); + + memflags = blk_mq_freeze_queue(q); + for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { + if (*cur == rqos) { + *cur = rqos->next; + break; + } + } + if (!q->rq_qos) + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); + blk_mq_unfreeze_queue(q, memflags); + + mutex_lock(&q->debugfs_mutex); + blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); } |
