diff options
Diffstat (limited to 'block/blk-rq-qos.c')
| -rw-r--r-- | block/blk-rq-qos.c | 163 |
1 files changed, 122 insertions, 41 deletions
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index e83af7bc7591..654478dfbc20 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -10,16 +10,10 @@ static bool atomic_inc_below(atomic_t *v, unsigned int below)
 {
 	unsigned int cur = atomic_read(v);
 
-	for (;;) {
-		unsigned int old;
-
+	do {
 		if (cur >= below)
 			return false;
-		old = atomic_cmpxchg(v, cur, cur + 1);
-		if (old == cur)
-			break;
-		cur = old;
-	}
+	} while (!atomic_try_cmpxchg(v, &cur, cur + 1));
 
 	return true;
 }
@@ -202,7 +196,6 @@ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
 
 struct rq_qos_wait_data {
 	struct wait_queue_entry wq;
-	struct task_struct *task;
 	struct rq_wait *rqw;
 	acquire_inflight_cb_t *cb;
 	void *private_data;
@@ -224,9 +217,21 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
 		return -1;
 
 	data->got_token = true;
-	smp_wmb();
-	list_del_init(&curr->entry);
-	wake_up_process(data->task);
+	/*
+	 * autoremove_wake_function() removes the wait entry only when it
+	 * actually changed the task state. We want the wait always removed.
+	 * Remove explicitly and use default_wake_function().
+	 */
+	default_wake_function(curr, mode, wake_flags, key);
+	/*
+	 * Note that the order of operations is important as finish_wait()
+	 * tests whether @curr is removed without grabbing the lock. This
+	 * should be the last thing to do to make sure we will not have a
+	 * UAF access to @data. And the semantics of memory barrier in it
+	 * also make sure the waiter will see the latest @data->got_token
+	 * once list_empty_careful() in finish_wait() returns true.
+	 */
+	list_del_init_careful(&curr->entry);
 	return 1;
 }
 
@@ -251,42 +256,55 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 		 cleanup_cb_t *cleanup_cb)
 {
 	struct rq_qos_wait_data data = {
-		.wq = {
-			.func = rq_qos_wake_function,
-			.entry = LIST_HEAD_INIT(data.wq.entry),
-		},
-		.task = current,
-		.rqw = rqw,
-		.cb = acquire_inflight_cb,
-		.private_data = private_data,
+		.rqw = rqw,
+		.cb = acquire_inflight_cb,
+		.private_data = private_data,
+		.got_token = false,
 	};
-	bool has_sleeper;
+	bool first_waiter;
 
-	has_sleeper = wq_has_sleeper(&rqw->wait);
-	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
+	/*
+	 * If there are no waiters in the waiting queue, try to increase the
+	 * inflight counter if we can. Otherwise, prepare for adding ourselves
+	 * to the waiting queue.
+	 */
+	if (!waitqueue_active(&rqw->wait) && acquire_inflight_cb(rqw, private_data))
 		return;
 
-	has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
+	init_wait_func(&data.wq, rq_qos_wake_function);
+	first_waiter = prepare_to_wait_exclusive(&rqw->wait, &data.wq,
 						 TASK_UNINTERRUPTIBLE);
+	/*
+	 * Make sure there is at least one inflight process; otherwise, waiters
+	 * will never be woken up. Since there may be no inflight process before
+	 * adding ourselves to the waiting queue above, we need to try to
+	 * increase the inflight counter for ourselves. And it is sufficient to
+	 * guarantee that at least the first waiter to enter the waiting queue
+	 * will re-check the waiting condition before going to sleep, thus
+	 * ensuring forward progress.
+	 */
+	if (!data.got_token && first_waiter && acquire_inflight_cb(rqw, private_data)) {
+		finish_wait(&rqw->wait, &data.wq);
+		/*
+		 * We raced with rq_qos_wake_function() getting a token,
+		 * which means we now have two. Put our local token
+		 * and wake anyone else potentially waiting for one.
+		 *
+		 * Enough memory barrier in list_empty_careful() in
+		 * finish_wait() is paired with list_del_init_careful()
+		 * in rq_qos_wake_function() to make sure we will see
+		 * the latest @data->got_token.
+		 */
+		if (data.got_token)
+			cleanup_cb(rqw, private_data);
+		return;
+	}
+
+	/* we are now relying on the waker to increase our inflight counter. */
 	do {
-		/* The memory barrier in set_task_state saves us here. */
 		if (data.got_token)
 			break;
-		if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
-			finish_wait(&rqw->wait, &data.wq);
-
-			/*
-			 * We raced with wbt_wake_function() getting a token,
-			 * which means we now have two. Put our local token
-			 * and wake anyone else potentially waiting for one.
-			 */
-			smp_rmb();
-			if (data.got_token)
-				cleanup_cb(rqw, private_data);
-			break;
-		}
 		io_schedule();
-		has_sleeper = true;
 		set_current_state(TASK_UNINTERRUPTIBLE);
 	} while (1);
 	finish_wait(&rqw->wait, &data.wq);
@@ -294,11 +312,74 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 
 void rq_qos_exit(struct request_queue *q)
 {
-	blk_mq_debugfs_unregister_queue_rqos(q);
-
+	mutex_lock(&q->rq_qos_mutex);
 	while (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
 		q->rq_qos = rqos->next;
 		rqos->ops->exit(rqos);
 	}
+	blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q);
+	mutex_unlock(&q->rq_qos_mutex);
+}
+
+int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
+	       const struct rq_qos_ops *ops)
+{
+	struct request_queue *q = disk->queue;
+	unsigned int memflags;
+
+	lockdep_assert_held(&q->rq_qos_mutex);
+
+	rqos->disk = disk;
+	rqos->id = id;
+	rqos->ops = ops;
+
+	/*
+	 * No IO can be in-flight when adding rqos, so freeze queue, which
+	 * is fine since we only support rq_qos for blk-mq queue.
+	 */
+	memflags = blk_mq_freeze_queue(q);
+
+	if (rq_qos_id(q, rqos->id))
+		goto ebusy;
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+	blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
+
+	blk_mq_unfreeze_queue(q, memflags);
+
+	if (rqos->ops->debugfs_attrs) {
+		mutex_lock(&q->debugfs_mutex);
+		blk_mq_debugfs_register_rqos(rqos);
+		mutex_unlock(&q->debugfs_mutex);
+	}
+
+	return 0;
+ebusy:
+	blk_mq_unfreeze_queue(q, memflags);
+	return -EBUSY;
+}
+
+void rq_qos_del(struct rq_qos *rqos)
+{
+	struct request_queue *q = rqos->disk->queue;
+	struct rq_qos **cur;
+	unsigned int memflags;
+
+	lockdep_assert_held(&q->rq_qos_mutex);
+
+	memflags = blk_mq_freeze_queue(q);
+	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+		if (*cur == rqos) {
+			*cur = rqos->next;
+			break;
+		}
+	}
+	if (!q->rq_qos)
+		blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q);
+	blk_mq_unfreeze_queue(q, memflags);
+
+	mutex_lock(&q->debugfs_mutex);
+	blk_mq_debugfs_unregister_rqos(rqos);
+	mutex_unlock(&q->debugfs_mutex);
 }
|
