summaryrefslogtreecommitdiff
path: root/block/bfq-iosched.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--block/bfq-iosched.c342
1 files changed, 268 insertions, 74 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index a4783da90ba8..bcb6d21baf12 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -108,6 +108,7 @@
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
#include "bfq-iosched.h"
+#include "blk-wbt.h"
#define BFQ_BFQQ_FNS(name) \
void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
@@ -724,6 +725,44 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
}
}
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+ u64 dur;
+
+ if (bfqd->bfq_wr_max_time > 0)
+ return bfqd->bfq_wr_max_time;
+
+ dur = bfqd->RT_prod;
+ do_div(dur, bfqd->peak_rate);
+
+ /*
+ * Limit duration between 3 and 13 seconds. Tests show that
+ * higher values than 13 seconds often yield the opposite of
+ * the desired result, i.e., worsen responsiveness by letting
+ * non-interactive and non-soft-real-time applications
+ * preserve weight raising for a too long time interval.
+ *
+ * On the other end, lower values than 3 seconds make it
+ * difficult for most interactive tasks to complete their jobs
+ * before weight-raising finishes.
+ */
+ if (dur > msecs_to_jiffies(13000))
+ dur = msecs_to_jiffies(13000);
+ else if (dur < msecs_to_jiffies(3000))
+ dur = msecs_to_jiffies(3000);
+
+ return dur;
+}
+
+/* switch back from soft real-time to interactive weight raising */
+static void switch_back_to_interactive_wr(struct bfq_queue *bfqq,
+ struct bfq_data *bfqd)
+{
+ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+ bfqq->last_wr_start_finish = bfqq->wr_start_at_switch_to_srt;
+}
+
static void
bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
struct bfq_io_cq *bic, bool bfq_already_existing)
@@ -750,10 +789,16 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
time_is_before_jiffies(bfqq->last_wr_start_finish +
bfqq->wr_cur_max_time))) {
- bfq_log_bfqq(bfqq->bfqd, bfqq,
- "resume state: switching off wr");
-
- bfqq->wr_coeff = 1;
+ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+ !bfq_bfqq_in_large_burst(bfqq) &&
+ time_is_after_eq_jiffies(bfqq->wr_start_at_switch_to_srt +
+ bfq_wr_duration(bfqd))) {
+ switch_back_to_interactive_wr(bfqq, bfqd);
+ } else {
+ bfqq->wr_coeff = 1;
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "resume state: switching off wr");
+ }
}
/* make sure weight will be updated, however we got here */
@@ -1173,33 +1218,22 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
return wr_or_deserves_wr;
}
-static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+/*
+ * Return the farthest future time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_greatest_from_now(void)
{
- u64 dur;
-
- if (bfqd->bfq_wr_max_time > 0)
- return bfqd->bfq_wr_max_time;
-
- dur = bfqd->RT_prod;
- do_div(dur, bfqd->peak_rate);
-
- /*
- * Limit duration between 3 and 13 seconds. Tests show that
- * higher values than 13 seconds often yield the opposite of
- * the desired result, i.e., worsen responsiveness by letting
- * non-interactive and non-soft-real-time applications
- * preserve weight raising for a too long time interval.
- *
- * On the other end, lower values than 3 seconds make it
- * difficult for most interactive tasks to complete their jobs
- * before weight-raising finishes.
- */
- if (dur > msecs_to_jiffies(13000))
- dur = msecs_to_jiffies(13000);
- else if (dur < msecs_to_jiffies(3000))
- dur = msecs_to_jiffies(3000);
+ return jiffies + MAX_JIFFY_OFFSET;
+}
- return dur;
+/*
+ * Return the farthest past time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_smallest_from_now(void)
+{
+ return jiffies - MAX_JIFFY_OFFSET;
}
static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
@@ -1216,7 +1250,19 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
bfqq->wr_coeff = bfqd->bfq_wr_coeff;
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
} else {
- bfqq->wr_start_at_switch_to_srt = jiffies;
+ /*
+ * No interactive weight raising in progress
+ * here: assign minus infinity to
+ * wr_start_at_switch_to_srt, to make sure
+ * that, at the end of the soft-real-time
+ * weight raising periods that is starting
+ * now, no interactive weight-raising period
+ * may be wrongly considered as still in
+ * progress (and thus actually started by
+ * mistake).
+ */
+ bfqq->wr_start_at_switch_to_srt =
+ bfq_smallest_from_now();
bfqq->wr_coeff = bfqd->bfq_wr_coeff *
BFQ_SOFTRT_WEIGHT_FACTOR;
bfqq->wr_cur_max_time =
@@ -1313,7 +1359,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
bfqq->ttime.last_end_request +
bfqd->bfq_slice_idle * 3;
- bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags);
/*
* bfqq deserves to be weight-raised if:
@@ -1587,7 +1632,6 @@ static void bfq_remove_request(struct request_queue *q,
if (rq->cmd_flags & REQ_META)
bfqq->meta_pending--;
- bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
}
static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
@@ -1700,6 +1744,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfq_remove_request(q, next);
+ bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
spin_unlock_irq(&bfqq->bfqd->lock);
end:
@@ -2016,10 +2061,27 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
- bic->saved_wr_coeff = bfqq->wr_coeff;
- bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
- bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
- bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+ if (unlikely(bfq_bfqq_just_created(bfqq) &&
+ !bfq_bfqq_in_large_burst(bfqq))) {
+ /*
+ * bfqq being merged right after being created: bfqq
+ * would have deserved interactive weight raising, but
+ * did not make it to be set in a weight-raised state,
+ * because of this early merge. Store directly the
+ * weight-raising state that would have been assigned
+ * to bfqq, so that to avoid that bfqq unjustly fails
+ * to enjoy weight raising if split soon.
+ */
+ bic->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+ bic->saved_wr_cur_max_time = bfq_wr_duration(bfqq->bfqd);
+ bic->saved_last_wr_start_finish = jiffies;
+ } else {
+ bic->saved_wr_coeff = bfqq->wr_coeff;
+ bic->saved_wr_start_at_switch_to_srt =
+ bfqq->wr_start_at_switch_to_srt;
+ bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
+ bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+ }
}
static void
@@ -2166,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
if (bfqq) {
- bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
bfq_clear_bfqq_fifo_expire(bfqq);
bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
@@ -2897,24 +2958,6 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
}
-/*
- * Return the farthest future time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_greatest_from_now(void)
-{
- return jiffies + MAX_JIFFY_OFFSET;
-}
-
-/*
- * Return the farthest past time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_smallest_from_now(void)
-{
- return jiffies - MAX_JIFFY_OFFSET;
-}
-
/**
* bfq_bfqq_expire - expire a queue.
* @bfqd: device owning the queue.
@@ -3425,7 +3468,6 @@ check_queue:
*/
bfq_clear_bfqq_wait_request(bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
- bfqg_stats_update_idle_time(bfqq_group(bfqq));
}
goto keep_queue;
}
@@ -3489,11 +3531,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_wr_duration(bfqd)))
bfq_bfqq_end_wr(bfqq);
else {
- /* switch back to interactive wr */
- bfqq->wr_coeff = bfqd->bfq_wr_coeff;
- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
- bfqq->last_wr_start_finish =
- bfqq->wr_start_at_switch_to_srt;
+ switch_back_to_interactive_wr(bfqq, bfqd);
bfqq->entity.prio_changed = 1;
}
}
@@ -3655,12 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct request *rq;
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ struct bfq_queue *in_serv_queue, *bfqq;
+ bool waiting_rq, idle_timer_disabled;
+#endif
spin_lock_irq(&bfqd->lock);
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ in_serv_queue = bfqd->in_service_queue;
+ waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
+
rq = __bfq_dispatch_request(hctx);
+
+ idle_timer_disabled =
+ waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+
+#else
+ rq = __bfq_dispatch_request(hctx);
+#endif
spin_unlock_irq(&bfqd->lock);
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ bfqq = rq ? RQ_BFQQ(rq) : NULL;
+ if (!idle_timer_disabled && !bfqq)
+ return rq;
+
+ /*
+ * rq and bfqq are guaranteed to exist until this function
+ * ends, for the following reasons. First, rq can be
+ * dispatched to the device, and then can be completed and
+ * freed, only after this function ends. Second, rq cannot be
+ * merged (and thus freed because of a merge) any longer,
+ * because it has already started. Thus rq cannot be freed
+ * before this function ends, and, since rq has a reference to
+ * bfqq, the same guarantee holds for bfqq too.
+ *
+ * In addition, the following queue lock guarantees that
+ * bfqq_group(bfqq) exists as well.
+ */
+ spin_lock_irq(hctx->queue->queue_lock);
+ if (idle_timer_disabled)
+ /*
+ * Since the idle timer has been disabled,
+ * in_serv_queue contained some request when
+ * __bfq_dispatch_request was invoked above, which
+ * implies that rq was picked exactly from
+ * in_serv_queue. Thus in_serv_queue == bfqq, and is
+ * therefore guaranteed to exist because of the above
+ * arguments.
+ */
+ bfqg_stats_update_idle_time(bfqq_group(in_serv_queue));
+ if (bfqq) {
+ struct bfq_group *bfqg = bfqq_group(bfqq);
+
+ bfqg_stats_update_avg_queue_size(bfqg);
+ bfqg_stats_set_start_empty_time(bfqg);
+ bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
+ }
+ spin_unlock_irq(hctx->queue->queue_lock);
+#endif
+
return rq;
}
@@ -3685,16 +3778,37 @@ void bfq_put_queue(struct bfq_queue *bfqq)
if (bfqq->ref)
return;
- if (bfq_bfqq_sync(bfqq))
+ if (!hlist_unhashed(&bfqq->burst_list_node)) {
+ hlist_del_init(&bfqq->burst_list_node);
/*
- * The fact that this queue is being destroyed does not
- * invalidate the fact that this queue may have been
- * activated during the current burst. As a consequence,
- * although the queue does not exist anymore, and hence
- * needs to be removed from the burst list if there,
- * the burst size has not to be decremented.
+ * Decrement also burst size after the removal, if the
+ * process associated with bfqq is exiting, and thus
+ * does not contribute to the burst any longer. This
+ * decrement helps filter out false positives of large
+ * bursts, when some short-lived process (often due to
+ * the execution of commands by some service) happens
+ * to start and exit while a complex application is
+ * starting, and thus spawning several processes that
+ * do I/O (and that *must not* be treated as a large
+ * burst, see comments on bfq_handle_burst).
+ *
+ * In particular, the decrement is performed only if:
+ * 1) bfqq is not a merged queue, because, if it is,
+ * then this free of bfqq is not triggered by the exit
+ * of the process bfqq is associated with, but exactly
+ * by the fact that bfqq has just been merged.
+ * 2) burst_size is greater than 0, to handle
+ * unbalanced decrements. Unbalanced decrements may
+ * happen in te following case: bfqq is inserted into
+ * the current burst list--without incrementing
+ * bust_size--because of a split, but the current
+ * burst list is not the burst list bfqq belonged to
+ * (see comments on the case of a split in
+ * bfq_set_request).
*/
- hlist_del_init(&bfqq->burst_list_node);
+ if (bfqq->bic && bfqq->bfqd->burst_size > 0)
+ bfqq->bfqd->burst_size--;
+ }
kmem_cache_free(bfq_pool, bfqq);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -4097,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
bfq_clear_bfqq_wait_request(bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
- bfqg_stats_update_idle_time(bfqq_group(bfqq));
/*
* The queue is not empty, because a new request just
@@ -4112,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
}
-static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
+/* returns true if it causes the idle timer to be disabled */
+static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq),
*new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+ bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
@@ -4127,7 +4242,6 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
new_bfqq->allocated++;
bfqq->allocated--;
new_bfqq->ref++;
- bfq_clear_bfqq_just_created(bfqq);
/*
* If the bic associated with the process
* issuing this request still points to bfqq
@@ -4139,6 +4253,8 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
bfqq, new_bfqq);
+
+ bfq_clear_bfqq_just_created(bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
@@ -4148,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bfqq = new_bfqq;
}
+ waiting = bfqq && bfq_bfqq_wait_request(bfqq);
bfq_add_request(rq);
+ idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo);
bfq_rq_enqueued(bfqd, bfqq, rq);
+
+ return idle_timer_disabled;
}
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -4161,6 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
{
struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data;
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+ bool idle_timer_disabled = false;
+ unsigned int cmd_flags;
+#endif
spin_lock_irq(&bfqd->lock);
if (blk_mq_sched_try_insert_merge(q, rq)) {
@@ -4179,7 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else {
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ idle_timer_disabled = __bfq_insert_request(bfqd, rq);
+ /*
+ * Update bfqq, because, if a queue merge has occurred
+ * in __bfq_insert_request, then rq has been
+ * redirected into a new queue.
+ */
+ bfqq = RQ_BFQQ(rq);
+#else
__bfq_insert_request(bfqd, rq);
+#endif
if (rq_mergeable(rq)) {
elv_rqhash_add(q, rq);
@@ -4188,7 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
}
}
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ /*
+ * Cache cmd_flags before releasing scheduler lock, because rq
+ * may disappear afterwards (for example, because of a request
+ * merge).
+ */
+ cmd_flags = rq->cmd_flags;
+#endif
spin_unlock_irq(&bfqd->lock);
+
+#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
+ if (!bfqq)
+ return;
+ /*
+ * bfqq still exists, because it can disappear only after
+ * either it is merged with another queue, or the process it
+ * is associated with exits. But both actions must be taken by
+ * the same process currently executing this flow of
+ * instruction.
+ *
+ * In addition, the following queue lock guarantees that
+ * bfqq_group(bfqq) exists as well.
+ */
+ spin_lock_irq(q->queue_lock);
+ bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
+ if (idle_timer_disabled)
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+ spin_unlock_irq(q->queue_lock);
+#endif
}
static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
@@ -4365,8 +4528,11 @@ static void bfq_finish_request(struct request *rq)
* lock is held.
*/
- if (!RB_EMPTY_NODE(&rq->rb_node))
+ if (!RB_EMPTY_NODE(&rq->rb_node)) {
bfq_remove_request(rq->q, rq);
+ bfqg_stats_update_io_remove(bfqq_group(bfqq),
+ rq->cmd_flags);
+ }
bfq_put_rq_priv_body(bfqq);
}
@@ -4424,6 +4590,34 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
else {
bfq_clear_bfqq_in_large_burst(bfqq);
if (bic->was_in_burst_list)
+ /*
+ * If bfqq was in the current
+ * burst list before being
+ * merged, then we have to add
+ * it back. And we do not need
+ * to increase burst_size, as
+ * we did not decrement
+ * burst_size when we removed
+ * bfqq from the burst list as
+ * a consequence of a merge
+ * (see comments in
+ * bfq_put_queue). In this
+ * respect, it would be rather
+ * costly to know whether the
+ * current burst list is still
+ * the same burst list from
+ * which bfqq was removed on
+ * the merge. To avoid this
+ * cost, if bfqq was in a
+ * burst list, then we add
+ * bfqq to the current burst
+ * list without any further
+ * check. This can cause
+ * inappropriate insertions,
+ * but rarely enough to not
+ * harm the detection of large
+ * bursts significantly.
+ */
hlist_add_head(&bfqq->burst_list_node,
&bfqd->burst_list);
}
@@ -4775,7 +4969,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
-
+ wbt_disable_default(q);
return 0;
out_free: