diff options
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r-- | block/bfq-iosched.c | 342 |
1 files changed, 268 insertions, 74 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index a4783da90ba8..bcb6d21baf12 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -108,6 +108,7 @@ #include "blk-mq-tag.h" #include "blk-mq-sched.h" #include "bfq-iosched.h" +#include "blk-wbt.h" #define BFQ_BFQQ_FNS(name) \ void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ @@ -724,6 +725,44 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, } } +static unsigned int bfq_wr_duration(struct bfq_data *bfqd) +{ + u64 dur; + + if (bfqd->bfq_wr_max_time > 0) + return bfqd->bfq_wr_max_time; + + dur = bfqd->RT_prod; + do_div(dur, bfqd->peak_rate); + + /* + * Limit duration between 3 and 13 seconds. Tests show that + * higher values than 13 seconds often yield the opposite of + * the desired result, i.e., worsen responsiveness by letting + * non-interactive and non-soft-real-time applications + * preserve weight raising for a too long time interval. + * + * On the other end, lower values than 3 seconds make it + * difficult for most interactive tasks to complete their jobs + * before weight-raising finishes. + */ + if (dur > msecs_to_jiffies(13000)) + dur = msecs_to_jiffies(13000); + else if (dur < msecs_to_jiffies(3000)) + dur = msecs_to_jiffies(3000); + + return dur; +} + +/* switch back from soft real-time to interactive weight raising */ +static void switch_back_to_interactive_wr(struct bfq_queue *bfqq, + struct bfq_data *bfqd) +{ + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + bfqq->last_wr_start_finish = bfqq->wr_start_at_switch_to_srt; +} + static void bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, struct bfq_io_cq *bic, bool bfq_already_existing) @@ -750,10 +789,16 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || time_is_before_jiffies(bfqq->last_wr_start_finish + bfqq->wr_cur_max_time))) { - bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching off wr"); - - bfqq->wr_coeff = 1; + if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && + !bfq_bfqq_in_large_burst(bfqq) && + time_is_after_eq_jiffies(bfqq->wr_start_at_switch_to_srt + + bfq_wr_duration(bfqd))) { + switch_back_to_interactive_wr(bfqq, bfqd); + } else { + bfqq->wr_coeff = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, + "resume state: switching off wr"); + } } /* make sure weight will be updated, however we got here */ @@ -1173,33 +1218,22 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, return wr_or_deserves_wr; } -static unsigned int bfq_wr_duration(struct bfq_data *bfqd) +/* + * Return the farthest future time instant according to jiffies + * macros. + */ +static unsigned long bfq_greatest_from_now(void) { - u64 dur; - - if (bfqd->bfq_wr_max_time > 0) - return bfqd->bfq_wr_max_time; - - dur = bfqd->RT_prod; - do_div(dur, bfqd->peak_rate); - - /* - * Limit duration between 3 and 13 seconds. Tests show that - * higher values than 13 seconds often yield the opposite of - * the desired result, i.e., worsen responsiveness by letting - * non-interactive and non-soft-real-time applications - * preserve weight raising for a too long time interval. - * - * On the other end, lower values than 3 seconds make it - * difficult for most interactive tasks to complete their jobs - * before weight-raising finishes. - */ - if (dur > msecs_to_jiffies(13000)) - dur = msecs_to_jiffies(13000); - else if (dur < msecs_to_jiffies(3000)) - dur = msecs_to_jiffies(3000); + return jiffies + MAX_JIFFY_OFFSET; +} - return dur; +/* + * Return the farthest past time instant according to jiffies + * macros. + */ +static unsigned long bfq_smallest_from_now(void) +{ + return jiffies - MAX_JIFFY_OFFSET; } static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, @@ -1216,7 +1250,19 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); } else { - bfqq->wr_start_at_switch_to_srt = jiffies; + /* + * No interactive weight raising in progress + * here: assign minus infinity to + * wr_start_at_switch_to_srt, to make sure + * that, at the end of the soft-real-time + * weight raising periods that is starting + * now, no interactive weight-raising period + * may be wrongly considered as still in + * progress (and thus actually started by + * mistake). + */ + bfqq->wr_start_at_switch_to_srt = + bfq_smallest_from_now(); bfqq->wr_coeff = bfqd->bfq_wr_coeff * BFQ_SOFTRT_WEIGHT_FACTOR; bfqq->wr_cur_max_time = @@ -1313,7 +1359,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfqq->ttime.last_end_request + bfqd->bfq_slice_idle * 3; - bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags); /* * bfqq deserves to be weight-raised if: @@ -1587,7 +1632,6 @@ static void bfq_remove_request(struct request_queue *q, if (rq->cmd_flags & REQ_META) bfqq->meta_pending--; - bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); } static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) @@ -1700,6 +1744,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, bfqq->next_rq = rq; bfq_remove_request(q, next); + bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags); spin_unlock_irq(&bfqq->bfqd->lock); end: @@ -2016,10 +2061,27 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); - bic->saved_wr_coeff = bfqq->wr_coeff; - bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; - bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; - bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; + if (unlikely(bfq_bfqq_just_created(bfqq) && + !bfq_bfqq_in_large_burst(bfqq))) { + /* + * bfqq being merged right after being created: bfqq + * would have deserved interactive weight raising, but + * did not make it to be set in a weight-raised state, + * because of this early merge. Store directly the + * weight-raising state that would have been assigned + * to bfqq, so that to avoid that bfqq unjustly fails + * to enjoy weight raising if split soon. + */ + bic->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff; + bic->saved_wr_cur_max_time = bfq_wr_duration(bfqq->bfqd); + bic->saved_last_wr_start_finish = jiffies; + } else { + bic->saved_wr_coeff = bfqq->wr_coeff; + bic->saved_wr_start_at_switch_to_srt = + bfqq->wr_start_at_switch_to_srt; + bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; + bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; + } } static void @@ -2166,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq) { if (bfqq) { - bfqg_stats_update_avg_queue_size(bfqq_group(bfqq)); bfq_clear_bfqq_fifo_expire(bfqq); bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8; @@ -2897,24 +2958,6 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } -/* - * Return the farthest future time instant according to jiffies - * macros. - */ -static unsigned long bfq_greatest_from_now(void) -{ - return jiffies + MAX_JIFFY_OFFSET; -} - -/* - * Return the farthest past time instant according to jiffies - * macros. - */ -static unsigned long bfq_smallest_from_now(void) -{ - return jiffies - MAX_JIFFY_OFFSET; -} - /** * bfq_bfqq_expire - expire a queue. * @bfqd: device owning the queue. @@ -3425,7 +3468,6 @@ check_queue: */ bfq_clear_bfqq_wait_request(bfqq); hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqg_stats_update_idle_time(bfqq_group(bfqq)); } goto keep_queue; } @@ -3489,11 +3531,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_wr_duration(bfqd))) bfq_bfqq_end_wr(bfqq); else { - /* switch back to interactive wr */ - bfqq->wr_coeff = bfqd->bfq_wr_coeff; - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); - bfqq->last_wr_start_finish = - bfqq->wr_start_at_switch_to_srt; + switch_back_to_interactive_wr(bfqq, bfqd); bfqq->entity.prio_changed = 1; } } @@ -3655,12 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + struct bfq_queue *in_serv_queue, *bfqq; + bool waiting_rq, idle_timer_disabled; +#endif spin_lock_irq(&bfqd->lock); +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + in_serv_queue = bfqd->in_service_queue; + waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); + rq = __bfq_dispatch_request(hctx); + + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + +#else + rq = __bfq_dispatch_request(hctx); +#endif spin_unlock_irq(&bfqd->lock); +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + bfqq = rq ? RQ_BFQQ(rq) : NULL; + if (!idle_timer_disabled && !bfqq) + return rq; + + /* + * rq and bfqq are guaranteed to exist until this function + * ends, for the following reasons. First, rq can be + * dispatched to the device, and then can be completed and + * freed, only after this function ends. Second, rq cannot be + * merged (and thus freed because of a merge) any longer, + * because it has already started. Thus rq cannot be freed + * before this function ends, and, since rq has a reference to + * bfqq, the same guarantee holds for bfqq too. + * + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. + */ + spin_lock_irq(hctx->queue->queue_lock); + if (idle_timer_disabled) + /* + * Since the idle timer has been disabled, + * in_serv_queue contained some request when + * __bfq_dispatch_request was invoked above, which + * implies that rq was picked exactly from + * in_serv_queue. Thus in_serv_queue == bfqq, and is + * therefore guaranteed to exist because of the above + * arguments. + */ + bfqg_stats_update_idle_time(bfqq_group(in_serv_queue)); + if (bfqq) { + struct bfq_group *bfqg = bfqq_group(bfqq); + + bfqg_stats_update_avg_queue_size(bfqg); + bfqg_stats_set_start_empty_time(bfqg); + bfqg_stats_update_io_remove(bfqg, rq->cmd_flags); + } + spin_unlock_irq(hctx->queue->queue_lock); +#endif + return rq; } @@ -3685,16 +3778,37 @@ void bfq_put_queue(struct bfq_queue *bfqq) if (bfqq->ref) return; - if (bfq_bfqq_sync(bfqq)) + if (!hlist_unhashed(&bfqq->burst_list_node)) { + hlist_del_init(&bfqq->burst_list_node); /* - * The fact that this queue is being destroyed does not - * invalidate the fact that this queue may have been - * activated during the current burst. As a consequence, - * although the queue does not exist anymore, and hence - * needs to be removed from the burst list if there, - * the burst size has not to be decremented. + * Decrement also burst size after the removal, if the + * process associated with bfqq is exiting, and thus + * does not contribute to the burst any longer. This + * decrement helps filter out false positives of large + * bursts, when some short-lived process (often due to + * the execution of commands by some service) happens + * to start and exit while a complex application is + * starting, and thus spawning several processes that + * do I/O (and that *must not* be treated as a large + * burst, see comments on bfq_handle_burst). + * + * In particular, the decrement is performed only if: + * 1) bfqq is not a merged queue, because, if it is, + * then this free of bfqq is not triggered by the exit + * of the process bfqq is associated with, but exactly + * by the fact that bfqq has just been merged. + * 2) burst_size is greater than 0, to handle + * unbalanced decrements. Unbalanced decrements may + * happen in te following case: bfqq is inserted into + * the current burst list--without incrementing + * bust_size--because of a split, but the current + * burst list is not the burst list bfqq belonged to + * (see comments on the case of a split in + * bfq_set_request). */ - hlist_del_init(&bfqq->burst_list_node); + if (bfqq->bic && bfqq->bfqd->burst_size > 0) + bfqq->bfqd->burst_size--; + } kmem_cache_free(bfq_pool, bfqq); #ifdef CONFIG_BFQ_GROUP_IOSCHED @@ -4097,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, */ bfq_clear_bfqq_wait_request(bfqq); hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqg_stats_update_idle_time(bfqq_group(bfqq)); /* * The queue is not empty, because a new request just @@ -4112,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, } } -static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) +/* returns true if it causes the idle timer to be disabled */ +static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); + bool waiting, idle_timer_disabled = false; if (new_bfqq) { if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) @@ -4127,7 +4242,6 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) new_bfqq->allocated++; bfqq->allocated--; new_bfqq->ref++; - bfq_clear_bfqq_just_created(bfqq); /* * If the bic associated with the process * issuing this request still points to bfqq @@ -4139,6 +4253,8 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq, new_bfqq); + + bfq_clear_bfqq_just_created(bfqq); /* * rq is about to be enqueued into new_bfqq, * release rq reference on bfqq @@ -4148,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bfqq = new_bfqq; } + waiting = bfqq && bfq_bfqq_wait_request(bfqq); bfq_add_request(rq); + idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq); rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &bfqq->fifo); bfq_rq_enqueued(bfqd, bfqq, rq); + + return idle_timer_disabled; } static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, @@ -4161,6 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + struct bfq_queue *bfqq = RQ_BFQQ(rq); + bool idle_timer_disabled = false; + unsigned int cmd_flags; +#endif spin_lock_irq(&bfqd->lock); if (blk_mq_sched_try_insert_merge(q, rq)) { @@ -4179,7 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, else list_add_tail(&rq->queuelist, &bfqd->dispatch); } else { +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + idle_timer_disabled = __bfq_insert_request(bfqd, rq); + /* + * Update bfqq, because, if a queue merge has occurred + * in __bfq_insert_request, then rq has been + * redirected into a new queue. + */ + bfqq = RQ_BFQQ(rq); +#else __bfq_insert_request(bfqd, rq); +#endif if (rq_mergeable(rq)) { elv_rqhash_add(q, rq); @@ -4188,7 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, } } +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + /* + * Cache cmd_flags before releasing scheduler lock, because rq + * may disappear afterwards (for example, because of a request + * merge). + */ + cmd_flags = rq->cmd_flags; +#endif spin_unlock_irq(&bfqd->lock); + +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + if (!bfqq) + return; + /* + * bfqq still exists, because it can disappear only after + * either it is merged with another queue, or the process it + * is associated with exits. But both actions must be taken by + * the same process currently executing this flow of + * instruction. + * + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. + */ + spin_lock_irq(q->queue_lock); + bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); + if (idle_timer_disabled) + bfqg_stats_update_idle_time(bfqq_group(bfqq)); + spin_unlock_irq(q->queue_lock); +#endif } static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, @@ -4365,8 +4528,11 @@ static void bfq_finish_request(struct request *rq) * lock is held. */ - if (!RB_EMPTY_NODE(&rq->rb_node)) + if (!RB_EMPTY_NODE(&rq->rb_node)) { bfq_remove_request(rq->q, rq); + bfqg_stats_update_io_remove(bfqq_group(bfqq), + rq->cmd_flags); + } bfq_put_rq_priv_body(bfqq); } @@ -4424,6 +4590,34 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, else { bfq_clear_bfqq_in_large_burst(bfqq); if (bic->was_in_burst_list) + /* + * If bfqq was in the current + * burst list before being + * merged, then we have to add + * it back. And we do not need + * to increase burst_size, as + * we did not decrement + * burst_size when we removed + * bfqq from the burst list as + * a consequence of a merge + * (see comments in + * bfq_put_queue). In this + * respect, it would be rather + * costly to know whether the + * current burst list is still + * the same burst list from + * which bfqq was removed on + * the merge. To avoid this + * cost, if bfqq was in a + * burst list, then we add + * bfqq to the current burst + * list without any further + * check. This can cause + * inappropriate insertions, + * but rarely enough to not + * harm the detection of large + * bursts significantly. + */ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); } @@ -4775,7 +4969,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfq_init_root_group(bfqd->root_group, bfqd); bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); - + wbt_disable_default(q); return 0; out_free: |