Diffstat (limited to 'block/bfq-cgroup.c')
-rw-r--r--	block/bfq-cgroup.c	229
1 file changed, 91 insertions(+), 138 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 144bca006463..9fb9f3533150 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -127,7 +127,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 	if (!bfqg_stats_waiting(stats))
 		return;
 
-	now = ktime_get_ns();
+	now = blk_time_get_ns();
 	if (now > stats->start_group_wait_time)
 		bfq_stat_add(&stats->group_wait_time,
 			     now - stats->start_group_wait_time);
@@ -144,7 +144,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
 		return;
 	if (bfqg == curr_bfqg)
 		return;
-	stats->start_group_wait_time = ktime_get_ns();
+	stats->start_group_wait_time = blk_time_get_ns();
 	bfqg_stats_mark_waiting(stats);
 }
 
@@ -156,7 +156,7 @@ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
 	if (!bfqg_stats_empty(stats))
 		return;
 
-	now = ktime_get_ns();
+	now = blk_time_get_ns();
 	if (now > stats->start_empty_time)
 		bfq_stat_add(&stats->empty_time,
 			     now - stats->start_empty_time);
@@ -183,7 +183,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
 	if (bfqg_stats_empty(stats))
 		return;
 
-	stats->start_empty_time = ktime_get_ns();
+	stats->start_empty_time = blk_time_get_ns();
 	bfqg_stats_mark_empty(stats);
 }
 
@@ -192,7 +192,7 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
 	struct bfqg_stats *stats = &bfqg->stats;
 
 	if (bfqg_stats_idling(stats)) {
-		u64 now = ktime_get_ns();
+		u64 now = blk_time_get_ns();
 
 		if (now > stats->start_idle_time)
 			bfq_stat_add(&stats->idle_time,
@@ -205,7 +205,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
 
-	stats->start_idle_time = ktime_get_ns();
+	stats->start_idle_time = blk_time_get_ns();
 	bfqg_stats_mark_idling(stats);
 }
 
@@ -224,7 +224,7 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
 {
 	blkg_rwstat_add(&bfqg->stats.queued, opf, 1);
 	bfqg_stats_end_empty_time(&bfqg->stats);
-	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
+	if (!(bfqq == bfqg->bfqd->in_service_queue))
 		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
 }
 
@@ -242,7 +242,7 @@ void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
 				  u64 io_start_time_ns, blk_opf_t opf)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
-	u64 now = ktime_get_ns();
+	u64 now = blk_time_get_ns();
 
 	if (now > io_start_time_ns)
 		blkg_rwstat_add(&stats->service_time, opf,
@@ -316,14 +316,12 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
 
 static void bfqg_get(struct bfq_group *bfqg)
 {
-	bfqg->ref++;
+	refcount_inc(&bfqg->ref);
 }
 
 static void bfqg_put(struct bfq_group *bfqg)
 {
-	bfqg->ref--;
-
-	if (bfqg->ref == 0)
+	if (refcount_dec_and_test(&bfqg->ref))
 		kfree(bfqg);
 }
 
@@ -499,15 +497,9 @@ static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
 	bgd = kzalloc(sizeof(*bgd), gfp);
 	if (!bgd)
 		return NULL;
-	return &bgd->pd;
-}
-
-static void bfq_cpd_init(struct blkcg_policy_data *cpd)
-{
-	struct bfq_group_data *d = cpd_to_bfqgd(cpd);
 
-	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
-		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
+	bgd->weight = CGROUP_WEIGHT_DFL;
+	return &bgd->pd;
 }
 
 static void bfq_cpd_free(struct blkcg_policy_data *cpd)
@@ -515,12 +507,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd)
 	kfree(cpd_to_bfqgd(cpd));
 }
 
-static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
-					     struct blkcg *blkcg)
+static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk,
+					     struct blkcg *blkcg, gfp_t gfp)
 {
 	struct bfq_group *bfqg;
 
-	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
+	bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id);
 	if (!bfqg)
 		return NULL;
 
@@ -530,7 +522,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
 	}
 
 	/* see comments in bfq_bic_update_cgroup for why refcounting */
-	bfqg_get(bfqg);
+	refcount_set(&bfqg->ref, 1);
 	return &bfqg->pd;
 }
 
@@ -552,7 +544,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
 	 */
 	bfqg->bfqd = bfqd;
 	bfqg->active_entities = 0;
-	bfqg->online = true;
+	bfqg->num_queues_with_pending_reqs = 0;
 	bfqg->rq_pos_tree = RB_ROOT;
 }
 
@@ -610,8 +602,12 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
 	struct bfq_group *bfqg;
 
 	while (blkg) {
+		if (!blkg->online) {
+			blkg = blkg->parent;
+			continue;
+		}
 		bfqg = blkg_to_bfqg(blkg);
-		if (bfqg->online) {
+		if (bfqg->pd.online) {
 			bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
 			return bfqg;
 		}
@@ -641,6 +637,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	struct bfq_entity *entity = &bfqq->entity;
 	struct bfq_group *old_parent = bfqq_group(bfqq);
+	bool has_pending_reqs = false;
 
 	/*
 	 * No point to move bfqq to the same group, which can happen when
@@ -661,6 +658,11 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	 */
 	bfqq->ref++;
 
+	if (entity->in_groups_with_pending_reqs) {
+		has_pending_reqs = true;
+		bfq_del_bfqq_in_groups_with_pending_reqs(bfqq);
+	}
+
 	/* If bfqq is empty, then bfq_bfqq_expire also invokes
 	 * bfq_del_bfqq_busy, thereby removing bfqq and its entity
 	 * from data structures related to current group. Otherwise we
@@ -677,29 +679,67 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
 	bfqg_and_blkg_put(old_parent);
 
-	if (entity->parent &&
-	    entity->parent->last_bfqq_created == bfqq)
-		entity->parent->last_bfqq_created = NULL;
-	else if (bfqd->last_bfqq_created == bfqq)
-		bfqd->last_bfqq_created = NULL;
-
+	bfq_reassign_last_bfqq(bfqq, NULL);
 	entity->parent = bfqg->my_entity;
 	entity->sched_data = &bfqg->sched_data;
 	/* pin down bfqg and its associated blkg */
 	bfqg_and_blkg_get(bfqg);
 
+	if (has_pending_reqs)
+		bfq_add_bfqq_in_groups_with_pending_reqs(bfqq);
+
 	if (bfq_bfqq_busy(bfqq)) {
 		if (unlikely(!bfqd->nonrot_with_queueing))
 			bfq_pos_tree_add_move(bfqd, bfqq);
 		bfq_activate_bfqq(bfqd, bfqq);
 	}
 
-	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+	if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver)
 		bfq_schedule_dispatch(bfqd);
 	/* release extra ref taken above, bfqq may happen to be freed now */
 	bfq_put_queue(bfqq);
 }
 
+static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
+			       struct bfq_queue *sync_bfqq,
+			       struct bfq_io_cq *bic,
+			       struct bfq_group *bfqg,
+			       unsigned int act_idx)
+{
+	struct bfq_queue *bfqq;
+
+	if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+		/* We are the only user of this bfqq, just move it */
+		if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+		return;
+	}
+
+	/*
+	 * The queue was merged to a different queue. Check
+	 * that the merge chain still belongs to the same
+	 * cgroup.
+	 */
+	for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+		if (bfqq->entity.sched_data != &bfqg->sched_data)
+			break;
+	if (bfqq) {
+		/*
+		 * Some queue changed cgroup so the merge is not valid
+		 * anymore. We cannot easily just cancel the merge (by
+		 * clearing new_bfqq) as there may be other processes
+		 * using this queue and holding refs to all queues
+		 * below sync_bfqq->new_bfqq. Similarly if the merge
+		 * already happened, we need to detach from bfqq now
+		 * so that we cannot merge bio to a request from the
+		 * old cgroup.
+		 */
+		bfq_put_cooperator(sync_bfqq);
+		bic_set_bfqq(bic, NULL, true, act_idx);
+		bfq_release_process_ref(bfqd, sync_bfqq);
+	}
+}
+
 /**
  * __bfq_bic_change_cgroup - move @bic to @bfqg.
  * @bfqd: the queue descriptor.
@@ -710,60 +750,25 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * sure that the reference to cgroup is valid across the call (see
  * comments in bfq_bic_update_cgroup on this issue)
  */
-static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
-				     struct bfq_io_cq *bic,
-				     struct bfq_group *bfqg)
+static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
+				    struct bfq_io_cq *bic,
+				    struct bfq_group *bfqg)
 {
-	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
-	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
-	struct bfq_entity *entity;
+	unsigned int act_idx;
 
-	if (async_bfqq) {
-		entity = &async_bfqq->entity;
+	for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
+		struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
+		struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);
 
-		if (entity->sched_data != &bfqg->sched_data) {
-			bic_set_bfqq(bic, NULL, 0);
+		if (async_bfqq &&
+		    async_bfqq->entity.sched_data != &bfqg->sched_data) {
+			bic_set_bfqq(bic, NULL, false, act_idx);
 			bfq_release_process_ref(bfqd, async_bfqq);
 		}
-	}
 
-	if (sync_bfqq) {
-		if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
-			/* We are the only user of this bfqq, just move it */
-			if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
-				bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
-		} else {
-			struct bfq_queue *bfqq;
-
-			/*
-			 * The queue was merged to a different queue. Check
-			 * that the merge chain still belongs to the same
-			 * cgroup.
-			 */
-			for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
-				if (bfqq->entity.sched_data !=
-				    &bfqg->sched_data)
-					break;
-			if (bfqq) {
-				/*
-				 * Some queue changed cgroup so the merge is
-				 * not valid anymore. We cannot easily just
-				 * cancel the merge (by clearing new_bfqq) as
-				 * there may be other processes using this
-				 * queue and holding refs to all queues below
-				 * sync_bfqq->new_bfqq. Similarly if the merge
-				 * already happened, we need to detach from
-				 * bfqq now so that we cannot merge bio to a
-				 * request from the old cgroup.
-				 */
-				bfq_put_cooperator(sync_bfqq);
-				bfq_release_process_ref(bfqd, sync_bfqq);
-				bic_set_bfqq(bic, NULL, 1);
-			}
-		}
+		if (sync_bfqq)
+			bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx);
 	}
-
-	return bfqg;
 }
 
 void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
@@ -787,57 +792,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	 */
 	bfq_link_bfqg(bfqd, bfqg);
 	__bfq_bic_change_cgroup(bfqd, bic, bfqg);
-	/*
-	 * Update blkg_path for bfq_log_* functions. We cache this
-	 * path, and update it here, for the following
-	 * reasons. Operations on blkg objects in blk-cgroup are
-	 * protected with the request_queue lock, and not with the
-	 * lock that protects the instances of this scheduler
-	 * (bfqd->lock). This exposes BFQ to the following sort of
-	 * race.
-	 *
-	 * The blkg_lookup performed in bfq_get_queue, protected
-	 * through rcu, may happen to return the address of a copy of
-	 * the original blkg. If this is the case, then the
-	 * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
-	 * the blkg, is useless: it does not prevent blk-cgroup code
-	 * from destroying both the original blkg and all objects
-	 * directly or indirectly referred by the copy of the
-	 * blkg.
-	 *
-	 * On the bright side, destroy operations on a blkg invoke, as
-	 * a first step, hooks of the scheduler associated with the
-	 * blkg. And these hooks are executed with bfqd->lock held for
-	 * BFQ. As a consequence, for any blkg associated with the
-	 * request queue this instance of the scheduler is attached
-	 * to, we are guaranteed that such a blkg is not destroyed, and
-	 * that all the pointers it contains are consistent, while we
-	 * are holding bfqd->lock. A blkg_lookup performed with
-	 * bfqd->lock held then returns a fully consistent blkg, which
-	 * remains consistent until this lock is held.
-	 *
-	 * Thanks to the last fact, and to the fact that: (1) bfqg has
-	 * been obtained through a blkg_lookup in the above
-	 * assignment, and (2) bfqd->lock is being held, here we can
-	 * safely use the policy data for the involved blkg (i.e., the
-	 * field bfqg->pd) to get to the blkg associated with bfqg,
-	 * and then we can safely use any field of blkg. After we
-	 * release bfqd->lock, even just getting blkg through this
-	 * bfqg may cause dangling references to be traversed, as
-	 * bfqg->pd may not exist any more.
-	 *
-	 * In view of the above facts, here we cache, in the bfqg, any
-	 * blkg data we may need for this bic, and for its associated
-	 * bfq_queue. As of now, we need to cache only the path of the
-	 * blkg, which is used in the bfq_log_* functions.
-	 *
-	 * Finally, note that bfqg itself needs to be protected from
-	 * destruction on the blkg_free of the original blkg (which
-	 * invokes bfq_pd_free). We use an additional private
-	 * refcounter for bfqg, to let it disappear only after no
-	 * bfq_queue refers to it any longer.
-	 */
-	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
 	bic->blkcg_serial_nr = serial_nr;
 }
 
@@ -968,7 +922,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 
 put_async_queues:
 	bfq_put_async_queues(bfqd, bfqg);
-	bfqg->online = false;
 
 	spin_unlock_irqrestore(&bfqd->lock, flags);
 	/*
@@ -1096,9 +1049,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
 	struct bfq_group *bfqg;
 	u64 v;
 
-	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
+	blkg_conf_init(&ctx, buf);
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (sscanf(ctx.body, "%llu", &v) == 1) {
 		/* require "default" on dfl */
@@ -1120,7 +1075,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
 		ret = 0;
 	}
 out:
-	blkg_conf_finish(&ctx);
+	blkg_conf_exit(&ctx);
 	return ret ?: nbytes;
 }
 
@@ -1274,7 +1229,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
 {
 	int ret;
 
-	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
+	ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq);
 	if (ret)
 		return NULL;
 
@@ -1286,8 +1241,6 @@ struct blkcg_policy blkcg_policy_bfq = {
 	.legacy_cftypes = bfq_blkcg_legacy_files,
 
 	.cpd_alloc_fn = bfq_cpd_alloc,
-	.cpd_init_fn = bfq_cpd_init,
-	.cpd_bind_fn = bfq_cpd_init,
 	.cpd_free_fn = bfq_cpd_free,
 
 	.pd_alloc_fn = bfq_pd_alloc,
