diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-cgroup.c | 148 | ||||
-rw-r--r-- | block/bfq-iosched.c | 117 | ||||
-rw-r--r-- | block/bfq-iosched.h | 4 | ||||
-rw-r--r-- | block/bfq-wf2q.c | 1 | ||||
-rw-r--r-- | block/bio.c | 195 | ||||
-rw-r--r-- | block/blk-core.c | 14 | ||||
-rw-r--r-- | block/blk-map.c | 7 | ||||
-rw-r--r-- | block/blk-stat.c | 6 | ||||
-rw-r--r-- | block/blk-sysfs.c | 5 | ||||
-rw-r--r-- | block/blk-throttle.c | 9 | ||||
-rw-r--r-- | block/blk-wbt.c | 7 | ||||
-rw-r--r-- | block/genhd.c | 9 |
12 files changed, 294 insertions, 228 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index ceefb9a706d6..da1525ec4c87 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -24,7 +24,7 @@ #include "bfq-iosched.h" -#ifdef CONFIG_BFQ_GROUP_IOSCHED +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) /* bfqg stats flags */ enum bfqg_stats_flags { @@ -152,6 +152,57 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) bfqg_stats_update_group_wait_time(stats); } +void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, + unsigned int op) +{ + blkg_rwstat_add(&bfqg->stats.queued, op, 1); + bfqg_stats_end_empty_time(&bfqg->stats); + if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) + bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); +} + +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) +{ + blkg_rwstat_add(&bfqg->stats.queued, op, -1); +} + +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) +{ + blkg_rwstat_add(&bfqg->stats.merged, op, 1); +} + +void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, + uint64_t io_start_time, unsigned int op) +{ + struct bfqg_stats *stats = &bfqg->stats; + unsigned long long now = sched_clock(); + + if (time_after64(now, io_start_time)) + blkg_rwstat_add(&stats->service_time, op, + now - io_start_time); + if (time_after64(io_start_time, start_time)) + blkg_rwstat_add(&stats->wait_time, op, + io_start_time - start_time); +} + +#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ + +void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, + unsigned int op) { } +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { } +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { } +void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, + uint64_t io_start_time, unsigned int op) { } +void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } +void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } +void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { } +void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } +void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { } + +#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ + +#ifdef CONFIG_BFQ_GROUP_IOSCHED + /* * blk-cgroup policy-related handlers * The following functions help in converting between blk-cgroup @@ -229,42 +280,10 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg) blkg_put(bfqg_to_blkg(bfqg)); } -void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op) -{ - blkg_rwstat_add(&bfqg->stats.queued, op, 1); - bfqg_stats_end_empty_time(&bfqg->stats); - if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) - bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); -} - -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) -{ - blkg_rwstat_add(&bfqg->stats.queued, op, -1); -} - -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) -{ - blkg_rwstat_add(&bfqg->stats.merged, op, 1); -} - -void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, - uint64_t io_start_time, unsigned int op) -{ - struct bfqg_stats *stats = &bfqg->stats; - unsigned long long now = sched_clock(); - - if (time_after64(now, io_start_time)) - blkg_rwstat_add(&stats->service_time, op, - now - io_start_time); - if (time_after64(io_start_time, start_time)) - blkg_rwstat_add(&stats->wait_time, op, - io_start_time - start_time); -} - /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { +#ifdef CONFIG_DEBUG_BLK_CGROUP /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); @@ -276,6 +295,7 @@ static void bfqg_stats_reset(struct bfqg_stats *stats) blkg_stat_reset(&stats->group_wait_time); blkg_stat_reset(&stats->idle_time); blkg_stat_reset(&stats->empty_time); +#endif } /* @to += @from */ @@ -284,6 +304,7 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) if (!to || !from) return; +#ifdef CONFIG_DEBUG_BLK_CGROUP /* queued stats shouldn't be cleared */ blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); @@ -296,6 +317,7 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time); blkg_stat_add_aux(&to->idle_time, &from->idle_time); blkg_stat_add_aux(&to->empty_time, &from->empty_time); +#endif } /* @@ -342,6 +364,7 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) static void bfqg_stats_exit(struct bfqg_stats *stats) { +#ifdef CONFIG_DEBUG_BLK_CGROUP blkg_rwstat_exit(&stats->merged); blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); @@ -353,10 +376,12 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) blkg_stat_exit(&stats->group_wait_time); blkg_stat_exit(&stats->idle_time); blkg_stat_exit(&stats->empty_time); +#endif } static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { +#ifdef CONFIG_DEBUG_BLK_CGROUP if (blkg_rwstat_init(&stats->merged, gfp) || blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || @@ -371,6 +396,7 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) bfqg_stats_exit(stats); return -ENOMEM; } +#endif return 0; } @@ -887,6 +913,7 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, return bfq_io_set_weight_legacy(of_css(of), NULL, weight); } +#ifdef CONFIG_DEBUG_BLK_CGROUP static int bfqg_print_stat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, @@ -991,6 +1018,7 @@ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) 0, false); return 0; } +#endif /* CONFIG_DEBUG_BLK_CGROUP */ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { @@ -1029,15 +1057,6 @@ struct cftype bfq_blkcg_legacy_files[] = { /* statistics, covers only the tasks in the bfqg */ { - .name = "bfq.time", - .private = offsetof(struct bfq_group, stats.time), - .seq_show = bfqg_print_stat, - }, - { - .name = "bfq.sectors", - .seq_show = bfqg_print_stat_sectors, - }, - { .name = "bfq.io_service_bytes", .private = (unsigned long)&blkcg_policy_bfq, .seq_show = blkg_print_stat_bytes, @@ -1047,6 +1066,16 @@ struct cftype bfq_blkcg_legacy_files[] = { .private = (unsigned long)&blkcg_policy_bfq, .seq_show = blkg_print_stat_ios, }, +#ifdef CONFIG_DEBUG_BLK_CGROUP + { + .name = "bfq.time", + .private = offsetof(struct bfq_group, stats.time), + .seq_show = bfqg_print_stat, + }, + { + .name = "bfq.sectors", + .seq_show = bfqg_print_stat_sectors, + }, { .name = "bfq.io_service_time", .private = offsetof(struct bfq_group, stats.service_time), @@ -1067,18 +1096,10 @@ struct cftype bfq_blkcg_legacy_files[] = { .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat, }, +#endif /* CONFIG_DEBUG_BLK_CGROUP */ /* the same statictics which cover the bfqg and its descendants */ { - .name = "bfq.time_recursive", - .private = offsetof(struct bfq_group, stats.time), - .seq_show = bfqg_print_stat_recursive, - }, - { - .name = "bfq.sectors_recursive", - .seq_show = bfqg_print_stat_sectors_recursive, - }, - { .name = "bfq.io_service_bytes_recursive", .private = (unsigned long)&blkcg_policy_bfq, .seq_show = blkg_print_stat_bytes_recursive, @@ -1088,6 +1109,16 @@ struct cftype bfq_blkcg_legacy_files[] = { .private = (unsigned long)&blkcg_policy_bfq, .seq_show = blkg_print_stat_ios_recursive, }, +#ifdef CONFIG_DEBUG_BLK_CGROUP + { + .name = "bfq.time_recursive", + .private = offsetof(struct bfq_group, stats.time), + .seq_show = bfqg_print_stat_recursive, + }, + { + .name = "bfq.sectors_recursive", + .seq_show = bfqg_print_stat_sectors_recursive, + }, { .name = "bfq.io_service_time_recursive", .private = offsetof(struct bfq_group, stats.service_time), @@ -1132,6 +1163,7 @@ struct cftype bfq_blkcg_legacy_files[] = { .private = offsetof(struct bfq_group, stats.dequeue), .seq_show = bfqg_print_stat, }, +#endif /* CONFIG_DEBUG_BLK_CGROUP */ { } /* terminate */ }; @@ -1147,18 +1179,6 @@ struct cftype bfq_blkg_files[] = { #else /* CONFIG_BFQ_GROUP_IOSCHED */ -void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op) { } -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { } -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { } -void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time, - uint64_t io_start_time, unsigned int op) { } -void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } -void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } -void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { } -void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } -void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { } - void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) {} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 889a8549d97f..bcb6d21baf12 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1359,7 +1359,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfqq->ttime.last_end_request + bfqd->bfq_slice_idle * 3; - bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags); /* * bfqq deserves to be weight-raised if: @@ -1633,7 +1632,6 @@ static void bfq_remove_request(struct request_queue *q, if (rq->cmd_flags & REQ_META) bfqq->meta_pending--; - bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); } static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) @@ -1746,6 +1744,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, bfqq->next_rq = rq; bfq_remove_request(q, next); + bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags); spin_unlock_irq(&bfqq->bfqd->lock); end: @@ -2229,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq) { if (bfqq) { - bfqg_stats_update_avg_queue_size(bfqq_group(bfqq)); bfq_clear_bfqq_fifo_expire(bfqq); bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8; @@ -3470,7 +3468,6 @@ check_queue: */ bfq_clear_bfqq_wait_request(bfqq); hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqg_stats_update_idle_time(bfqq_group(bfqq)); } goto keep_queue; } @@ -3696,12 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + struct bfq_queue *in_serv_queue, *bfqq; + bool waiting_rq, idle_timer_disabled; +#endif spin_lock_irq(&bfqd->lock); +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + in_serv_queue = bfqd->in_service_queue; + waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); + + rq = __bfq_dispatch_request(hctx); + + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + +#else rq = __bfq_dispatch_request(hctx); +#endif spin_unlock_irq(&bfqd->lock); +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + bfqq = rq ? RQ_BFQQ(rq) : NULL; + if (!idle_timer_disabled && !bfqq) + return rq; + + /* + * rq and bfqq are guaranteed to exist until this function + * ends, for the following reasons. First, rq can be + * dispatched to the device, and then can be completed and + * freed, only after this function ends. Second, rq cannot be + * merged (and thus freed because of a merge) any longer, + * because it has already started. Thus rq cannot be freed + * before this function ends, and, since rq has a reference to + * bfqq, the same guarantee holds for bfqq too. + * + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. + */ + spin_lock_irq(hctx->queue->queue_lock); + if (idle_timer_disabled) + /* + * Since the idle timer has been disabled, + * in_serv_queue contained some request when + * __bfq_dispatch_request was invoked above, which + * implies that rq was picked exactly from + * in_serv_queue. Thus in_serv_queue == bfqq, and is + * therefore guaranteed to exist because of the above + * arguments. + */ + bfqg_stats_update_idle_time(bfqq_group(in_serv_queue)); + if (bfqq) { + struct bfq_group *bfqg = bfqq_group(bfqq); + + bfqg_stats_update_avg_queue_size(bfqg); + bfqg_stats_set_start_empty_time(bfqg); + bfqg_stats_update_io_remove(bfqg, rq->cmd_flags); + } + spin_unlock_irq(hctx->queue->queue_lock); +#endif + return rq; } @@ -4159,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, */ bfq_clear_bfqq_wait_request(bfqq); hrtimer_try_to_cancel(&bfqd->idle_slice_timer); - bfqg_stats_update_idle_time(bfqq_group(bfqq)); /* * The queue is not empty, because a new request just @@ -4174,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, } } -static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) +/* returns true if it causes the idle timer to be disabled */ +static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); + bool waiting, idle_timer_disabled = false; if (new_bfqq) { if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) @@ -4211,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bfqq = new_bfqq; } + waiting = bfqq && bfq_bfqq_wait_request(bfqq); bfq_add_request(rq); + idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq); rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &bfqq->fifo); bfq_rq_enqueued(bfqd, bfqq, rq); + + return idle_timer_disabled; } static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, @@ -4224,6 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + struct bfq_queue *bfqq = RQ_BFQQ(rq); + bool idle_timer_disabled = false; + unsigned int cmd_flags; +#endif spin_lock_irq(&bfqd->lock); if (blk_mq_sched_try_insert_merge(q, rq)) { @@ -4242,7 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, else list_add_tail(&rq->queuelist, &bfqd->dispatch); } else { +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + idle_timer_disabled = __bfq_insert_request(bfqd, rq); + /* + * Update bfqq, because, if a queue merge has occurred + * in __bfq_insert_request, then rq has been + * redirected into a new queue. + */ + bfqq = RQ_BFQQ(rq); +#else __bfq_insert_request(bfqd, rq); +#endif if (rq_mergeable(rq)) { elv_rqhash_add(q, rq); @@ -4251,7 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, } } +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + /* + * Cache cmd_flags before releasing scheduler lock, because rq + * may disappear afterwards (for example, because of a request + * merge). + */ + cmd_flags = rq->cmd_flags; +#endif spin_unlock_irq(&bfqd->lock); + +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) + if (!bfqq) + return; + /* + * bfqq still exists, because it can disappear only after + * either it is merged with another queue, or the process it + * is associated with exits. But both actions must be taken by + * the same process currently executing this flow of + * instruction. + * + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. + */ + spin_lock_irq(q->queue_lock); + bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); + if (idle_timer_disabled) + bfqg_stats_update_idle_time(bfqq_group(bfqq)); + spin_unlock_irq(q->queue_lock); +#endif } static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, @@ -4428,8 +4528,11 @@ static void bfq_finish_request(struct request *rq) * lock is held. */ - if (!RB_EMPTY_NODE(&rq->rb_node)) + if (!RB_EMPTY_NODE(&rq->rb_node)) { bfq_remove_request(rq->q, rq); + bfqg_stats_update_io_remove(bfqq_group(bfqq), + rq->cmd_flags); + } bfq_put_rq_priv_body(bfqq); } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ac0809c72c98..91c4390903a1 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -689,7 +689,7 @@ enum bfqq_expiration { }; struct bfqg_stats { -#ifdef CONFIG_BFQ_GROUP_IOSCHED +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) /* number of ios merged */ struct blkg_rwstat merged; /* total time spent on device in ns, may not be accurate w/ queueing */ @@ -717,7 +717,7 @@ struct bfqg_stats { uint64_t start_idle_time; uint64_t start_empty_time; uint16_t flags; -#endif /* CONFIG_BFQ_GROUP_IOSCHED */ +#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ }; #ifdef CONFIG_BFQ_GROUP_IOSCHED diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 414ba686a847..e495d3f9b4b0 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -843,7 +843,6 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) st->vtime += bfq_delta(served, st->wsum); bfq_forget_idle(st); } - bfqg_stats_set_start_empty_time(bfqq_group(bfqq)); bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served); } diff --git a/block/bio.c b/block/bio.c index b94a802f8ba3..8bfdea58159b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -597,6 +597,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) * so we don't set nor calculate new physical/hw segment counts here */ bio->bi_disk = bio_src->bi_disk; + bio->bi_partno = bio_src->bi_partno; bio_set_flag(bio, BIO_CLONED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; @@ -1061,14 +1062,21 @@ struct bio_map_data { struct iovec iov[]; }; -static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, +static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, gfp_t gfp_mask) { - if (iov_count > UIO_MAXIOV) + struct bio_map_data *bmd; + if (data->nr_segs > UIO_MAXIOV) return NULL; - return kmalloc(sizeof(struct bio_map_data) + - sizeof(struct iovec) * iov_count, gfp_mask); + bmd = kmalloc(sizeof(struct bio_map_data) + + sizeof(struct iovec) * data->nr_segs, gfp_mask); + if (!bmd) + return NULL; + memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); + bmd->iter = *data; + bmd->iter.iov = bmd->iov; + return bmd; } /** @@ -1079,7 +1087,7 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, * Copy all pages from iov_iter to bio. * Returns 0 on success, or error on failure. */ -static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) +static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) { int i; struct bio_vec *bvec; @@ -1090,9 +1098,9 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) ret = copy_page_from_iter(bvec->bv_page, bvec->bv_offset, bvec->bv_len, - &iter); + iter); - if (!iov_iter_count(&iter)) + if (!iov_iter_count(iter)) break; if (ret < bvec->bv_len) @@ -1186,40 +1194,18 @@ int bio_uncopy_user(struct bio *bio) */ struct bio *bio_copy_user_iov(struct request_queue *q, struct rq_map_data *map_data, - const struct iov_iter *iter, + struct iov_iter *iter, gfp_t gfp_mask) { struct bio_map_data *bmd; struct page *page; struct bio *bio; - int i, ret; - int nr_pages = 0; + int i = 0, ret; + int nr_pages; unsigned int len = iter->count; unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; - for (i = 0; i < iter->nr_segs; i++) { - unsigned long uaddr; - unsigned long end; - unsigned long start; - - uaddr = (unsigned long) iter->iov[i].iov_base; - end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1) - >> PAGE_SHIFT; - start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - } - - if (offset) - nr_pages++; - - bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask); + bmd = bio_alloc_map_data(iter, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); @@ -1229,9 +1215,10 @@ struct bio *bio_copy_user_iov(struct request_queue *q, * shortlived one. */ bmd->is_our_pages = map_data ? 0 : 1; - memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); - bmd->iter = *iter; - bmd->iter.iov = bmd->iov; + + nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); + if (nr_pages > BIO_MAX_PAGES) + nr_pages = BIO_MAX_PAGES; ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); @@ -1280,17 +1267,24 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (ret) goto cleanup; + if (map_data) + map_data->offset += bio->bi_iter.bi_size; + /* * success */ if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { - ret = bio_copy_from_iter(bio, *iter); + ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else { + iov_iter_advance(iter, bio->bi_iter.bi_size); } bio->bi_private = bmd; + if (map_data && map_data->null_mapped) + bio_set_flag(bio, BIO_NULL_MAPPED); return bio; cleanup: if (!map_data) @@ -1311,111 +1305,74 @@ out_bmd: * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, - const struct iov_iter *iter, + struct iov_iter *iter, gfp_t gfp_mask) { int j; - int nr_pages = 0; - struct page **pages; struct bio *bio; - int cur_page = 0; - int ret, offset; - struct iov_iter i; - struct iovec iov; + int ret; struct bio_vec *bvec; - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - unsigned long len = iov.iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - /* - * buffer must be aligned to at least logical block size for now - */ - if (uaddr & queue_dma_alignment(q)) - return ERR_PTR(-EINVAL); - } - - if (!nr_pages) + if (!iov_iter_count(iter)) return ERR_PTR(-EINVAL); - bio = bio_kmalloc(gfp_mask, nr_pages); + bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); if (!bio) return ERR_PTR(-ENOMEM); - ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); - if (!pages) - goto out; + while (iov_iter_count(iter)) { + struct page **pages; + ssize_t bytes; + size_t offs, added = 0; + int npages; - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - unsigned long len = iov.iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int local_nr_pages = end - start; - const int page_limit = cur_page + local_nr_pages; - - ret = get_user_pages_fast(uaddr, local_nr_pages, - (iter->type & WRITE) != WRITE, - &pages[cur_page]); - if (unlikely(ret < local_nr_pages)) { - for (j = cur_page; j < page_limit; j++) { - if (!pages[j]) - break; - put_page(pages[j]); - } - ret = -EFAULT; + bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); + if (unlikely(bytes <= 0)) { + ret = bytes ? bytes : -EFAULT; goto out_unmap; } - offset = offset_in_page(uaddr); - for (j = cur_page; j < page_limit; j++) { - unsigned int bytes = PAGE_SIZE - offset; - unsigned short prev_bi_vcnt = bio->bi_vcnt; + npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - /* - * sorry... - */ - if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < - bytes) - break; + if (unlikely(offs & queue_dma_alignment(q))) { + ret = -EINVAL; + j = 0; + } else { + for (j = 0; j < npages; j++) { + struct page *page = pages[j]; + unsigned int n = PAGE_SIZE - offs; + unsigned short prev_bi_vcnt = bio->bi_vcnt; - /* - * check if vector was merged with previous - * drop page reference if needed - */ - if (bio->bi_vcnt == prev_bi_vcnt) - put_page(pages[j]); + if (n > bytes) + n = bytes; - len -= bytes; - offset = 0; - } + if (!bio_add_pc_page(q, bio, page, n, offs)) + break; - cur_page = j; + /* + * check if vector was merged with previous + * drop page reference if needed + */ + if (bio->bi_vcnt == prev_bi_vcnt) + put_page(page); + + added += n; + bytes -= n; + offs = 0; + } + iov_iter_advance(iter, added); + } /* * release the pages we didn't map into the bio, if any */ - while (j < page_limit) + while (j < npages) put_page(pages[j++]); + kvfree(pages); + /* couldn't stuff something into bio? */ + if (bytes) + break; } - kfree(pages); - bio_set_flag(bio, BIO_USER_MAPPED); /* @@ -1431,8 +1388,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, bio_for_each_segment_all(bvec, bio, j) { put_page(bvec->bv_page); } - out: - kfree(pages); bio_put(bio); return ERR_PTR(ret); } @@ -1864,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio); struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { - struct bio *split = NULL; + struct bio *split; BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); diff --git a/block/blk-core.c b/block/blk-core.c index 7c54c195e79e..b8881750a3ac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -637,8 +637,8 @@ void blk_set_queue_dying(struct request_queue *q) spin_lock_irq(q->queue_lock); blk_queue_for_each_rl(rl, q) { if (rl->rq_pool) { - wake_up(&rl->wait[BLK_RW_SYNC]); - wake_up(&rl->wait[BLK_RW_ASYNC]); + wake_up_all(&rl->wait[BLK_RW_SYNC]); + wake_up_all(&rl->wait[BLK_RW_ASYNC]); } } spin_unlock_irq(q->queue_lock); @@ -863,9 +863,9 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) wake_up_all(&q->mq_freeze_wq); } -static void blk_rq_timed_out_timer(unsigned long data) +static void blk_rq_timed_out_timer(struct timer_list *t) { - struct request_queue *q = (struct request_queue *)data; + struct request_queue *q = from_timer(q, t, timeout); kblockd_schedule_work(&q->timeout_work); } @@ -901,9 +901,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) q->backing_dev_info->name = "block"; q->node = node_id; - setup_timer(&q->backing_dev_info->laptop_mode_wb_timer, - laptop_mode_timer_fn, (unsigned long) q); - setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); + timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, + laptop_mode_timer_fn, 0); + timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); INIT_WORK(&q->timeout_work, NULL); INIT_LIST_HEAD(&q->queue_head); INIT_LIST_HEAD(&q->timeout_list); diff --git a/block/blk-map.c b/block/blk-map.c index d5251edcc0dd..b21f8e86f120 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -67,13 +67,6 @@ static int __blk_rq_map_user_iov(struct request *rq, bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= req_op(rq); - if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - - iov_iter_advance(iter, bio->bi_iter.bi_size); - if (map_data) - map_data->offset += bio->bi_iter.bi_size; - orig_bio = bio; /* diff --git a/block/blk-stat.c b/block/blk-stat.c index 3a2f3c96f367..28003bf9941c 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -79,9 +79,9 @@ void blk_stat_add(struct request *rq) rcu_read_unlock(); } -static void blk_stat_timer_fn(unsigned long data) +static void blk_stat_timer_fn(struct timer_list *t) { - struct blk_stat_callback *cb = (void *)data; + struct blk_stat_callback *cb = from_timer(cb, t, timer); unsigned int bucket; int cpu; @@ -130,7 +130,7 @@ blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *), cb->bucket_fn = bucket_fn; cb->data = data; cb->buckets = buckets; - setup_timer(&cb->timer, blk_stat_timer_fn, (unsigned long)cb); + timer_setup(&cb->timer, blk_stat_timer_fn, 0); return cb; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e54be402899d..870484eaed1f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, ret = wbt_init(q); if (ret) return ret; - - rwb = q->rq_wb; - if (!rwb) - return -EINVAL; } + rwb = q->rq_wb; if (val == -1) rwb->min_lat_nsec = wbt_default_latency_nsec(q); else if (val >= 0) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 96ad32623427..825bc29767e6 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -225,7 +225,7 @@ struct throtl_data bool track_bio_latency; }; -static void throtl_pending_timer_fn(unsigned long arg); +static void throtl_pending_timer_fn(struct timer_list *t); static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) { @@ -478,8 +478,7 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq) INIT_LIST_HEAD(&sq->queued[0]); INIT_LIST_HEAD(&sq->queued[1]); sq->pending_tree = RB_ROOT; - setup_timer(&sq->pending_timer, throtl_pending_timer_fn, - (unsigned long)sq); + timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); } static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) @@ -1249,9 +1248,9 @@ static bool throtl_can_upgrade(struct throtl_data *td, * the top-level service_tree is reached, throtl_data->dispatch_work is * kicked so that the ready bio's are issued. */ -static void throtl_pending_timer_fn(unsigned long arg) +static void throtl_pending_timer_fn(struct timer_list *t) { - struct throtl_service_queue *sq = (void *)arg; + struct throtl_service_queue *sq = from_timer(sq, t, pending_timer); struct throtl_grp *tg = sq_to_tg(sq); struct throtl_data *td = sq_to_td(sq); struct request_queue *q = td->queue; diff --git a/block/blk-wbt.c b/block/blk-wbt.c index b252da0e4c11..ae8de9780085 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) if (wbt_is_read(stat)) wb_timestamp(rwb, &rwb->last_comp); - wbt_clear_state(stat); } else { WARN_ON_ONCE(stat == rwb->sync_cookie); __wbt_done(rwb, wbt_stat_to_mask(stat)); - wbt_clear_state(stat); } + wbt_clear_state(stat); } /* @@ -482,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) /* * At this point we know it's a buffered write. If this is - * kswapd trying to free memory, or REQ_SYNC is set, set, then + * kswapd trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing @@ -723,8 +722,6 @@ int wbt_init(struct request_queue *q) init_waitqueue_head(&rwb->rq_wait[i].wait); } - rwb->wc = 1; - rwb->queue_depth = RWB_DEF_DEPTH; rwb->last_comp = rwb->last_issue = jiffies; rwb->queue = q; rwb->win_nsec = RWB_WINDOW_NSEC; diff --git a/block/genhd.c b/block/genhd.c index c2223f12a805..96a66f671720 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; disk->flags |= GENHD_FL_NO_PART_SCAN; } else { + int ret; + /* Register BDI before referencing it from bdev */ disk_to_dev(disk)->devt = devt; - bdi_register_owner(disk->queue->backing_dev_info, - disk_to_dev(disk)); + ret = bdi_register_owner(disk->queue->backing_dev_info, + disk_to_dev(disk)); + WARN_ON(ret); blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } @@ -1389,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (minors > DISK_MAX_PARTS) { printk(KERN_ERR - "block: can't allocated more than %d partitions\n", + "block: can't allocate more than %d partitions\n", DISK_MAX_PARTS); minors = DISK_MAX_PARTS; } |