diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-01 10:39:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-01 10:39:57 -0700 |
commit | 694752922b12bd318aa80191bd9d8c3dcfb39055 (patch) | |
tree | 5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /block/blk-mq-sched.c | |
parent | a351e9b9fc24e982ec2f0e76379a49826036da12 (diff) | |
parent | 9438b3e080beccf6022138ea62192d55cc7dc4ed (diff) |
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
- Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ
was initially a fork of CFQ, but subsequently changed to implement
fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant
to be used on desktop type single drives, providing good fairness.
From Paolo.
- Add Kyber IO scheduler. This is a full multiqueue aware scheduler,
using a scalable token based algorithm that throttles IO based on
live completion IO stats, similary to blk-wbt. From Omar.
- A series from Jan, moving users to separately allocated backing
devices. This continues the work of separating backing device life
times, solving various problems with hot removal.
- A series of updates for lightnvm, mostly from Javier. Includes a
'pblk' target that exposes an open channel SSD as a physical block
device.
- A series of fixes and improvements for nbd from Josef.
- A series from Omar, removing queue sharing between devices on mostly
legacy drivers. This helps us clean up other bits, if we know that a
queue only has a single device backing. This has been overdue for
more than a decade.
- Fixes for the blk-stats, and improvements to unify the stats and user
windows. This both improves blk-wbt, and enables other users to
register a need to receive IO stats for a device. From Omar.
- blk-throttle improvements from Shaohua. This provides a scalable
framework for implementing scalable priotization - particularly for
blk-mq, but applicable to any type of block device. The interface is
marked experimental for now.
- Bucketized IO stats for IO polling from Stephen Bates. This improves
efficiency of polled workloads in the presence of mixed block size
IO.
- A few fixes for opal, from Scott.
- A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics.
From a variety of folks, mostly Sagi and James Smart.
- A series from Bart, improving our exposed info and capabilities from
the blk-mq debugfs support.
- A series from Christoph, cleaning up how handle WRITE_ZEROES.
- A series from Christoph, cleaning up the block layer handling of how
we track errors in a request. On top of being a nice cleanup, it also
shrinks the size of struct request a bit.
- Removal of mg_disk and hd (sorry Linus) by Christoph. The former was
never used by platforms, and the latter has outlived it's usefulness.
- Various little bug fixes and cleanups from a wide variety of folks.
* 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits)
block: hide badblocks attribute by default
blk-mq: unify hctx delay_work and run_work
block: add kblock_mod_delayed_work_on()
blk-mq: unify hctx delayed_run_work and run_work
nbd: fix use after free on module unload
MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler
blk-mq-sched: alloate reserved tags out of normal pool
mtip32xx: use runtime tag to initialize command header
scsi: Implement blk_mq_ops.show_rq()
blk-mq: Add blk_mq_ops.show_rq()
blk-mq: Show operation, cmd_flags and rq_flags names
blk-mq: Make blk_flags_show() callers append a newline character
blk-mq: Move the "state" debugfs attribute one level down
blk-mq: Unregister debugfs attributes earlier
blk-mq: Only unregister hctxs for which registration succeeded
blk-mq-debugfs: Rename functions for registering and unregistering the mq directory
blk-mq: Let blk_mq_debugfs_register() look up the queue name
blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset
ide-pm: always pass 0 error to __blk_end_request_all
..
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r-- | block/blk-mq-sched.c | 103 |
1 files changed, 48 insertions, 55 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c974a1bbf4cb..8b361e192e8a 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -30,43 +30,6 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); -int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, - int (*init)(struct blk_mq_hw_ctx *), - void (*exit)(struct blk_mq_hw_ctx *)) -{ - struct blk_mq_hw_ctx *hctx; - int ret; - int i; - - queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node); - if (!hctx->sched_data) { - ret = -ENOMEM; - goto error; - } - - if (init) { - ret = init(hctx); - if (ret) { - /* - * We don't want to give exit() a partially - * initialized sched_data. init() must clean up - * if it fails. - */ - kfree(hctx->sched_data); - hctx->sched_data = NULL; - goto error; - } - } - } - - return 0; -error: - blk_mq_sched_free_hctx_data(q, exit); - return ret; -} -EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data); - static void __blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, struct bio *bio, @@ -119,7 +82,11 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - if (e) { + /* + * For a reserved tag, allocate a normal request since we might + * have driver dependencies on the value of the internal tag. + */ + if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) { data->flags |= BLK_MQ_REQ_INTERNAL; /* @@ -227,22 +194,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) } } -void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, - struct list_head *rq_list, - struct request *(*get_rq)(struct blk_mq_hw_ctx *)) -{ - do { - struct request *rq; - - rq = get_rq(hctx); - if (!rq) - break; - - list_add_tail(&rq->queuelist, rq_list); - } while (1); -} -EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch); - bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, struct request **merged_request) { @@ -508,11 +459,24 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { struct elevator_queue *e = q->elevator; + int ret; if (!e) return 0; - return blk_mq_sched_alloc_tags(q, hctx, hctx_idx); + ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx); + if (ret) + return ret; + + if (e->type->ops.mq.init_hctx) { + ret = e->type->ops.mq.init_hctx(hctx, hctx_idx); + if (ret) { + blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx); + return ret; + } + } + + return 0; } void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, @@ -523,12 +487,18 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, if (!e) return; + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { + e->type->ops.mq.exit_hctx(hctx, hctx_idx); + hctx->sched_data = NULL; + } + blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx); } int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) { struct blk_mq_hw_ctx *hctx; + struct elevator_queue *eq; unsigned int i; int ret; @@ -553,6 +523,18 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err; + if (e->ops.mq.init_hctx) { + queue_for_each_hw_ctx(q, hctx, i) { + ret = e->ops.mq.init_hctx(hctx, i); + if (ret) { + eq = q->elevator; + blk_mq_exit_sched(q, eq); + kobject_put(&eq->kobj); + return ret; + } + } + } + return 0; err: @@ -563,6 +545,17 @@ err: void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) { + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + if (e->type->ops.mq.exit_hctx) { + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->sched_data) { + e->type->ops.mq.exit_hctx(hctx, i); + hctx->sched_data = NULL; + } + } + } if (e->type->ops.mq.exit_sched) e->type->ops.mq.exit_sched(e); blk_mq_sched_tags_teardown(q); |