summaryrefslogtreecommitdiff
path: root/block/blk-mq-sched.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
commit694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /block/blk-mq-sched.c
parenta351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ was initially a fork of CFQ, but subsequently changed to implement fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant to be used on desktop type single drives, providing good fairness. From Paolo. - Add Kyber IO scheduler. This is a full multiqueue aware scheduler, using a scalable token based algorithm that throttles IO based on live completion IO stats, similary to blk-wbt. From Omar. - A series from Jan, moving users to separately allocated backing devices. This continues the work of separating backing device life times, solving various problems with hot removal. - A series of updates for lightnvm, mostly from Javier. Includes a 'pblk' target that exposes an open channel SSD as a physical block device. - A series of fixes and improvements for nbd from Josef. - A series from Omar, removing queue sharing between devices on mostly legacy drivers. This helps us clean up other bits, if we know that a queue only has a single device backing. This has been overdue for more than a decade. - Fixes for the blk-stats, and improvements to unify the stats and user windows. This both improves blk-wbt, and enables other users to register a need to receive IO stats for a device. From Omar. - blk-throttle improvements from Shaohua. This provides a scalable framework for implementing scalable priotization - particularly for blk-mq, but applicable to any type of block device. The interface is marked experimental for now. - Bucketized IO stats for IO polling from Stephen Bates. This improves efficiency of polled workloads in the presence of mixed block size IO. - A few fixes for opal, from Scott. - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics. From a variety of folks, mostly Sagi and James Smart. - A series from Bart, improving our exposed info and capabilities from the blk-mq debugfs support. - A series from Christoph, cleaning up how handle WRITE_ZEROES. - A series from Christoph, cleaning up the block layer handling of how we track errors in a request. On top of being a nice cleanup, it also shrinks the size of struct request a bit. - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was never used by platforms, and the latter has outlived it's usefulness. - Various little bug fixes and cleanups from a wide variety of folks. * 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits) block: hide badblocks attribute by default blk-mq: unify hctx delay_work and run_work block: add kblock_mod_delayed_work_on() blk-mq: unify hctx delayed_run_work and run_work nbd: fix use after free on module unload MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler blk-mq-sched: alloate reserved tags out of normal pool mtip32xx: use runtime tag to initialize command header scsi: Implement blk_mq_ops.show_rq() blk-mq: Add blk_mq_ops.show_rq() blk-mq: Show operation, cmd_flags and rq_flags names blk-mq: Make blk_flags_show() callers append a newline character blk-mq: Move the "state" debugfs attribute one level down blk-mq: Unregister debugfs attributes earlier blk-mq: Only unregister hctxs for which registration succeeded blk-mq-debugfs: Rename functions for registering and unregistering the mq directory blk-mq: Let blk_mq_debugfs_register() look up the queue name blk-mq: Register <dev>/queue/mq after having registered <dev>/queue ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset ide-pm: always pass 0 error to __blk_end_request_all ..
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r--block/blk-mq-sched.c103
1 files changed, 48 insertions, 55 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c974a1bbf4cb..8b361e192e8a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -30,43 +30,6 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
-int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
- int (*init)(struct blk_mq_hw_ctx *),
- void (*exit)(struct blk_mq_hw_ctx *))
-{
- struct blk_mq_hw_ctx *hctx;
- int ret;
- int i;
-
- queue_for_each_hw_ctx(q, hctx, i) {
- hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
- if (!hctx->sched_data) {
- ret = -ENOMEM;
- goto error;
- }
-
- if (init) {
- ret = init(hctx);
- if (ret) {
- /*
- * We don't want to give exit() a partially
- * initialized sched_data. init() must clean up
- * if it fails.
- */
- kfree(hctx->sched_data);
- hctx->sched_data = NULL;
- goto error;
- }
- }
- }
-
- return 0;
-error:
- blk_mq_sched_free_hctx_data(q, exit);
- return ret;
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
-
static void __blk_mq_sched_assign_ioc(struct request_queue *q,
struct request *rq,
struct bio *bio,
@@ -119,7 +82,11 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
- if (e) {
+ /*
+ * For a reserved tag, allocate a normal request since we might
+ * have driver dependencies on the value of the internal tag.
+ */
+ if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
data->flags |= BLK_MQ_REQ_INTERNAL;
/*
@@ -227,22 +194,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
}
}
-void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
- struct list_head *rq_list,
- struct request *(*get_rq)(struct blk_mq_hw_ctx *))
-{
- do {
- struct request *rq;
-
- rq = get_rq(hctx);
- if (!rq)
- break;
-
- list_add_tail(&rq->queuelist, rq_list);
- } while (1);
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
-
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
struct request **merged_request)
{
@@ -508,11 +459,24 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
struct elevator_queue *e = q->elevator;
+ int ret;
if (!e)
return 0;
- return blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
+ ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
+ if (ret)
+ return ret;
+
+ if (e->type->ops.mq.init_hctx) {
+ ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
+ if (ret) {
+ blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
+ return ret;
+ }
+ }
+
+ return 0;
}
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
@@ -523,12 +487,18 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
if (!e)
return;
+ if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
+ e->type->ops.mq.exit_hctx(hctx, hctx_idx);
+ hctx->sched_data = NULL;
+ }
+
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
+ struct elevator_queue *eq;
unsigned int i;
int ret;
@@ -553,6 +523,18 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
if (ret)
goto err;
+ if (e->ops.mq.init_hctx) {
+ queue_for_each_hw_ctx(q, hctx, i) {
+ ret = e->ops.mq.init_hctx(hctx, i);
+ if (ret) {
+ eq = q->elevator;
+ blk_mq_exit_sched(q, eq);
+ kobject_put(&eq->kobj);
+ return ret;
+ }
+ }
+ }
+
return 0;
err:
@@ -563,6 +545,17 @@ err:
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i;
+
+ if (e->type->ops.mq.exit_hctx) {
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (hctx->sched_data) {
+ e->type->ops.mq.exit_hctx(hctx, i);
+ hctx->sched_data = NULL;
+ }
+ }
+ }
if (e->type->ops.mq.exit_sched)
e->type->ops.mq.exit_sched(e);
blk_mq_sched_tags_teardown(q);