Diffstat (limited to 'block/elevator.c')
-rw-r--r--  block/elevator.c  400
1 file changed, 219 insertions, 181 deletions
diff --git a/block/elevator.c b/block/elevator.c
index 5ff093cb3cf8..ab22542e6cf0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -45,6 +45,17 @@
#include "blk-wbt.h"
#include "blk-cgroup.h"
+/* Context data carried across an elevator change */
+struct elv_change_ctx {
+ const char *name;
+ bool no_uevent;
+
+ /* for unregistering old elevator */
+ struct elevator_queue *old;
+ /* for registering new elevator */
+ struct elevator_queue *new;
+};
+
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
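Not part of the patch: a minimal sketch of how the new context is meant to travel through a scheduler change, assuming the split into elevator_switch() and elevator_change_done() shown further down in this diff.

/*
 * Sketch only (not in the patch): the two-phase flow the context carries.
 *
 *	struct elv_change_ctx ctx = { .name = "mq-deadline" };	<- example name
 *
 *	elevator_switch(q, &ctx);	<- phase 1, queue frozen + elevator_lock
 *					   held: fills ctx.old (torn down) and
 *					   ctx.new (freshly initialized)
 *	elevator_change_done(q, &ctx);	<- phase 2, after unfreeze: unregister
 *					   ctx.old, register ctx.new, drop the
 *					   old kobject reference
 */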
@@ -83,13 +94,6 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_bio_merge_ok);
-static inline bool elv_support_features(struct request_queue *q,
- const struct elevator_type *e)
-{
- return (q->required_elevator_features & e->elevator_features) ==
- q->required_elevator_features;
-}
-
/**
* elevator_match - Check whether @e's name or alias matches @name
* @e: Scheduler to test
@@ -113,14 +117,13 @@ static struct elevator_type *__elevator_find(const char *name)
return NULL;
}
-static struct elevator_type *elevator_find_get(struct request_queue *q,
- const char *name)
+static struct elevator_type *elevator_find_get(const char *name)
{
struct elevator_type *e;
spin_lock(&elv_list_lock);
e = __elevator_find(name);
- if (e && (!elv_support_features(q, e) || !elevator_tryget(e)))
+ if (e && (!elevator_tryget(e)))
e = NULL;
spin_unlock(&elv_list_lock);
return e;
@@ -156,18 +159,18 @@ static void elevator_release(struct kobject *kobj)
kfree(e);
}
-void elevator_exit(struct request_queue *q)
+static void elevator_exit(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
+ lockdep_assert_held(&q->elevator_lock);
+
ioc_clear_queue(q);
blk_mq_sched_free_rqs(q);
mutex_lock(&e->sysfs_lock);
blk_mq_exit_sched(q, e);
mutex_unlock(&e->sysfs_lock);
-
- kobject_put(&e->kobj);
}
static inline void __elv_rqhash_del(struct request *rq)
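A note on the lifetime change in this hunk (the sketch below is not from the patch): elevator_exit() no longer drops the final kobject reference; that now happens after unregistration, as elevator_change_done() later in this diff does.

/*
 * Teardown order implied by this change, assuming callers follow the
 * elevator_change_done() pattern shown later in the diff:
 *
 *	mutex_lock(&q->elevator_lock);
 *	old = q->elevator;
 *	elevator_exit(q);		<- free sched rqs, exit sched, no kobj put
 *	mutex_unlock(&q->elevator_lock);
 *	...
 *	elv_unregister_queue(q, old);	<- remove sysfs + debugfs entries
 *	kobject_put(&old->kobj);	<- final reference dropped here
 */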
@@ -413,21 +416,22 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
return NULL;
}
-#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
+#define to_elv(atr) container_of_const((atr), struct elv_fs_entry, attr)
static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
- struct elv_fs_entry *entry = to_elv(attr);
+ const struct elv_fs_entry *entry = to_elv(attr);
struct elevator_queue *e;
- ssize_t error;
+ ssize_t error = -ENODEV;
if (!entry->show)
return -EIO;
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->type ? entry->show(e, page) : -ENOENT;
+ if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
+ error = entry->show(e, page);
mutex_unlock(&e->sysfs_lock);
return error;
}
@@ -436,16 +440,17 @@ static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
- struct elv_fs_entry *entry = to_elv(attr);
+ const struct elv_fs_entry *entry = to_elv(attr);
struct elevator_queue *e;
- ssize_t error;
+ ssize_t error = -ENODEV;
if (!entry->store)
return -EIO;
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->type ? entry->store(e, page, length) : -ENOENT;
+ if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
+ error = entry->store(e, page, length);
mutex_unlock(&e->sysfs_lock);
return error;
}
@@ -460,16 +465,15 @@ static const struct kobj_type elv_ktype = {
.release = elevator_release,
};
-int elv_register_queue(struct request_queue *q, bool uevent)
+static int elv_register_queue(struct request_queue *q,
+ struct elevator_queue *e,
+ bool uevent)
{
- struct elevator_queue *e = q->elevator;
int error;
- lockdep_assert_held(&q->sysfs_lock);
-
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
- struct elv_fs_entry *attr = e->type->elevator_attrs;
+ const struct elv_fs_entry *attr = e->type->elevator_attrs;
if (attr) {
while (attr->attr.name) {
if (sysfs_create_file(&e->kobj, &attr->attr))
@@ -480,20 +484,25 @@ int elv_register_queue(struct request_queue *q, bool uevent)
if (uevent)
kobject_uevent(&e->kobj, KOBJ_ADD);
+ /*
+ * The scheduler is initialized and ready to be exported via
+ * debugfs
+ */
+ blk_mq_sched_reg_debugfs(q);
set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
}
return error;
}
-void elv_unregister_queue(struct request_queue *q)
+static void elv_unregister_queue(struct request_queue *q,
+ struct elevator_queue *e)
{
- struct elevator_queue *e = q->elevator;
-
- lockdep_assert_held(&q->sysfs_lock);
-
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
+
+ /* unexport via debugfs before exiting sched */
+ blk_mq_sched_unreg_debugfs(q);
}
}
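For orientation, a sketch (not part of the patch) of the ordering this hunk establishes; the assumption is that blk_mq_sched_reg_debugfs() and blk_mq_sched_unreg_debugfs() create and remove the scheduler's debugfs entries.

/*
 * register:   kobject_add() -> create sysfs attrs -> KOBJ_ADD uevent
 *             -> blk_mq_sched_reg_debugfs() -> set ELEVATOR_FLAG_REGISTERED
 * unregister: clear ELEVATOR_FLAG_REGISTERED -> KOBJ_REMOVE uevent
 *             -> kobject_del() -> blk_mq_sched_unreg_debugfs()
 */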
@@ -555,238 +564,267 @@ void elv_unregister(struct elevator_type *e)
}
EXPORT_SYMBOL_GPL(elv_unregister);
-static inline bool elv_support_iosched(struct request_queue *q)
-{
- if (!queue_is_mq(q) ||
- (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
- return false;
- return true;
-}
-
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * Switch to new_e io scheduler.
+ *
+ * If switching fails, we are most likely running out of memory and not able
+ * to restore the old io scheduler, so the io scheduler is left set to none.
*/
-static struct elevator_type *elevator_get_default(struct request_queue *q)
+static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
{
- if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
- return NULL;
+ struct elevator_type *new_e = NULL;
+ int ret = 0;
- if (q->nr_hw_queues != 1 &&
- !blk_mq_is_shared_tags(q->tag_set->flags))
- return NULL;
+ WARN_ON_ONCE(q->mq_freeze_depth == 0);
+ lockdep_assert_held(&q->elevator_lock);
- return elevator_find_get(q, "mq-deadline");
-}
+ if (strncmp(ctx->name, "none", 4)) {
+ new_e = elevator_find_get(ctx->name);
+ if (!new_e)
+ return -EINVAL;
+ }
-/*
- * Get the first elevator providing the features required by the request queue.
- * Default to "none" if no matching elevator is found.
- */
-static struct elevator_type *elevator_get_by_features(struct request_queue *q)
-{
- struct elevator_type *e, *found = NULL;
+ blk_mq_quiesce_queue(q);
- spin_lock(&elv_list_lock);
+ if (q->elevator) {
+ ctx->old = q->elevator;
+ elevator_exit(q);
+ }
- list_for_each_entry(e, &elv_list, list) {
- if (elv_support_features(q, e)) {
- found = e;
- break;
- }
+ if (new_e) {
+ ret = blk_mq_init_sched(q, new_e);
+ if (ret)
+ goto out_unfreeze;
+ ctx->new = q->elevator;
+ } else {
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
+ q->elevator = NULL;
+ q->nr_requests = q->tag_set->queue_depth;
}
+ blk_add_trace_msg(q, "elv switch: %s", ctx->name);
- if (found && !elevator_tryget(found))
- found = NULL;
+out_unfreeze:
+ blk_mq_unquiesce_queue(q);
- spin_unlock(&elv_list_lock);
- return found;
+ if (ret) {
+ pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
+ new_e->elevator_name);
+ }
+
+ if (new_e)
+ elevator_put(new_e);
+ return ret;
}
-/*
- * For a device queue that has no required features, use the default elevator
- * settings. Otherwise, use the first elevator available matching the required
- * features. If no suitable elevator is find or if the chosen elevator
- * initialization fails, fall back to the "none" elevator (no elevator).
- */
-void elevator_init_mq(struct request_queue *q)
+static void elv_exit_and_release(struct request_queue *q)
{
- struct elevator_type *e;
- int err;
+ struct elevator_queue *e;
+ unsigned memflags;
- if (!elv_support_iosched(q))
- return;
+ memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->elevator_lock);
+ e = q->elevator;
+ elevator_exit(q);
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
+ if (e)
+ kobject_put(&e->kobj);
+}
- WARN_ON_ONCE(blk_queue_registered(q));
+static int elevator_change_done(struct request_queue *q,
+ struct elv_change_ctx *ctx)
+{
+ int ret = 0;
- if (unlikely(q->elevator))
- return;
+ if (ctx->old) {
+ bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
+ &ctx->old->flags);
- if (!q->required_elevator_features)
- e = elevator_get_default(q);
- else
- e = elevator_get_by_features(q);
- if (!e)
- return;
+ elv_unregister_queue(q, ctx->old);
+ kobject_put(&ctx->old->kobj);
+ if (enable_wbt)
+ wbt_enable_default(q->disk);
+ }
+ if (ctx->new) {
+ ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
+ if (ret)
+ elv_exit_and_release(q);
+ }
+ return ret;
+}
+
+/*
+ * Switch this queue to the given IO scheduler.
+ */
+static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
+{
+ unsigned int memflags;
+ int ret = 0;
+
+ lockdep_assert_held(&q->tag_set->update_nr_hwq_lock);
+ memflags = blk_mq_freeze_queue(q);
/*
- * We are called before adding disk, when there isn't any FS I/O,
+ * May be called before adding disk, when there isn't any FS I/O,
* so freezing queue plus canceling dispatch work is enough to
* drain any dispatch activities originated from passthrough
* requests, then no need to quiesce queue which may add long boot
* latency, especially when lots of disks are involved.
+ *
+ * Disk isn't added yet, so verifying queue lock only manually.
*/
- blk_mq_freeze_queue(q);
blk_mq_cancel_work_sync(q);
+ mutex_lock(&q->elevator_lock);
+ if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
+ ret = elevator_switch(q, ctx);
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
+ if (!ret)
+ ret = elevator_change_done(q, ctx);
- err = blk_mq_init_sched(q, e);
-
- blk_mq_unfreeze_queue(q);
-
- if (err) {
- pr_warn("\"%s\" elevator initialization failed, "
- "falling back to \"none\"\n", e->elevator_name);
- }
-
- elevator_put(e);
+ return ret;
}
/*
- * Switch to new_e io scheduler.
- *
- * If switching fails, we are most likely running out of memory and not able
- * to restore the old io scheduler, so leaving the io scheduler being none.
+ * The I/O scheduler depends on the number of hardware queues, so this forces a
+ * reattachment when nr_hw_queues changes.
*/
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
+void elv_update_nr_hw_queues(struct request_queue *q)
{
- int ret;
-
- lockdep_assert_held(&q->sysfs_lock);
-
- blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
-
- if (q->elevator) {
- elv_unregister_queue(q);
- elevator_exit(q);
- }
+ struct elv_change_ctx ctx = {};
+ int ret = -ENODEV;
- ret = blk_mq_init_sched(q, new_e);
- if (ret)
- goto out_unfreeze;
+ WARN_ON_ONCE(q->mq_freeze_depth == 0);
- ret = elv_register_queue(q, true);
- if (ret) {
- elevator_exit(q);
- goto out_unfreeze;
- }
- blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
+ mutex_lock(&q->elevator_lock);
+ if (q->elevator && !blk_queue_dying(q) && blk_queue_registered(q)) {
+ ctx.name = q->elevator->type->elevator_name;
-out_unfreeze:
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
-
- if (ret) {
- pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
- new_e->elevator_name);
+ /* force the elevator to be reattached after nr_hw_queues is updated */
+ ret = elevator_switch(q, &ctx);
}
-
- return ret;
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue_nomemrestore(q);
+ if (!ret)
+ WARN_ON_ONCE(elevator_change_done(q, &ctx));
}
-void elevator_disable(struct request_queue *q)
+/*
+ * Use the default elevator settings. If the chosen elevator initialization
+ * fails, fall back to the "none" elevator (no elevator).
+ */
+void elevator_set_default(struct request_queue *q)
{
- lockdep_assert_held(&q->sysfs_lock);
+ struct elv_change_ctx ctx = {
+ .name = "mq-deadline",
+ .no_uevent = true,
+ };
+ int err = 0;
- blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
+ /* switching the elevator is allowed from now on */
+ blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
- elv_unregister_queue(q);
- elevator_exit(q);
- blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
- q->elevator = NULL;
- q->nr_requests = q->tag_set->queue_depth;
- blk_add_trace_msg(q, "elv switch: none");
+ if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
+ return;
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ /*
+ * For single queue devices, default to using mq-deadline. If we
+ * have multiple queues or mq-deadline is not available, default
+ * to "none".
+ */
+ if (elevator_find_get(ctx.name) && (q->nr_hw_queues == 1 ||
+ blk_mq_is_shared_tags(q->tag_set->flags)))
+ err = elevator_change(q, &ctx);
+ if (err < 0)
+ pr_warn("\"%s\" elevator initialization failed %d, "
+ "falling back to \"none\"\n", ctx.name, err);
}
-/*
- * Switch this queue to the given IO scheduler.
- */
-static int elevator_change(struct request_queue *q, const char *elevator_name)
+void elevator_set_none(struct request_queue *q)
{
- struct elevator_type *e;
- int ret;
+ struct elv_change_ctx ctx = {
+ .name = "none",
+ };
+ int err;
- /* Make sure queue is not in the middle of being removed */
- if (!blk_queue_registered(q))
- return -ENOENT;
+ err = elevator_change(q, &ctx);
+ if (err < 0)
+ pr_warn("%s: set none elevator failed %d\n", __func__, err);
+}
- if (!strncmp(elevator_name, "none", 4)) {
- if (q->elevator)
- elevator_disable(q);
- return 0;
- }
+static void elv_iosched_load_module(const char *elevator_name)
+{
+ struct elevator_type *found;
- if (q->elevator && elevator_match(q->elevator->type, elevator_name))
- return 0;
+ spin_lock(&elv_list_lock);
+ found = __elevator_find(elevator_name);
+ spin_unlock(&elv_list_lock);
- e = elevator_find_get(q, elevator_name);
- if (!e) {
+ if (!found)
request_module("%s-iosched", elevator_name);
- e = elevator_find_get(q, elevator_name);
- if (!e)
- return -EINVAL;
- }
- ret = elevator_switch(q, e);
- elevator_put(e);
- return ret;
}
-ssize_t elv_iosched_store(struct request_queue *q, const char *buf,
+ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
+ struct elv_change_ctx ctx = {};
int ret;
+ struct request_queue *q = disk->queue;
+ struct blk_mq_tag_set *set = q->tag_set;
- if (!elv_support_iosched(q))
- return count;
+ /* Make sure queue is not in the middle of being removed */
+ if (!blk_queue_registered(q))
+ return -ENOENT;
+ /*
+ * If the attribute needs to load a module, do it before freezing the
+ * queue to ensure that the module file can be read when the request
+ * queue is the one for the device storing the module file.
+ */
strscpy(elevator_name, buf, sizeof(elevator_name));
- ret = elevator_change(q, strstrip(elevator_name));
- if (!ret)
- return count;
+ ctx.name = strstrip(elevator_name);
+
+ elv_iosched_load_module(ctx.name);
+
+ down_read(&set->update_nr_hwq_lock);
+ if (!blk_queue_no_elv_switch(q)) {
+ ret = elevator_change(q, &ctx);
+ if (!ret)
+ ret = count;
+ } else {
+ ret = -ENOENT;
+ }
+ up_read(&set->update_nr_hwq_lock);
return ret;
}
-ssize_t elv_iosched_show(struct request_queue *q, char *name)
+ssize_t elv_iosched_show(struct gendisk *disk, char *name)
{
- struct elevator_queue *eq = q->elevator;
+ struct request_queue *q = disk->queue;
struct elevator_type *cur = NULL, *e;
int len = 0;
- if (!elv_support_iosched(q))
- return sprintf(name, "none\n");
-
+ mutex_lock(&q->elevator_lock);
if (!q->elevator) {
len += sprintf(name+len, "[none] ");
} else {
len += sprintf(name+len, "none ");
- cur = eq->type;
+ cur = q->elevator->type;
}
spin_lock(&elv_list_lock);
list_for_each_entry(e, &elv_list, list) {
if (e == cur)
len += sprintf(name+len, "[%s] ", e->elevator_name);
- else if (elv_support_features(q, e))
+ else
len += sprintf(name+len, "%s ", e->elevator_name);
}
spin_unlock(&elv_list_lock);
len += sprintf(name+len, "\n");
+ mutex_unlock(&q->elevator_lock);
+
return len;
}
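Finally, a rough end-to-end sketch (not part of the patch) of how a scheduler change now reaches the code above, starting from a sysfs write; only functions that appear in this diff are referenced.

/*
 * # echo mq-deadline > /sys/block/<disk>/queue/scheduler
 *
 * elv_iosched_store(disk, buf, count)
 *	elv_iosched_load_module(name)		<- request_module() before freezing
 *	down_read(&set->update_nr_hwq_lock)
 *	elevator_change(q, &ctx)
 *		blk_mq_freeze_queue(q)
 *		blk_mq_cancel_work_sync(q)
 *		mutex_lock(&q->elevator_lock)
 *		elevator_switch(q, &ctx)	<- quiesce, exit old, init new
 *		mutex_unlock(&q->elevator_lock)
 *		blk_mq_unfreeze_queue(q, memflags)
 *		elevator_change_done(q, &ctx)	<- sysfs/debugfs, wbt, kobject_put
 *	up_read(&set->update_nr_hwq_lock)
 */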