diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-17 16:57:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-17 16:57:47 -0700 |
commit | 7ad67ca5534ee7c958559c4ad610f05c4578e361 (patch) | |
tree | dc6b6a8a6b70b5f25b07bcdc06d8e77e705f6822 /block/elevator.c | |
parent | 5260c2b863ef1152445ce93476c95d8c8a727eef (diff) | |
parent | 9c7eddf1b080f98fed1aadb74fe784f29bf77a08 (diff) |
Merge tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
- Two NVMe pull requests:
- ana log parse fix from Anton
- nvme quirks support for Apple devices from Ben
- fix missing bio completion tracing for multipath stack devices
from Hannes and Mikhail
- IP TOS settings for nvme rdma and tcp transports from Israel
- rq_dma_dir cleanups from Israel
- tracing for Get LBA Status command from Minwoo
- Some nvme-tcp cleanups from Minwoo, Potnuri and Myself
- Some consolidation between the fabrics transports for handling
the CAP register
- reset race with ns scanning fix for fabrics (move fabrics
commands to a dedicated request queue with a different lifetime
from the admin request queue)."
- controller reset and namespace scan races fixes
- nvme discovery log change uevent support
- naming improvements from Keith
- multiple discovery controllers reject fix from James
- some regular cleanups from various people
- Series fixing (and re-fixing) null_blk debug printing and nr_devices
checks (André)
- A few pull requests from Song, with fixes from Andy, Guoqing,
Guilherme, Neil, Nigel, and Yufen.
- REQ_OP_ZONE_RESET_ALL support (Chaitanya)
- Bio merge handling unification (Christoph)
- Pick default elevator correctly for devices with special needs
(Damien)
- Block stats fixes (Hou)
- Timeout and support devices nbd fixes (Mike)
- Series fixing races around elevator switching and device add/remove
(Ming)
- sed-opal cleanups (Revanth)
- Per device weight support for BFQ (Fam)
- Support for blk-iocost, a new model that can properly account cost of
IO workloads. (Tejun)
- blk-cgroup writeback fixes (Tejun)
- paride queue init fixes (zhengbin)
- blk_set_runtime_active() cleanup (Stanley)
- Block segment mapping optimizations (Bart)
- lightnvm fixes (Hans/Minwoo/YueHaibing)
- Various little fixes and cleanups
* tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block: (186 commits)
null_blk: format pr_* logs with pr_fmt
null_blk: match the type of parameter nr_devices
null_blk: do not fail the module load with zero devices
block: also check RQF_STATS in blk_mq_need_time_stamp()
block: make rq sector size accessible for block stats
bfq: Fix bfq linkage error
raid5: use bio_end_sector in r5_next_bio
raid5: remove STRIPE_OPS_REQ_PENDING
md: add feature flag MD_FEATURE_RAID0_LAYOUT
md/raid0: avoid RAID0 data corruption due to layout confusion.
raid5: don't set STRIPE_HANDLE to stripe which is in batch list
raid5: don't increment read_errors on EILSEQ return
nvmet: fix a wrong error status returned in error log page
nvme: send discovery log page change events to userspace
nvme: add uevent variables for controller devices
nvme: enable aen regardless of the presence of I/O queues
nvme-fabrics: allow discovery subsystems accept a kato
nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery()
nvme: Remove redundant assignment of cq vector
nvme: Assign subsys instance from first ctrl
...
Diffstat (limited to 'block/elevator.c')
-rw-r--r-- | block/elevator.c | 217 |
1 files changed, 157 insertions, 60 deletions
diff --git a/block/elevator.c b/block/elevator.c index 2f17d66d0e61..bba10e83478a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -83,8 +83,26 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_bio_merge_ok); -static bool elevator_match(const struct elevator_type *e, const char *name) +static inline bool elv_support_features(unsigned int elv_features, + unsigned int required_features) { + return (required_features & elv_features) == required_features; +} + +/** + * elevator_match - Test an elevator name and features + * @e: Scheduler to test + * @name: Elevator name to test + * @required_features: Features that the elevator must provide + * + * Return true is the elevator @e name matches @name and if @e provides all the + * the feratures spcified by @required_features. + */ +static bool elevator_match(const struct elevator_type *e, const char *name, + unsigned int required_features) +{ + if (!elv_support_features(e->elevator_features, required_features)) + return false; if (!strcmp(e->elevator_name, name)) return true; if (e->elevator_alias && !strcmp(e->elevator_alias, name)) @@ -93,15 +111,21 @@ static bool elevator_match(const struct elevator_type *e, const char *name) return false; } -/* - * Return scheduler with name 'name' +/** + * elevator_find - Find an elevator + * @name: Name of the elevator to find + * @required_features: Features that the elevator must provide + * + * Return the first registered scheduler with name @name and supporting the + * features @required_features and NULL otherwise. */ -static struct elevator_type *elevator_find(const char *name) +static struct elevator_type *elevator_find(const char *name, + unsigned int required_features) { struct elevator_type *e; list_for_each_entry(e, &elv_list, list) { - if (elevator_match(e, name)) + if (elevator_match(e, name, required_features)) return e; } @@ -120,12 +144,12 @@ static struct elevator_type *elevator_get(struct request_queue *q, spin_lock(&elv_list_lock); - e = elevator_find(name); + e = elevator_find(name, q->required_elevator_features); if (!e && try_loading) { spin_unlock(&elv_list_lock); request_module("%s-iosched", name); spin_lock(&elv_list_lock); - e = elevator_find(name); + e = elevator_find(name, q->required_elevator_features); } if (e && !try_module_get(e->elevator_owner)) @@ -135,20 +159,6 @@ static struct elevator_type *elevator_get(struct request_queue *q, return e; } -static char chosen_elevator[ELV_NAME_MAX]; - -static int __init elevator_setup(char *str) -{ - /* - * Be backwards-compatible with previous kernels, so users - * won't get the wrong elevator. - */ - strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); - return 1; -} - -__setup("elevator=", elevator_setup); - static struct kobj_type elv_ktype; struct elevator_queue *elevator_alloc(struct request_queue *q, @@ -470,13 +480,16 @@ static struct kobj_type elv_ktype = { .release = elevator_release, }; -int elv_register_queue(struct request_queue *q) +/* + * elv_register_queue is called from either blk_register_queue or + * elevator_switch, elevator switch is prevented from being happen + * in the two paths, so it is safe to not hold q->sysfs_lock. + */ +int elv_register_queue(struct request_queue *q, bool uevent) { struct elevator_queue *e = q->elevator; int error; - lockdep_assert_held(&q->sysfs_lock); - error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); if (!error) { struct elv_fs_entry *attr = e->type->elevator_attrs; @@ -487,24 +500,34 @@ int elv_register_queue(struct request_queue *q) attr++; } } - kobject_uevent(&e->kobj, KOBJ_ADD); + if (uevent) + kobject_uevent(&e->kobj, KOBJ_ADD); + + mutex_lock(&q->sysfs_lock); e->registered = 1; + mutex_unlock(&q->sysfs_lock); } return error; } +/* + * elv_unregister_queue is called from either blk_unregister_queue or + * elevator_switch, elevator switch is prevented from being happen + * in the two paths, so it is safe to not hold q->sysfs_lock. + */ void elv_unregister_queue(struct request_queue *q) { - lockdep_assert_held(&q->sysfs_lock); - if (q) { struct elevator_queue *e = q->elevator; kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); + + mutex_lock(&q->sysfs_lock); e->registered = 0; /* Re-enable throttling in case elevator disabled it */ wbt_enable_default(q); + mutex_unlock(&q->sysfs_lock); } } @@ -526,7 +549,7 @@ int elv_register(struct elevator_type *e) /* register, don't allow duplicate names */ spin_lock(&elv_list_lock); - if (elevator_find(e->elevator_name)) { + if (elevator_find(e->elevator_name, 0)) { spin_unlock(&elv_list_lock); kmem_cache_destroy(e->icq_cache); return -EBUSY; @@ -567,10 +590,32 @@ int elevator_switch_mq(struct request_queue *q, lockdep_assert_held(&q->sysfs_lock); if (q->elevator) { - if (q->elevator->registered) + if (q->elevator->registered) { + mutex_unlock(&q->sysfs_lock); + + /* + * Concurrent elevator switch can't happen becasue + * sysfs write is always exclusively on same file. + * + * Also the elevator queue won't be freed after + * sysfs_lock is released becasue kobject_del() in + * blk_unregister_queue() waits for completion of + * .store & .show on its attributes. + */ elv_unregister_queue(q); + + mutex_lock(&q->sysfs_lock); + } ioc_clear_queue(q); elevator_exit(q, q->elevator); + + /* + * sysfs_lock may be dropped, so re-check if queue is + * unregistered. If yes, don't switch to new elevator + * any more + */ + if (!blk_queue_registered(q)) + return 0; } ret = blk_mq_init_sched(q, new_e); @@ -578,7 +623,11 @@ int elevator_switch_mq(struct request_queue *q, goto out; if (new_e) { - ret = elv_register_queue(q); + mutex_unlock(&q->sysfs_lock); + + ret = elv_register_queue(q, true); + + mutex_lock(&q->sysfs_lock); if (ret) { elevator_exit(q, q->elevator); goto out; @@ -594,37 +643,89 @@ out: return ret; } +static inline bool elv_support_iosched(struct request_queue *q) +{ + if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) + return false; + return true; +} + /* - * For blk-mq devices, we default to using mq-deadline, if available, for single - * queue devices. If deadline isn't available OR we have multiple queues, - * default to "none". + * For single queue devices, default to using mq-deadline. If we have multiple + * queues or mq-deadline is not available, default to "none". */ -int elevator_init_mq(struct request_queue *q) +static struct elevator_type *elevator_get_default(struct request_queue *q) +{ + if (q->nr_hw_queues != 1) + return NULL; + + return elevator_get(q, "mq-deadline", false); +} + +/* + * Get the first elevator providing the features required by the request queue. + * Default to "none" if no matching elevator is found. + */ +static struct elevator_type *elevator_get_by_features(struct request_queue *q) +{ + struct elevator_type *e, *found = NULL; + + spin_lock(&elv_list_lock); + + list_for_each_entry(e, &elv_list, list) { + if (elv_support_features(e->elevator_features, + q->required_elevator_features)) { + found = e; + break; + } + } + + if (found && !try_module_get(found->elevator_owner)) + found = NULL; + + spin_unlock(&elv_list_lock); + return found; +} + +/* + * For a device queue that has no required features, use the default elevator + * settings. Otherwise, use the first elevator available matching the required + * features. If no suitable elevator is find or if the chosen elevator + * initialization fails, fall back to the "none" elevator (no elevator). + */ +void elevator_init_mq(struct request_queue *q) { struct elevator_type *e; - int err = 0; + int err; - if (q->nr_hw_queues != 1) - return 0; + if (!elv_support_iosched(q)) + return; + + WARN_ON_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)); - /* - * q->sysfs_lock must be held to provide mutual exclusion between - * elevator_switch() and here. - */ - mutex_lock(&q->sysfs_lock); if (unlikely(q->elevator)) - goto out_unlock; + return; - e = elevator_get(q, "mq-deadline", false); + if (!q->required_elevator_features) + e = elevator_get_default(q); + else + e = elevator_get_by_features(q); if (!e) - goto out_unlock; + return; + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); err = blk_mq_init_sched(q, e); - if (err) + + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); + + if (err) { + pr_warn("\"%s\" elevator initialization failed, " + "falling back to \"none\"\n", e->elevator_name); elevator_put(e); -out_unlock: - mutex_unlock(&q->sysfs_lock); - return err; + } } @@ -660,7 +761,7 @@ static int __elevator_change(struct request_queue *q, const char *name) struct elevator_type *e; /* Make sure queue is not in the middle of being removed */ - if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) + if (!blk_queue_registered(q)) return -ENOENT; /* @@ -677,7 +778,8 @@ static int __elevator_change(struct request_queue *q, const char *name) if (!e) return -EINVAL; - if (q->elevator && elevator_match(q->elevator->type, elevator_name)) { + if (q->elevator && + elevator_match(q->elevator->type, elevator_name, 0)) { elevator_put(e); return 0; } @@ -685,13 +787,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } -static inline bool elv_support_iosched(struct request_queue *q) -{ - if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) - return false; - return true; -} - ssize_t elv_iosched_store(struct request_queue *q, const char *name, size_t count) { @@ -724,11 +819,13 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) spin_lock(&elv_list_lock); list_for_each_entry(__e, &elv_list, list) { - if (elv && elevator_match(elv, __e->elevator_name)) { + if (elv && elevator_match(elv, __e->elevator_name, 0)) { len += sprintf(name+len, "[%s] ", elv->elevator_name); continue; } - if (elv_support_iosched(q)) + if (elv_support_iosched(q) && + elevator_match(__e, __e->elevator_name, + q->required_elevator_features)) len += sprintf(name+len, "%s ", __e->elevator_name); } spin_unlock(&elv_list_lock); |