diff options
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 96 |
1 files changed, 83 insertions, 13 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index 4806b867e37d..9692fa4c3ef2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -883,7 +883,8 @@ static void blk_complete_request(struct request *req) /* Completion has already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); - blk_zone_update_request_bio(req, bio); + if (blk_req_bio_is_zone_append(req, bio)) + blk_zone_append_update_request_bio(req, bio); if (!is_flush) bio_endio(bio); @@ -982,7 +983,8 @@ bool blk_update_request(struct request *req, blk_status_t error, /* Don't actually finish bio if it's part of flush sequence */ if (!bio->bi_iter.bi_size) { - blk_zone_update_request_bio(req, bio); + if (blk_req_bio_is_zone_append(req, bio)) + blk_zone_append_update_request_bio(req, bio); if (!is_flush) bio_endio(bio); } @@ -3169,8 +3171,10 @@ void blk_mq_submit_bio(struct bio *bio) if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) goto queue_exit; - if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs)) - goto queue_exit; + if (bio_needs_zone_write_plugging(bio)) { + if (blk_zone_plug_bio(bio, nr_segs)) + goto queue_exit; + } new_request: if (rq) { @@ -4966,6 +4970,60 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return ret; } +/* + * Switch back to the elevator type stored in the xarray. + */ +static void blk_mq_elv_switch_back(struct request_queue *q, + struct xarray *elv_tbl) +{ + struct elevator_type *e = xa_load(elv_tbl, q->id); + + /* The elv_update_nr_hw_queues unfreezes the queue. */ + elv_update_nr_hw_queues(q, e); + + /* Drop the reference acquired in blk_mq_elv_switch_none. */ + if (e) + elevator_put(e); +} + +/* + * Stores elevator type in xarray and set current elevator to none. It uses + * q->id as an index to store the elevator type into the xarray. + */ +static int blk_mq_elv_switch_none(struct request_queue *q, + struct xarray *elv_tbl) +{ + int ret = 0; + + lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock); + + /* + * Accessing q->elevator without holding q->elevator_lock is safe here + * because we're called from nr_hw_queue update which is protected by + * set->update_nr_hwq_lock in the writer context. So, scheduler update/ + * switch code (which acquires the same lock in the reader context) + * can't run concurrently. + */ + if (q->elevator) { + + ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL); + if (WARN_ON_ONCE(ret)) + return ret; + + /* + * Before we switch elevator to 'none', take a reference to + * the elevator module so that while nr_hw_queue update is + * running, no one can remove elevator module. We'd put the + * reference to elevator module later when we switch back + * elevator. + */ + __elevator_get(q->elevator->type); + + elevator_set_none(q); + } + return ret; +} + static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) { @@ -4973,6 +5031,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int prev_nr_hw_queues = set->nr_hw_queues; unsigned int memflags; int i; + struct xarray elv_tbl; lockdep_assert_held(&set->tag_list_lock); @@ -4984,6 +5043,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, return; memflags = memalloc_noio_save(); + + xa_init(&elv_tbl); + list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_debugfs_unregister_hctxs(q); blk_mq_sysfs_unregister_hctxs(q); @@ -4992,11 +5054,17 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue_nomemsave(q); - if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) { - list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_unfreeze_queue_nomemrestore(q); - goto reregister; - } + /* + * Switch IO scheduler to 'none', cleaning up the data associated + * with the previous scheduler. We will switch back once we are done + * updating the new sw to hw queue mappings. + */ + list_for_each_entry(q, &set->tag_list, tag_set_list) + if (blk_mq_elv_switch_none(q, &elv_tbl)) + goto switch_back; + + if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) + goto switch_back; fallback: blk_mq_update_queue_map(set); @@ -5016,12 +5084,11 @@ fallback: } blk_mq_map_swqueue(q); } - - /* elv_update_nr_hw_queues() unfreeze queue for us */ +switch_back: + /* The blk_mq_elv_switch_back unfreezes queue for us. */ list_for_each_entry(q, &set->tag_list, tag_set_list) - elv_update_nr_hw_queues(q); + blk_mq_elv_switch_back(q, &elv_tbl); -reregister: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); blk_mq_debugfs_register_hctxs(q); @@ -5029,6 +5096,9 @@ reregister: blk_mq_remove_hw_queues_cpuhp(q); blk_mq_add_hw_queues_cpuhp(q); } + + xa_destroy(&elv_tbl); + memalloc_noio_restore(memflags); /* Free the excess tags when nr_hw_queues shrink. */ |