summaryrefslogtreecommitdiff
path: root/block/blk-mq.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-18 18:22:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-18 18:22:40 -0800
commit9d1694dc91ce7b80bc96d6d8eaf1a1eca668d847 (patch)
treed9093aecb9261cccaea1f0a58887fcd9db542172 /block/blk-mq.c
parente9a5a78d1ad8ceb4e3df6d6ad93360094c84ac40 (diff)
parentb2e792ae883a0aa976d4176dfa7dc933263440ea (diff)
Merge tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe: - NVMe pull request via Keith: - tcp, fc, and rdma target fixes (Maurizio, Daniel, Hannes, Christoph) - discard fixes and improvements (Christoph) - timeout debug improvements (Keith, Max) - various cleanups (Daniel, Max, Giuxen) - trace event string fixes (Arnd) - shadow doorbell setup on reset fix (William) - a write zeroes quirk for SK Hynix (Jim) - MD pull request via Song: - Sparse warning since v6.0 (Bart) - /proc/mdstat regression since v6.7 (Yu Kuai) - Use symbolic error value (Christian) - IO Priority documentation update (Christian) - Fix for accessing queue limits without having entered the queue (Christoph, me) - Fix for loop dio support (Christoph) - Move null_blk off deprecated ida interface (Christophe) - Ensure nbd initializes full msghdr (Eric) - Fix for a regression with the folio conversion, which is now easier to hit because of an unrelated change (Matthew) - Remove redundant check in virtio-blk (Li) - Fix for a potential hang in sbitmap (Ming) - Fix for partial zone appending (Damien) - Misc changes and fixes (Bart, me, Kemeng, Dmitry) * tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux: (45 commits) Documentation: block: ioprio: Update schedulers loop: fix the the direct I/O support check when used on top of block devices blk-mq: Remove the hctx 'run' debugfs attribute nbd: always initialize struct msghdr completely block: Fix iterating over an empty bio with bio_for_each_folio_all block: bio-integrity: fix kcalloc() arguments order virtio_blk: remove duplicate check if queue is broken in virtblk_done sbitmap: remove stale comment in sbq_calc_wake_batch block: Correct a documentation comment in blk-cgroup.c null_blk: Remove usage of the deprecated ida_simple_xx() API block: ensure we hold a queue reference when using queue limits blk-mq: rename blk_mq_can_use_cached_rq block: print symbolic error name instead of error code blk-mq: fix IO hang from sbitmap wakeup race nvmet-rdma: avoid circular locking dependency on install_queue() nvmet-tcp: avoid circular locking dependency on install_queue() nvme-pci: set doorbell config before unquiescing block: fix partial zone append completion handling in req_bio_endio() block/iocost: silence warning on 'last_period' potentially being unused md/raid1: Use blk_opf_t for read and write operations ...
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c50
1 files changed, 39 insertions, 11 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c11c97afa0bc..aa87fcfda1ec 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -772,11 +772,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
+ * For such case, force the completed nbytes to be equal to
+ * the BIO size so that bio_advance() sets the BIO remaining
+ * size to 0 and we end up calling bio_endio() before returning.
*/
- if (bio->bi_iter.bi_size != nbytes)
+ if (bio->bi_iter.bi_size != nbytes) {
bio->bi_status = BLK_STS_IOERR;
- else
+ nbytes = bio->bi_iter.bi_size;
+ } else {
bio->bi_iter.bi_sector = rq->__sector;
+ }
}
bio_advance(bio, nbytes);
@@ -1860,6 +1865,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
__add_wait_queue(wq, wait);
/*
+ * Add one explicit barrier since blk_mq_get_driver_tag() may
+ * not imply barrier in case of failure.
+ *
+ * Order adding us to wait queue and allocating driver tag.
+ *
+ * The pair is the one implied in sbitmap_queue_wake_up() which
+ * orders clearing sbitmap tag bits and waitqueue_active() in
+ * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
+ *
+ * Otherwise, re-order of adding wait queue and getting driver tag
+ * may cause __sbitmap_queue_wake_up() to wake up nothing because
+ * the waitqueue_active() may not observe us in wait queue.
+ */
+ smp_mb();
+
+ /*
* It's possible that a tag was freed in the window between the
* allocation failure and adding the hardware queue to the wait
* queue.
@@ -2891,8 +2912,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
return NULL;
}
-/* return true if this @rq can be used for @bio */
-static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+/*
+ * Check if we can use the passed on request for submitting the passed in bio,
+ * and remove it from the request list if it can be used.
+ */
+static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
struct bio *bio)
{
enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
@@ -2952,12 +2976,6 @@ void blk_mq_submit_bio(struct bio *bio)
blk_status_t ret;
bio = blk_queue_bounce(bio, q);
- if (bio_may_exceed_limits(bio, &q->limits)) {
- bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
- if (!bio)
- return;
- }
-
bio_set_ioprio(bio);
if (plug) {
@@ -2966,16 +2984,26 @@ void blk_mq_submit_bio(struct bio *bio)
rq = NULL;
}
if (rq) {
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ return;
+ }
if (!bio_integrity_prep(bio))
return;
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
- if (blk_mq_can_use_cached_rq(rq, plug, bio))
+ if (blk_mq_use_cached_rq(rq, plug, bio))
goto done;
percpu_ref_get(&q->q_usage_counter);
} else {
if (unlikely(bio_queue_enter(bio)))
return;
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ goto fail;
+ }
if (!bio_integrity_prep(bio))
goto fail;
}