summaryrefslogtreecommitdiff
path: root/block/blk-core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-21 16:48:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-21 16:48:55 -0700
commit616355cc818c6ddadc393fdfd4491f94458cb715 (patch)
treea96907b179ccbeb84cd1df5515cd10c90eb5bd59 /block/blk-core.c
parentb080cee72ef355669cbc52ff55dc513d37433600 (diff)
parent8f9e7b65f833cb9a4b2e2f54a049d74df394d906 (diff)
Merge tag 'for-5.18/block-2022-03-18' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: - BFQ cleanups and fixes (Yu, Zhang, Yahu, Paolo) - blk-rq-qos completion fix (Tejun) - blk-cgroup merge fix (Tejun) - Add offline error return value to distinguish it from an IO error on the device (Song) - IO stats fixes (Zhang, Christoph) - blkcg refcount fixes (Ming, Yu) - Fix for indefinite dispatch loop softlockup (Shin'ichiro) - blk-mq hardware queue management improvements (Ming) - sbitmap dead code removal (Ming, John) - Plugging merge improvements (me) - Show blk-crypto capabilities in sysfs (Eric) - Multiple delayed queue run improvement (David) - Block throttling fixes (Ming) - Start deprecating auto module loading based on dev_t (Christoph) - bio allocation improvements (Christoph, Chaitanya) - Get rid of bio_devname (Christoph) - bio clone improvements (Christoph) - Block plugging improvements (Christoph) - Get rid of genhd.h header (Christoph) - Ensure drivers use appropriate flush helpers (Christoph) - Refcounting improvements (Christoph) - Queue initialization and teardown improvements (Ming, Christoph) - Misc fixes/improvements (Barry, Chaitanya, Colin, Dan, Jiapeng, Lukas, Nian, Yang, Eric, Chengming) * tag 'for-5.18/block-2022-03-18' of git://git.kernel.dk/linux-block: (127 commits) block: cancel all throttled bios in del_gendisk() block: let blkcg_gq grab request queue's refcnt block: avoid use-after-free on throttle data block: limit request dispatch loop duration block/bfq-iosched: Fix spelling mistake "tenative" -> "tentative" sr: simplify the local variable initialization in sr_block_open() block: don't merge across cgroup boundaries if blkcg is enabled block: fix rq-qos breakage from skipping rq_qos_done_bio() block: flush plug based on hardware and software queue order block: ensure plug merging checks the correct queue at least once block: move rq_qos_exit() into disk_release() block: do more work in elevator_exit block: move blk_exit_queue into disk_release block: move q_usage_counter release into blk_queue_release block: don't remove hctx debugfs dir from blk_mq_exit_queue block: move blkcg initialization/destroy into disk allocation/release handler sr: implement ->free_disk to simplify refcounting sd: implement ->free_disk to simplify refcounting sd: delay calling free_opal_dev sd: call sd_zbc_release_disk before releasing the scsi_device reference ...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c293
1 files changed, 132 insertions, 161 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 779b4a1f66ac..9c14deab3af4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,7 +34,6 @@
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
@@ -49,6 +48,7 @@
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
+#include "blk-cgroup.h"
#include "blk-throttle.h"
#include "blk-rq-qos.h"
@@ -165,6 +165,7 @@ static const struct {
[BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
[BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" },
[BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+ [BLK_STS_OFFLINE] = { -ENODEV, "device offline" },
/* device mapper special case, should not leak out: */
[BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
@@ -339,8 +340,6 @@ void blk_cleanup_queue(struct request_queue *q)
blk_mq_sched_free_rqs(q);
mutex_unlock(&q->sysfs_lock);
- percpu_ref_exit(&q->q_usage_counter);
-
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
@@ -473,9 +472,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
-#ifdef CONFIG_BLK_CGROUP
- INIT_LIST_HEAD(&q->blkg_list);
-#endif
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -496,17 +492,12 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto fail_stats;
- if (blkcg_init_queue(q))
- goto fail_ref;
-
blk_queue_dma_alignment(q, 511);
blk_set_default_limits(&q->limits);
q->nr_requests = BLKDEV_DEFAULT_RQ;
return q;
-fail_ref:
- percpu_ref_exit(&q->q_usage_counter);
fail_stats:
blk_free_queue_stats(q->stats);
fail_split:
@@ -540,17 +531,6 @@ bool blk_get_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_get_queue);
-static void handle_bad_sector(struct bio *bio, sector_t maxsector)
-{
- char b[BDEVNAME_SIZE];
-
- pr_info_ratelimited("%s: attempt to access beyond end of device\n"
- "%s: rw=%d, want=%llu, limit=%llu\n",
- current->comm,
- bio_devname(bio, b), bio->bi_opf,
- bio_end_sector(bio), maxsector);
-}
-
#ifdef CONFIG_FAIL_MAKE_REQUEST
static DECLARE_FAULT_ATTR(fail_make_request);
@@ -580,14 +560,10 @@ late_initcall(fail_make_request_debugfs);
static inline bool bio_check_ro(struct bio *bio)
{
if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
- char b[BDEVNAME_SIZE];
-
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return false;
-
- WARN_ONCE(1,
- "Trying to write to read-only block-device %s (partno %d)\n",
- bio_devname(bio, b), bio->bi_bdev->bd_partno);
+ pr_warn("Trying to write to read-only block-device %pg\n",
+ bio->bi_bdev);
/* Older lvm-tools actually trigger this */
return false;
}
@@ -616,7 +592,11 @@ static inline int bio_check_eod(struct bio *bio)
if (nr_sectors && maxsector &&
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
- handle_bad_sector(bio, maxsector);
+ pr_info_ratelimited("%s: attempt to access beyond end of device\n"
+ "%pg: rw=%d, want=%llu, limit=%llu\n",
+ current->comm,
+ bio->bi_bdev, bio->bi_opf,
+ bio_end_sector(bio), maxsector);
return -EIO;
}
return 0;
@@ -676,7 +656,123 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
-noinline_for_stack bool submit_bio_checks(struct bio *bio)
+static void __submit_bio(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+
+ if (unlikely(!blk_crypto_bio_prep(&bio)))
+ return;
+
+ if (!disk->fops->submit_bio) {
+ blk_mq_submit_bio(bio);
+ } else if (likely(bio_queue_enter(bio) == 0)) {
+ disk->fops->submit_bio(bio);
+ blk_queue_exit(disk->queue);
+ }
+}
+
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
+ *
+ * - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ * that), so we have a list with a single bio.
+ * - We pretend that we have just taken it off a longer list, so we assign
+ * bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ * bio_list of new bios to be added. ->submit_bio() may indeed add some more
+ * bios through a recursive call to submit_bio_noacct. If it did, we find a
+ * non-NULL value in bio_list and re-enter the loop from the top.
+ * - In this case we really did just take the bio of the top of the list (no
+ * pretending) and so remove it from bio_list, and call into ->submit_bio()
+ * again.
+ *
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ * ->submit_bio_bio, but that haven't been processed yet.
+ */
+static void __submit_bio_noacct(struct bio *bio)
+{
+ struct bio_list bio_list_on_stack[2];
+
+ BUG_ON(bio->bi_next);
+
+ bio_list_init(&bio_list_on_stack[0]);
+ current->bio_list = bio_list_on_stack;
+
+ do {
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ struct bio_list lower, same;
+
+ /*
+ * Create a fresh bio_list for all subordinate requests.
+ */
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
+
+ __submit_bio(bio);
+
+ /*
+ * Sort new bios into those for a lower level and those for the
+ * same level.
+ */
+ bio_list_init(&lower);
+ bio_list_init(&same);
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+ if (q == bdev_get_queue(bio->bi_bdev))
+ bio_list_add(&same, bio);
+ else
+ bio_list_add(&lower, bio);
+
+ /*
+ * Now assemble so we handle the lowest level first.
+ */
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+ } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+ current->bio_list = NULL;
+}
+
+static void __submit_bio_noacct_mq(struct bio *bio)
+{
+ struct bio_list bio_list[2] = { };
+
+ current->bio_list = bio_list;
+
+ do {
+ __submit_bio(bio);
+ } while ((bio = bio_list_pop(&bio_list[0])));
+
+ current->bio_list = NULL;
+}
+
+void submit_bio_noacct_nocheck(struct bio *bio)
+{
+ /*
+ * We only want one ->submit_bio to be active at a time, else stack
+ * usage with stacked devices could be a problem. Use current->bio_list
+ * to collect a list of requests submited by a ->submit_bio method while
+ * it is active, and then process them after it returned.
+ */
+ if (current->bio_list)
+ bio_list_add(&current->bio_list[0], bio);
+ else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
+ __submit_bio_noacct_mq(bio);
+ else
+ __submit_bio_noacct(bio);
+}
+
+/**
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
+ * @bio: The bio describing the location in memory and on the device.
+ *
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
+ */
+void submit_bio_noacct(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -761,7 +857,7 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
}
if (blk_throtl_bio(bio))
- return false;
+ return;
blk_cgroup_bio_start(bio);
blkcg_bio_issue_init(bio);
@@ -773,138 +869,14 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
*/
bio_set_flag(bio, BIO_TRACE_COMPLETION);
}
- return true;
+ submit_bio_noacct_nocheck(bio);
+ return;
not_supported:
status = BLK_STS_NOTSUPP;
end_io:
bio->bi_status = status;
bio_endio(bio);
- return false;
-}
-
-static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
-{
- if (blk_crypto_bio_prep(&bio)) {
- if (likely(bio_queue_enter(bio) == 0)) {
- disk->fops->submit_bio(bio);
- blk_queue_exit(disk->queue);
- }
- }
-}
-
-static void __submit_bio(struct bio *bio)
-{
- struct gendisk *disk = bio->bi_bdev->bd_disk;
-
- if (unlikely(!submit_bio_checks(bio)))
- return;
-
- if (!disk->fops->submit_bio)
- blk_mq_submit_bio(bio);
- else
- __submit_bio_fops(disk, bio);
-}
-
-/*
- * The loop in this function may be a bit non-obvious, and so deserves some
- * explanation:
- *
- * - Before entering the loop, bio->bi_next is NULL (as all callers ensure
- * that), so we have a list with a single bio.
- * - We pretend that we have just taken it off a longer list, so we assign
- * bio_list to a pointer to the bio_list_on_stack, thus initialising the
- * bio_list of new bios to be added. ->submit_bio() may indeed add some more
- * bios through a recursive call to submit_bio_noacct. If it did, we find a
- * non-NULL value in bio_list and re-enter the loop from the top.
- * - In this case we really did just take the bio of the top of the list (no
- * pretending) and so remove it from bio_list, and call into ->submit_bio()
- * again.
- *
- * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
- * bio_list_on_stack[1] contains bios that were submitted before the current
- * ->submit_bio_bio, but that haven't been processed yet.
- */
-static void __submit_bio_noacct(struct bio *bio)
-{
- struct bio_list bio_list_on_stack[2];
-
- BUG_ON(bio->bi_next);
-
- bio_list_init(&bio_list_on_stack[0]);
- current->bio_list = bio_list_on_stack;
-
- do {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- struct bio_list lower, same;
-
- /*
- * Create a fresh bio_list for all subordinate requests.
- */
- bio_list_on_stack[1] = bio_list_on_stack[0];
- bio_list_init(&bio_list_on_stack[0]);
-
- __submit_bio(bio);
-
- /*
- * Sort new bios into those for a lower level and those for the
- * same level.
- */
- bio_list_init(&lower);
- bio_list_init(&same);
- while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
- if (q == bdev_get_queue(bio->bi_bdev))
- bio_list_add(&same, bio);
- else
- bio_list_add(&lower, bio);
-
- /*
- * Now assemble so we handle the lowest level first.
- */
- bio_list_merge(&bio_list_on_stack[0], &lower);
- bio_list_merge(&bio_list_on_stack[0], &same);
- bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
- } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
-
- current->bio_list = NULL;
-}
-
-static void __submit_bio_noacct_mq(struct bio *bio)
-{
- struct bio_list bio_list[2] = { };
-
- current->bio_list = bio_list;
-
- do {
- __submit_bio(bio);
- } while ((bio = bio_list_pop(&bio_list[0])));
-
- current->bio_list = NULL;
-}
-
-/**
- * submit_bio_noacct - re-submit a bio to the block device layer for I/O
- * @bio: The bio describing the location in memory and on the device.
- *
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers. All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
- */
-void submit_bio_noacct(struct bio *bio)
-{
- /*
- * We only want one ->submit_bio to be active at a time, else stack
- * usage with stacked devices could be a problem. Use current->bio_list
- * to collect a list of requests submited by a ->submit_bio method while
- * it is active, and then process them after it returned.
- */
- if (current->bio_list)
- bio_list_add(&current->bio_list[0], bio);
- else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
- __submit_bio_noacct_mq(bio);
- else
- __submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio_noacct);
@@ -989,8 +961,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return 0;
- if (current->plug)
- blk_flush_plug(current->plug, false);
+ blk_flush_plug(current->plug, false);
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
return 0;
@@ -1272,7 +1243,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
}
EXPORT_SYMBOL(blk_check_plugged);
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
if (!list_empty(&plug->cb_list))
flush_plug_callbacks(plug, from_schedule);
@@ -1301,7 +1272,7 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
void blk_finish_plug(struct blk_plug *plug)
{
if (plug == current->plug) {
- blk_flush_plug(plug, false);
+ __blk_flush_plug(plug, false);
current->plug = NULL;
}
}