Diffstat (limited to 'block/blk-lib.c')
 -rw-r--r--  block/blk-lib.c | 497
 1 file changed, 213 insertions(+), 284 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f09beadcbe3..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -10,110 +10,64 @@
#include "blk.h"
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
{
- struct bio *new = bio_alloc(gfp, nr_pages);
+ unsigned int discard_granularity = bdev_discard_granularity(bdev);
+ sector_t granularity_aligned_sector;
- if (bio) {
- bio_chain(bio, new);
- submit_bio(bio);
- }
-
- return new;
+ if (bdev_is_partition(bdev))
+ sector += bdev->bd_start_sect;
+
+ granularity_aligned_sector =
+ round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+ /*
+ * Make sure subsequent bios start aligned to the discard granularity if
+ * it needs to be split.
+ */
+ if (granularity_aligned_sector != sector)
+ return granularity_aligned_sector - sector;
+
+ /*
+ * Align the bio size to the discard granularity to make splitting the bio
+ * at discard granularity boundaries easier in the driver if needed.
+ */
+ return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
}
-EXPORT_SYMBOL_GPL(blk_next_bio);
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int flags,
- struct bio **biop)
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+ sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
{
- struct request_queue *q = bdev_get_queue(bdev);
- struct bio *bio = *biop;
- unsigned int op;
- sector_t bs_mask, part_offset = 0;
-
- if (!q)
- return -ENXIO;
-
- if (bdev_read_only(bdev))
- return -EPERM;
-
- if (flags & BLKDEV_DISCARD_SECURE) {
- if (!blk_queue_secure_erase(q))
- return -EOPNOTSUPP;
- op = REQ_OP_SECURE_ERASE;
- } else {
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
- op = REQ_OP_DISCARD;
- }
-
- /* In case the discard granularity isn't set by buggy device driver */
- if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
- char dev_name[BDEVNAME_SIZE];
-
- bdevname(bdev, dev_name);
- pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name);
- return -EOPNOTSUPP;
- }
-
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
-
- if (!nr_sects)
- return -EINVAL;
+ sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+ struct bio *bio;
- /* In case the discard request is in a partition */
- if (bdev_is_partition(bdev))
- part_offset = bdev->bd_start_sect;
+ if (!bio_sects)
+ return NULL;
+
+ bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+ if (!bio)
+ return NULL;
+ bio->bi_iter.bi_sector = *sector;
+ bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+ *sector += bio_sects;
+ *nr_sects -= bio_sects;
+ /*
+ * We can loop for a long time in here if someone does full device
+ * discards (like mkfs). Be nice and allow us to schedule out to avoid
+ * softlocking if preempt is disabled.
+ */
+ cond_resched();
+ return bio;
+}
- while (nr_sects) {
- sector_t granularity_aligned_lba, req_sects;
- sector_t sector_mapped = sector + part_offset;
-
- granularity_aligned_lba = round_up(sector_mapped,
- q->limits.discard_granularity >> SECTOR_SHIFT);
-
- /*
- * Check whether the discard bio starts at a discard_granularity
- * aligned LBA,
- * - If no: set (granularity_aligned_lba - sector_mapped) to
- * bi_size of the first split bio, then the second bio will
- * start at a discard_granularity aligned LBA on the device.
- * - If yes: use bio_aligned_discard_max_sectors() as the max
- * possible bi_size of the first split bio. Then when this bio
- * is split in device drive, the split ones are very probably
- * to be aligned to discard_granularity of the device's queue.
- */
- if (granularity_aligned_lba == sector_mapped)
- req_sects = min_t(sector_t, nr_sects,
- bio_aligned_discard_max_sectors(q));
- else
- req_sects = min_t(sector_t, nr_sects,
- granularity_aligned_lba - sector_mapped);
-
- WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
-
- bio = blk_next_bio(bio, 0, gfp_mask);
- bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, op, 0);
-
- bio->bi_iter.bi_size = req_sects << 9;
- sector += req_sects;
- nr_sects -= req_sects;
-
- /*
- * We can loop for a long time in here, if someone does
- * full device discards (like mkfs). Be nice and allow
- * us to schedule out to avoid softlocking if preempt
- * is disabled.
- */
- cond_resched();
- }
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
+{
+ struct bio *bio;
- *biop = bio;
+ while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+ gfp_mask)))
+ *biop = bio_chain_and_submit(*biop, bio);
return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
@@ -124,22 +78,20 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_DISCARD_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+ sector_t nr_sects, gfp_t gfp_mask)
{
struct bio *bio = NULL;
struct blk_plug plug;
- int ret;
+ int ret = 0;
blk_start_plug(&plug);
- ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
- &bio);
- if (!ret && bio) {
+ __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
+ if (bio) {
ret = submit_bio_wait(bio);
if (ret == -EOPNOTSUPP)
ret = 0;
@@ -151,140 +103,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
-/**
- * __blkdev_issue_write_same - generate number of bios with same page
- * @bdev: target blockdev
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @page: page containing data to write
- * @biop: pointer to anchor bio
- *
- * Description:
- * Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
- */
-static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, struct page *page,
- struct bio **biop)
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
{
- struct request_queue *q = bdev_get_queue(bdev);
- unsigned int max_write_same_sectors;
- struct bio *bio = *biop;
- sector_t bs_mask;
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if (!q)
- return -ENXIO;
-
- if (bdev_read_only(bdev))
- return -EPERM;
+ return min(bdev_write_zeroes_sectors(bdev),
+ (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned flags, sector_t limit)
+{
- if (!bdev_write_same(bdev))
- return -EOPNOTSUPP;
+ while (nr_sects) {
+ unsigned int len = min(nr_sects, limit);
+ struct bio *bio;
- /* Ensure that max_write_same_sectors doesn't overflow bi_size */
- max_write_same_sectors = bio_allowed_max_sectors(q);
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
- while (nr_sects) {
- bio = blk_next_bio(bio, 1, gfp_mask);
+ bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio->bi_vcnt = 1;
- bio->bi_io_vec->bv_page = page;
- bio->bi_io_vec->bv_offset = 0;
- bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
- bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
-
- if (nr_sects > max_write_same_sectors) {
- bio->bi_iter.bi_size = max_write_same_sectors << 9;
- nr_sects -= max_write_same_sectors;
- sector += max_write_same_sectors;
- } else {
- bio->bi_iter.bi_size = nr_sects << 9;
- nr_sects = 0;
- }
+ if (flags & BLKDEV_ZERO_NOUNMAP)
+ bio->bi_opf |= REQ_NOUNMAP;
+
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+ *biop = bio_chain_and_submit(*biop, bio);
+
+ nr_sects -= len;
+ sector += len;
cond_resched();
}
-
- *biop = bio;
- return 0;
}
-/**
- * blkdev_issue_write_same - queue a write same operation
- * @bdev: target blockdev
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @page: page containing data
- *
- * Description:
- * Issue a write same request for the sectors in question.
- */
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask,
- struct page *page)
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
{
+ sector_t limit = bio_write_zeroes_limit(bdev);
struct bio *bio = NULL;
struct blk_plug plug;
- int ret;
+ int ret = 0;
blk_start_plug(&plug);
- ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
- &bio);
- if (ret == 0 && bio) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+ flags, limit);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
ret = submit_bio_wait(bio);
bio_put(bio);
}
blk_finish_plug(&plug);
- return ret;
-}
-EXPORT_SYMBOL(blkdev_issue_write_same);
-static int __blkdev_issue_write_zeroes(struct block_device *bdev,
- sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop, unsigned flags)
-{
- struct bio *bio = *biop;
- unsigned int max_write_zeroes_sectors;
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (!q)
- return -ENXIO;
-
- if (bdev_read_only(bdev))
- return -EPERM;
-
- /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
- max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
-
- if (max_write_zeroes_sectors == 0)
+ /*
+ * For some devices there is no non-destructive way to verify whether
+ * WRITE ZEROES is actually supported. These will clear the capability
+ * on an I/O error, in which case we'll turn any error into
+ * "not supported" here.
+ */
+ if (ret && !bdev_write_zeroes_sectors(bdev))
return -EOPNOTSUPP;
-
- while (nr_sects) {
- bio = blk_next_bio(bio, 0, gfp_mask);
- bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio->bi_opf = REQ_OP_WRITE_ZEROES;
- if (flags & BLKDEV_ZERO_NOUNMAP)
- bio->bi_opf |= REQ_NOUNMAP;
-
- if (nr_sects > max_write_zeroes_sectors) {
- bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
- nr_sects -= max_write_zeroes_sectors;
- sector += max_write_zeroes_sectors;
- } else {
- bio->bi_iter.bi_size = nr_sects << 9;
- nr_sects = 0;
- }
- cond_resched();
- }
-
- *biop = bio;
- return 0;
+ return ret;
}
/*
@@ -300,41 +192,64 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
return min(pages, (sector_t)BIO_MAX_VECS);
}
-static int __blkdev_issue_zero_pages(struct block_device *bdev,
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop)
+ struct bio **biop, unsigned int flags)
{
- struct request_queue *q = bdev_get_queue(bdev);
- struct bio *bio = *biop;
- int bi_size = 0;
- unsigned int sz;
+ struct folio *zero_folio = largest_zero_folio();
- if (!q)
- return -ENXIO;
+ while (nr_sects) {
+ unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+ struct bio *bio;
- if (bdev_read_only(bdev))
- return -EPERM;
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
- while (nr_sects != 0) {
- bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
- gfp_mask);
+ bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
- while (nr_sects != 0) {
- sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
- bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
- nr_sects -= bi_size >> 9;
- sector += bi_size >> 9;
- if (bi_size < sz)
+
+ do {
+ unsigned int len;
+
+ len = min_t(sector_t, folio_size(zero_folio),
+ nr_sects << SECTOR_SHIFT);
+ if (!bio_add_folio(bio, zero_folio, len, 0))
break;
- }
+ nr_sects -= len >> SECTOR_SHIFT;
+ sector += len >> SECTOR_SHIFT;
+ } while (nr_sects);
+
+ *biop = bio_chain_and_submit(*biop, bio);
cond_resched();
}
+}
- *biop = bio;
- return 0;
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
}
/**
@@ -360,20 +275,21 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
unsigned flags)
{
- int ret;
- sector_t bs_mask;
+ sector_t limit = bio_write_zeroes_limit(bdev);
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
-
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
- biop, flags);
- if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
- return ret;
+ if (bdev_read_only(bdev))
+ return -EPERM;
- return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
- biop);
+ if (limit) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, biop, flags, limit);
+ } else {
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+ biop, flags);
+ }
+ return 0;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
@@ -393,50 +309,63 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
- int ret = 0;
- sector_t bs_mask;
- struct bio *bio;
+ int ret;
+
+ if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ if (bdev_write_zeroes_sectors(bdev)) {
+ ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, flags);
+ if (ret != -EOPNOTSUPP)
+ return ret;
+ }
+
+ return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
+}
+EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp)
+{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+ struct bio *bio = NULL;
struct blk_plug plug;
- bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
+ int ret = 0;
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ /* make sure that "len << SECTOR_SHIFT" doesn't overflow */
+ if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
+ max_sectors = UINT_MAX >> SECTOR_SHIFT;
+ max_sectors &= ~bs_mask;
+
+ if (max_sectors == 0)
+ return -EOPNOTSUPP;
if ((sector | nr_sects) & bs_mask)
return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
-retry:
- bio = NULL;
blk_start_plug(&plug);
- if (try_write_zeroes) {
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
- gfp_mask, &bio, flags);
- } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
- ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
- gfp_mask, &bio);
- } else {
- /* No zeroing offload support */
- ret = -EOPNOTSUPP;
+ while (nr_sects) {
+ unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+ sector += len;
+ nr_sects -= len;
+ cond_resched();
}
- if (ret == 0 && bio) {
+ if (bio) {
ret = submit_bio_wait(bio);
bio_put(bio);
}
blk_finish_plug(&plug);
- if (ret && try_write_zeroes) {
- if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
- try_write_zeroes = false;
- goto retry;
- }
- if (!bdev_write_zeroes_sectors(bdev)) {
- /*
- * Zeroing offload support was indicated, but the
- * device reported ILLEGAL REQUEST (for some devices
- * there is no non-destructive way to verify whether
- * WRITE ZEROES is actually supported).
- */
- ret = -EOPNOTSUPP;
- }
- }
return ret;
}
-EXPORT_SYMBOL(blkdev_issue_zeroout);
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
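
Illustrative note (not part of the diff above): a minimal caller-side sketch of the reworked interfaces, assuming a hypothetical caller that already holds a valid, writable struct block_device. It shows that blkdev_issue_discard() no longer takes a flags argument, that secure erase is requested through the dedicated blkdev_issue_secure_erase() helper rather than a discard flag, and that blkdev_issue_zeroout() still falls back to writing zero pages unless BLKDEV_ZERO_NOFALLBACK is set.

/*
 * Sketch only: example_trim_and_zero() is a hypothetical helper, not part of
 * this patch. Error handling is reduced to the minimum needed to show the
 * call signatures.
 */
static int example_trim_and_zero(struct block_device *bdev, sector_t sector,
		sector_t nr_sects)
{
	int ret;

	/* Discard: the old BLKDEV_DISCARD_* flags argument is gone. */
	ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL);
	if (ret && ret != -EOPNOTSUPP)
		return ret;

	/* Secure erase is now a separate helper instead of a discard flag. */
	ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, GFP_KERNEL);
	if (ret && ret != -EOPNOTSUPP)
		return ret;

	/*
	 * Zeroing prefers WRITE ZEROES and falls back to writing zero pages;
	 * pass BLKDEV_ZERO_NOFALLBACK to suppress the fallback.
	 */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL, 0);
}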