Diffstat (limited to 'block/blk-lib.c')
-rw-r--r--  block/blk-lib.c | 460
1 file changed, 227 insertions(+), 233 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 3fe0aec90597..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic helper functions
*/
@@ -9,91 +10,64 @@
#include "blk.h"
-static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
- gfp_t gfp)
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
{
- struct bio *new = bio_alloc(gfp, nr_pages);
-
- if (bio) {
- bio_chain(bio, new);
- submit_bio(bio);
- }
+ unsigned int discard_granularity = bdev_discard_granularity(bdev);
+ sector_t granularity_aligned_sector;
+
+ if (bdev_is_partition(bdev))
+ sector += bdev->bd_start_sect;
+
+ granularity_aligned_sector =
+ round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+ /*
+ * Make sure subsequent bios start aligned to the discard granularity if
+ * it needs to be split.
+ */
+ if (granularity_aligned_sector != sector)
+ return granularity_aligned_sector - sector;
+
+ /*
+ * Align the bio size to the discard granularity to make splitting the bio
+ * at discard granularity boundaries easier in the driver if needed.
+ */
+ return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
- return new;
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+ sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
+{
+ sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+ struct bio *bio;
+
+ if (!bio_sects)
+ return NULL;
+
+ bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+ if (!bio)
+ return NULL;
+ bio->bi_iter.bi_sector = *sector;
+ bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+ *sector += bio_sects;
+ *nr_sects -= bio_sects;
+ /*
+ * We can loop for a long time in here if someone does full device
+ * discards (like mkfs). Be nice and allow us to schedule out to avoid
+ * softlocking if preempt is disabled.
+ */
+ cond_resched();
+ return bio;
}
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int flags,
- struct bio **biop)
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
- struct request_queue *q = bdev_get_queue(bdev);
- struct bio *bio = *biop;
- unsigned int granularity;
- unsigned int op;
- int alignment;
- sector_t bs_mask;
-
- if (!q)
- return -ENXIO;
-
- if (flags & BLKDEV_DISCARD_SECURE) {
- if (!blk_queue_secure_erase(q))
- return -EOPNOTSUPP;
- op = REQ_OP_SECURE_ERASE;
- } else {
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
- op = REQ_OP_DISCARD;
- }
+ struct bio *bio;
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
-
- /* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
- alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
- while (nr_sects) {
- unsigned int req_sects;
- sector_t end_sect, tmp;
-
- /* Make sure bi_size doesn't overflow */
- req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
-
- /**
- * If splitting a request, and the next starting sector would be
- * misaligned, stop the discard at the previous aligned sector.
- */
- end_sect = sector + req_sects;
- tmp = end_sect;
- if (req_sects < nr_sects &&
- sector_div(tmp, granularity) != alignment) {
- end_sect = end_sect - alignment;
- sector_div(end_sect, granularity);
- end_sect = end_sect * granularity + alignment;
- req_sects = end_sect - sector;
- }
-
- bio = next_bio(bio, 0, gfp_mask);
- bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
- bio_set_op_attrs(bio, op, 0);
-
- bio->bi_iter.bi_size = req_sects << 9;
- nr_sects -= req_sects;
- sector = end_sect;
-
- /*
- * We can loop for a long time in here, if someone does
- * full device discards (like mkfs). Be nice and allow
- * us to schedule out to avoid softlocking if preempt
- * is disabled.
- */
- cond_resched();
- }
-
- *biop = bio;
+ while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+ gfp_mask)))
+ *biop = bio_chain_and_submit(*biop, bio);
return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
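For illustration only (not part of the patch): bio_discard_limit() above caps each discard bio so that any split falls on a discard-granularity boundary. Below is a minimal user-space model of that arithmetic, assuming a hypothetical 1 MiB granularity on a 512-byte-sector, non-partitioned device; every name in the sketch is a local stand-in, not a kernel API.

/* Stand-ins for the kernel helpers used by bio_discard_limit(). */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT		9
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define round_up(x, y)		(DIV_ROUND_UP((x), (y)) * (y))
#define round_down(x, y)	(((x) / (y)) * (y))

typedef uint64_t sector_t;

static sector_t discard_limit(sector_t sector, unsigned int granularity_bytes)
{
	sector_t granularity_sectors = granularity_bytes >> SECTOR_SHIFT;
	sector_t aligned = round_up(sector, granularity_sectors);

	/* A misaligned start may only run up to the next granularity boundary. */
	if (aligned != sector)
		return aligned - sector;

	/* An aligned start is capped to a granularity-aligned chunk below 4 GiB. */
	return round_down((sector_t)UINT32_MAX, granularity_bytes) >> SECTOR_SHIFT;
}

int main(void)
{
	unsigned int granularity = 1024 * 1024;	/* hypothetical 1 MiB */

	/* Sector 100 is misaligned: 2048 - 100 = 1948 sectors to the boundary. */
	printf("%llu\n", (unsigned long long)discard_limit(100, granularity));
	/* Sector 2048 is aligned: just under 4 GiB, expressed in sectors. */
	printf("%llu\n", (unsigned long long)discard_limit(2048, granularity));
	return 0;
}

The two printed values correspond to the two return statements in bio_discard_limit(): the first bio of a misaligned range is trimmed so that all later bios start on a boundary, and aligned bios get the largest granularity-aligned size that still fits in the 32-bit bi_size field.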
@@ -104,22 +78,20 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_DISCARD_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+ sector_t nr_sects, gfp_t gfp_mask)
{
struct bio *bio = NULL;
struct blk_plug plug;
- int ret;
+ int ret = 0;
blk_start_plug(&plug);
- ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
- &bio);
- if (!ret && bio) {
+ __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
+ if (bio) {
ret = submit_bio_wait(bio);
if (ret == -EOPNOTSUPP)
ret = 0;
@@ -131,147 +103,153 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
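For illustration only (not part of the patch): with the flags argument gone, a typical caller now checks the device capability and issues the whole range in one call. A minimal sketch; discard_unused_range() is an invented name:

#include <linux/blkdev.h>

static int discard_unused_range(struct block_device *bdev, sector_t start,
				sector_t nr_sects)
{
	/* Devices that cannot discard are skipped, not treated as errors. */
	if (!bdev_max_discard_sectors(bdev))
		return 0;

	return blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL);
}

Secure erase, previously requested via BLKDEV_DISCARD_SECURE, is now issued through the separate blkdev_issue_secure_erase() helper added at the end of this diff.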
-/**
- * __blkdev_issue_write_same - generate number of bios with same page
- * @bdev: target blockdev
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @page: page containing data to write
- * @biop: pointer to anchor bio
- *
- * Description:
- * Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
- */
-static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, struct page *page,
- struct bio **biop)
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
{
- struct request_queue *q = bdev_get_queue(bdev);
- unsigned int max_write_same_sectors;
- struct bio *bio = *biop;
- sector_t bs_mask;
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if (!q)
- return -ENXIO;
+ return min(bdev_write_zeroes_sectors(bdev),
+ (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned flags, sector_t limit)
+{
- if (!bdev_write_same(bdev))
- return -EOPNOTSUPP;
+ while (nr_sects) {
+ unsigned int len = min(nr_sects, limit);
+ struct bio *bio;
- /* Ensure that max_write_same_sectors doesn't overflow bi_size */
- max_write_same_sectors = UINT_MAX >> 9;
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
- while (nr_sects) {
- bio = next_bio(bio, 1, gfp_mask);
+ bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
- bio->bi_vcnt = 1;
- bio->bi_io_vec->bv_page = page;
- bio->bi_io_vec->bv_offset = 0;
- bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
- bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
-
- if (nr_sects > max_write_same_sectors) {
- bio->bi_iter.bi_size = max_write_same_sectors << 9;
- nr_sects -= max_write_same_sectors;
- sector += max_write_same_sectors;
- } else {
- bio->bi_iter.bi_size = nr_sects << 9;
- nr_sects = 0;
- }
+ if (flags & BLKDEV_ZERO_NOUNMAP)
+ bio->bi_opf |= REQ_NOUNMAP;
+
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+ *biop = bio_chain_and_submit(*biop, bio);
+
+ nr_sects -= len;
+ sector += len;
cond_resched();
}
-
- *biop = bio;
- return 0;
}
-/**
- * blkdev_issue_write_same - queue a write same operation
- * @bdev: target blockdev
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @page: page containing data
- *
- * Description:
- * Issue a write same request for the sectors in question.
- */
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask,
- struct page *page)
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
{
+ sector_t limit = bio_write_zeroes_limit(bdev);
struct bio *bio = NULL;
struct blk_plug plug;
- int ret;
+ int ret = 0;
blk_start_plug(&plug);
- ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
- &bio);
- if (ret == 0 && bio) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+ flags, limit);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
ret = submit_bio_wait(bio);
bio_put(bio);
}
blk_finish_plug(&plug);
+
+ /*
+ * For some devices there is no non-destructive way to verify whether
+ * WRITE ZEROES is actually supported. These will clear the capability
+ * on an I/O error, in which case we'll turn any error into
+ * "not supported" here.
+ */
+ if (ret && !bdev_write_zeroes_sectors(bdev))
+ return -EOPNOTSUPP;
return ret;
}
-EXPORT_SYMBOL(blkdev_issue_write_same);
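For illustration only (not part of the patch): because the write-zeroes capability can be revoked at runtime as described above, a caller that insists on the hardware offload has to treat -EOPNOTSUPP as "zero the range by other means". A minimal sketch; try_hw_zeroout() is an invented name:

#include <linux/blkdev.h>

static int try_hw_zeroout(struct block_device *bdev, sector_t sector,
			  sector_t nr_sects)
{
	/* Request the offload only; never fall back to writing zero pages. */
	int ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				       BLKDEV_ZERO_NOFALLBACK);

	/*
	 * -EOPNOTSUPP can mean the device never supported WRITE ZEROES, or
	 * that a failed attempt just cleared the capability at runtime;
	 * either way the caller must zero the range itself or give up.
	 */
	return ret;
}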
-static int __blkdev_issue_write_zeroes(struct block_device *bdev,
- sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop, unsigned flags)
+/*
+ * Convert a number of 512B sectors to a number of pages.
+ * The result is limited to a number of pages that can fit into a BIO.
+ * Also make sure that the result is always at least 1 (page) for the cases
+ * where nr_sects is lower than the number of sectors in a page.
+ */
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
- struct bio *bio = *biop;
- unsigned int max_write_zeroes_sectors;
- struct request_queue *q = bdev_get_queue(bdev);
+ sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
- if (!q)
- return -ENXIO;
-
- /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
- max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
+ return min(pages, (sector_t)BIO_MAX_VECS);
+}
- if (max_write_zeroes_sectors == 0)
- return -EOPNOTSUPP;
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned int flags)
+{
+ struct folio *zero_folio = largest_zero_folio();
while (nr_sects) {
- bio = next_bio(bio, 0, gfp_mask);
+ unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+ struct bio *bio;
+
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
+
+ bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
- bio->bi_opf = REQ_OP_WRITE_ZEROES;
- if (flags & BLKDEV_ZERO_NOUNMAP)
- bio->bi_opf |= REQ_NOUNMAP;
- if (nr_sects > max_write_zeroes_sectors) {
- bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
- nr_sects -= max_write_zeroes_sectors;
- sector += max_write_zeroes_sectors;
- } else {
- bio->bi_iter.bi_size = nr_sects << 9;
- nr_sects = 0;
- }
+ do {
+ unsigned int len;
+
+ len = min_t(sector_t, folio_size(zero_folio),
+ nr_sects << SECTOR_SHIFT);
+ if (!bio_add_folio(bio, zero_folio, len, 0))
+ break;
+ nr_sects -= len >> SECTOR_SHIFT;
+ sector += len >> SECTOR_SHIFT;
+ } while (nr_sects);
+
+ *biop = bio_chain_and_submit(*biop, bio);
cond_resched();
}
-
- *biop = bio;
- return 0;
}
-/*
- * Convert a number of 512B sectors to a number of pages.
- * The result is limited to a number of pages that can fit into a BIO.
- * Also make sure that the result is always at least 1 (page) for the cases
- * where nr_sects is lower than the number of sectors in a page.
- */
-static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
{
- sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
- return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
}
/**
@@ -287,12 +265,6 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
* Zero-fill a block range, either using hardware offload or by explicitly
* writing zeroes to the device.
*
- * Note that this function may fail with -EOPNOTSUPP if the driver signals
- * zeroing offload support, but the device fails to process the command (for
- * some devices there is no non-destructive way to verify whether this
- * operation is actually supported). In this case the caller should call
- * retry the call to blkdev_issue_zeroout() and the fallback path will be used.
- *
* If a device is using logical block provisioning, the underlying space will
* not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
*
@@ -303,43 +275,21 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
unsigned flags)
{
- int ret;
- int bi_size = 0;
- struct bio *bio = *biop;
- unsigned int sz;
- sector_t bs_mask;
+ sector_t limit = bio_write_zeroes_limit(bdev);
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
- biop, flags);
- if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
- goto out;
-
- ret = 0;
- while (nr_sects != 0) {
- bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
- gfp_mask);
- bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
- while (nr_sects != 0) {
- sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
- bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
- nr_sects -= bi_size >> 9;
- sector += bi_size >> 9;
- if (bi_size < sz)
- break;
- }
- cond_resched();
+ if (limit) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, biop, flags, limit);
+ } else {
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+ biop, flags);
}
-
- *biop = bio;
-out:
- return ret;
+ return 0;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
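For illustration only (not part of the patch): __blkdev_issue_zeroout() only builds and chains bios; the caller owns the plug and the final wait. A minimal sketch of that pattern, batching several discontiguous ranges; zero_ranges() and struct zero_range are invented names:

#include <linux/bio.h>
#include <linux/blkdev.h>

struct zero_range {
	sector_t sector;
	sector_t nr_sects;
};

static int zero_ranges(struct block_device *bdev,
		       const struct zero_range *r, int count)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int i, ret = 0;

	blk_start_plug(&plug);
	for (i = 0; i < count; i++) {
		ret = __blkdev_issue_zeroout(bdev, r[i].sector, r[i].nr_sects,
					     GFP_KERNEL, &bio, 0);
		if (ret)
			break;
	}
	if (bio) {
		/* Waiting on the tail of the chain waits for every chained bio. */
		int err = submit_bio_wait(bio);

		if (!ret)
			ret = err;
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}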
@@ -360,13 +310,57 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
int ret;
+
+ if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ if (bdev_write_zeroes_sectors(bdev)) {
+ ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, flags);
+ if (ret != -EOPNOTSUPP)
+ return ret;
+ }
+
+ return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
+}
+EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp)
+{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
struct bio *bio = NULL;
struct blk_plug plug;
+ int ret = 0;
+
+ /* make sure that "len << SECTOR_SHIFT" doesn't overflow */
+ if (max_sectors > UINT_MAX >> SECTOR_SHIFT)
+ max_sectors = UINT_MAX >> SECTOR_SHIFT;
+ max_sectors &= ~bs_mask;
+
+ if (max_sectors == 0)
+ return -EOPNOTSUPP;
+ if ((sector | nr_sects) & bs_mask)
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
blk_start_plug(&plug);
- ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
- &bio, flags);
- if (ret == 0 && bio) {
+ while (nr_sects) {
+ unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+
+ sector += len;
+ nr_sects -= len;
+ cond_resched();
+ }
+ if (bio) {
ret = submit_bio_wait(bio);
bio_put(bio);
}
@@ -374,4 +368,4 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
-EXPORT_SYMBOL(blkdev_issue_zeroout);
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
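For illustration only (not part of the patch): a hypothetical caller of the new helper, erasing a whole device after checking that the hardware supports the operation; erase_whole_device() is an invented name:

#include <linux/blkdev.h>

static int erase_whole_device(struct block_device *bdev)
{
	/* blkdev_issue_secure_erase() would also return -EOPNOTSUPP here. */
	if (!bdev_max_secure_erase_sectors(bdev))
		return -EOPNOTSUPP;

	return blkdev_issue_secure_erase(bdev, 0, bdev_nr_sectors(bdev),
					 GFP_KERNEL);
}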